1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /***************************************************************************** 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda 7/7/2000 Converter Tests for extended code coverage 13 ****************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include "unicode/uloc.h" 19 #include "unicode/ucnv.h" 20 #include "unicode/utypes.h" 21 #include "unicode/ustring.h" 22 #include "unicode/uset.h" 23 #include "cintltst.h" 24 25 #define MAX_LENGTH 999 26 27 #define UNICODE_LIMIT 0x10FFFF 28 #define SURROGATE_HIGH_START 0xD800 29 #define SURROGATE_LOW_END 0xDFFF 30 31 static int32_t gInBufferSize = 0; 32 static int32_t gOutBufferSize = 0; 33 static char gNuConvTestName[1024]; 34 35 #define nct_min(x,y) ((x<y) ? x : y) 36 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 37 38 static void printSeq(const unsigned char* a, int len); 39 static void printSeqErr(const unsigned char* a, int len); 40 static void printUSeq(const UChar* a, int len); 41 static void printUSeqErr(const UChar* a, int len); 42 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 43 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 44 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 45 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 46 47 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 48 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset); 49 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 50 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset); 51 52 static void setNuConvTestName(const char *codepage, const char *direction) 53 { 54 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 55 codepage, 56 direction, 57 (int)gInBufferSize, 58 (int)gOutBufferSize); 59 } 60 61 62 static void TestSurrogateBehaviour(void); 63 static void TestErrorBehaviour(void); 64 65 #if !UCONFIG_NO_LEGACY_CONVERSION 66 static void TestToUnicodeErrorBehaviour(void); 67 static void TestGetNextErrorBehaviour(void); 68 #endif 69 70 static void TestRegressionUTF8(void); 71 static void TestRegressionUTF32(void); 72 static void TestAvailableConverters(void); 73 static void TestFlushInternalBuffer(void); /*for improved code coverage in ucnv_cnv.c*/ 74 static void TestResetBehaviour(void); 75 static void TestTruncated(void); 76 static void TestUnicodeSet(void); 77 78 static void TestWithBufferSize(int32_t osize, int32_t isize); 79 80 81 static void printSeq(const unsigned char* a, int len) 82 { 83 int i=0; 84 log_verbose("\n{"); 85 while (i<len) 86 log_verbose("0x%02X ", a[i++]); 87 log_verbose("}\n"); 88 } 89 90 static void printUSeq(const UChar* a, int len) 91 { 92 int i=0; 93 log_verbose("\n{"); 94 while (i<len) 95 log_verbose("%0x04X ", a[i++]); 96 log_verbose("}\n"); 97 } 98 99 static void printSeqErr(const unsigned char* a, int len) 100 { 101 int i=0; 102 fprintf(stderr, "\n{"); 103 while (i<len) fprintf(stderr, "0x%02X ", a[i++]); 104 fprintf(stderr, "}\n"); 105 } 106 107 static void printUSeqErr(const UChar* a, int len) 108 { 109 int i=0; 110 fprintf(stderr, "\n{"); 111 while (i<len) 112 fprintf(stderr, "0x%04X ", a[i++]); 113 fprintf(stderr,"}\n"); 114 } 115 116 void addExtraTests(TestNode** root); 117 118 void addExtraTests(TestNode** root) 119 { 120 addTest(root, &TestSurrogateBehaviour, "tsconv/ncnvtst/TestSurrogateBehaviour"); 121 addTest(root, &TestErrorBehaviour, "tsconv/ncnvtst/TestErrorBehaviour"); 122 123 #if !UCONFIG_NO_LEGACY_CONVERSION 124 addTest(root, &TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour"); 125 addTest(root, &TestGetNextErrorBehaviour, "tsconv/ncnvtst/TestGetNextErrorBehaviour"); 126 #endif 127 128 addTest(root, &TestAvailableConverters, "tsconv/ncnvtst/TestAvailableConverters"); 129 addTest(root, &TestFlushInternalBuffer, "tsconv/ncnvtst/TestFlushInternalBuffer"); 130 addTest(root, &TestResetBehaviour, "tsconv/ncnvtst/TestResetBehaviour"); 131 addTest(root, &TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8"); 132 addTest(root, &TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32"); 133 addTest(root, &TestTruncated, "tsconv/ncnvtst/TestTruncated"); 134 addTest(root, &TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet"); 135 } 136 137 /*test surrogate behaviour*/ 138 static void TestSurrogateBehaviour(){ 139 log_verbose("Testing for SBCS and LATIN_1\n"); 140 { 141 UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032}; 142 const uint8_t expected[] = {0x31, 0x1a, 0x32}; 143 144 #if !UCONFIG_NO_LEGACY_CONVERSION 145 /*SBCS*/ 146 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 147 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR)) 148 log_err("u-> ibm-920 [UCNV_SBCS] not match.\n"); 149 #endif 150 151 /*LATIN_1*/ 152 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 153 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR )) 154 log_err("u-> LATIN_1 not match.\n"); 155 156 } 157 158 #if !UCONFIG_NO_LEGACY_CONVERSION 159 log_verbose("Testing for DBCS and MBCS\n"); 160 { 161 UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 162 const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 163 int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 164 165 /*DBCS*/ 166 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 167 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 168 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 169 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 170 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR)) 171 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 172 /*MBCS*/ 173 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 174 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 175 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 176 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 177 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR)) 178 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 179 } 180 181 log_verbose("Testing for ISO-2022-jp\n"); 182 { 183 UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 184 185 const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 186 0x31,0x1A, 0x32}; 187 188 189 int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 190 191 /*iso-2022-jp*/ 192 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 193 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR)) 194 log_err("u-> not match.\n"); 195 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 196 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR)) 197 log_err("u-> not match.\n"); 198 } 199 200 log_verbose("Testing for ISO-2022-cn\n"); 201 { 202 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 203 204 static const uint8_t expected[] = { 205 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 206 0x36, 0x21, 207 0x0F, 0x31, 208 0x1A, 209 0x32 210 }; 211 212 213 214 static const int32_t offsets[] = { 215 0, 0, 0, 0, 0, 0, 0, 216 1, 1, 217 2, 2, 218 3, 219 5, }; 220 221 /*iso-2022-CN*/ 222 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 223 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR)) 224 log_err("u-> not match.\n"); 225 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 226 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR)) 227 log_err("u-> not match.\n"); 228 } 229 230 log_verbose("Testing for ISO-2022-kr\n"); 231 { 232 static const UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 233 234 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 235 0x0E, 0x6C, 0x69, 236 0x0f, 0x1A, 237 0x0e, 0x6F, 0x4B, 238 0x0F, 0x31, 239 0x1A, 240 0x32 }; 241 242 static const int32_t offsets[] = {-1, -1, -1, -1, 243 0, 0, 0, 244 1, 1, 245 3, 3, 3, 246 4, 4, 247 5, 248 7, 249 }; 250 251 /*iso-2022-kr*/ 252 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 253 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR)) 254 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 255 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 256 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR)) 257 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 258 } 259 260 log_verbose("Testing for HZ\n"); 261 { 262 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 263 264 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 265 0x7E, 0x7D, 0x1A, 266 0x7E, 0x7B, 0x36, 0x21, 267 0x7E, 0x7D, 0x31, 268 0x1A, 269 0x32 }; 270 271 272 static const int32_t offsets[] = {0,0,0,0, 273 1,1,1, 274 3,3,3,3, 275 4,4,4, 276 5, 277 7,}; 278 279 /*hz*/ 280 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 281 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR)) 282 log_err("u-> HZ not match.\n"); 283 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 284 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR)) 285 log_err("u-> HZ not match.\n"); 286 } 287 #endif 288 289 /*UTF-8*/ 290 log_verbose("Testing for UTF8\n"); 291 { 292 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 293 static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 294 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 295 0x04, 0x06 }; 296 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 297 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 298 299 300 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 301 /*UTF-8*/ 302 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 303 expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR )) 304 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 305 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 306 expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR )) 307 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 308 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 309 expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR )) 310 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 311 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 312 expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR )) 313 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 314 315 if(!convertToU(expected, sizeof(expected), 316 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR )) 317 log_err("UTF8 -> u did not match.\n"); 318 if(!convertToU(expected, sizeof(expected), 319 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR )) 320 log_err("UTF8 -> u did not match.\n"); 321 if(!convertToU(expected, sizeof(expected), 322 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR )) 323 log_err("UTF8 ->u did not match.\n"); 324 if(!convertToU(expected, sizeof(expected), 325 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR )) 326 log_err("UTF8 -> u did not match.\n"); 327 328 } 329 } 330 331 /*test various error behaviours*/ 332 static void TestErrorBehaviour(){ 333 log_verbose("Testing for SBCS and LATIN_1\n"); 334 { 335 static const UChar sampleText[] = { 0x0031, 0xd801}; 336 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 337 static const uint8_t expected0[] = { 0x31}; 338 static const uint8_t expected[] = { 0x31, 0x1a}; 339 static const uint8_t expected2[] = { 0x31, 0x1a, 0x32}; 340 341 #if !UCONFIG_NO_LEGACY_CONVERSION 342 /*SBCS*/ 343 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 344 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 345 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 346 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 347 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR)) 348 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 349 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 350 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 351 log_err("u-> ibm-920 [UCNV_SBCS] did not match\n"); 352 #endif 353 354 /*LATIN_1*/ 355 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 356 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 357 log_err("u-> LATIN_1 is supposed to fail\n"); 358 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 359 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR)) 360 log_err("u-> LATIN_1 is supposed to fail\n"); 361 362 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 363 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 364 log_err("u-> LATIN_1 did not match\n"); 365 } 366 367 #if !UCONFIG_NO_LEGACY_CONVERSION 368 log_verbose("Testing for DBCS and MBCS\n"); 369 { 370 static const UChar sampleText[] = { 0x00a1, 0xd801}; 371 static const uint8_t expected[] = { 0xa2, 0xae}; 372 static const int32_t offsets[] = { 0x00, 0x00}; 373 static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0}; 374 static const int32_t offsetsSUB[] = { 0x00, 0x00, 0x01, 0x01}; 375 376 static const UChar sampleText2[] = { 0x00a1, 0xd801, 0x00a4}; 377 static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 378 static const int32_t offsets2[] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02}; 379 380 static const UChar sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01}; 381 static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0}; 382 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x01, 0x02, 0x02}; 383 384 /* BEGIN android-changed */ 385 /* Android uses a different EUC-JP table. We change one character, 386 * choosing a mapping that is common to both tables. */ 387 static const UChar sampleText4MBCS[] = { 0x0061, 0x9ED1, 0xdc01}; 388 static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xf4, 0xf8, 0xf4, 0xfe}; 389 /* static const UChar sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; */ 390 /* static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; */ 391 /* END android-changed */ 392 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 }; 393 394 /*DBCS*/ 395 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 396 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 397 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 398 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 399 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 400 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 401 402 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 403 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR)) 404 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 405 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 406 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_ZERO_ERROR)) 407 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 408 409 410 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 411 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 412 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 413 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 414 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR)) 415 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 416 417 /*MBCS*/ 418 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 419 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 420 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 421 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 422 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 423 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 424 425 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 426 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 427 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 428 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 429 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 430 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 431 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 432 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR)) 433 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 434 435 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 436 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR)) 437 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 438 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 439 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR)) 440 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 441 442 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 443 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 444 log_err("u-> euc-jp [UCNV_MBCS] \n"); 445 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 446 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 447 log_err("u-> euc-jp [UCNV_MBCS] \n"); 448 } 449 450 /*iso-2022-jp*/ 451 log_verbose("Testing for iso-2022-jp\n"); 452 { 453 static const UChar sampleText[] = { 0x0031, 0xd801}; 454 static const uint8_t expected[] = { 0x31}; 455 static const uint8_t expectedSUB[] = { 0x31, 0x1a}; 456 static const int32_t offsets[] = { 0x00, 1}; 457 458 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 459 static const uint8_t expected2[] = { 0x31,0x1A,0x32}; 460 static const int32_t offsets2[] = { 0x00,0x01,0x02}; 461 462 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 463 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a}; 464 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 }; 465 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 466 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR)) 467 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 468 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 469 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR)) 470 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 471 472 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 473 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR)) 474 log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n"); 475 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 476 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 477 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 478 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 479 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 480 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 481 482 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 483 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 484 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 485 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 486 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 487 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 488 } 489 490 /*iso-2022-cn*/ 491 log_verbose("Testing for iso-2022-cn\n"); 492 { 493 static const UChar sampleText[] = { 0x0031, 0xd801}; 494 static const uint8_t expected[] = { 0x31}; 495 static const uint8_t expectedSUB[] = { 0x31, 0x1A}; 496 static const int32_t offsets[] = { 0x00, 1}; 497 498 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 499 static const uint8_t expected2[] = { 0x31, 0x1A,0x32}; 500 static const int32_t offsets2[] = { 0x00, 0x01,0x02}; 501 502 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 503 static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A}; 504 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x02 }; 505 506 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 507 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a }; 508 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 }; 509 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 510 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR)) 511 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 512 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 513 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR)) 514 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 515 516 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 517 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR)) 518 log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n"); 519 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 520 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 521 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 522 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 523 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 524 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 525 526 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 527 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR)) 528 log_err("u->iso-2022-cn [UCNV_MBCS] \n"); 529 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 530 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR)) 531 log_err("u-> iso-2022-cn[UCNV_MBCS] \n"); 532 533 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 534 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR)) 535 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 536 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 537 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR)) 538 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 539 } 540 541 /*iso-2022-kr*/ 542 log_verbose("Testing for iso-2022-kr\n"); 543 { 544 static const UChar sampleText[] = { 0x0031, 0xd801}; 545 static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31}; 546 static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A}; 547 static const int32_t offsets[] = { -1, -1, -1, -1, 0x00, 1}; 548 549 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 550 static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32}; 551 static const int32_t offsets2[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02}; 552 553 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 554 static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A }; 555 static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 }; 556 557 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 558 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR)) 559 log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); 560 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 561 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR)) 562 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 563 564 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 565 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR)) 566 log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n"); 567 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 568 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 569 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 570 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 571 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 572 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 573 574 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 575 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR)) 576 log_err("u->iso-2022-kr [UCNV_MBCS] \n"); 577 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 578 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR)) 579 log_err("u-> iso-2022-kr[UCNV_MBCS] \n"); 580 } 581 582 /*HZ*/ 583 log_verbose("Testing for HZ\n"); 584 { 585 static const UChar sampleText[] = { 0x0031, 0xd801}; 586 static const uint8_t expected[] = { 0x7e, 0x7d, 0x31}; 587 static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A}; 588 static const int32_t offsets[] = { 0x00, 0x00, 0x00, 1}; 589 590 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 591 static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31, 0x1A, 0x32 }; 592 static const int32_t offsets2[] = { 0x00, 0x00, 0x00, 0x01, 0x02 }; 593 594 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 595 static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50, 0x1A }; 596 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x02}; 597 598 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 599 static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a }; 600 static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 }; 601 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 602 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR)) 603 log_err("u-> HZ [UCNV_MBCS] \n"); 604 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 605 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR)) 606 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 607 608 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 609 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR)) 610 log_err("u->HZ[UCNV_DBCS] did not match\n"); 611 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 612 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 613 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 614 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 615 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 616 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 617 618 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 619 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR)) 620 log_err("u->HZ [UCNV_MBCS] \n"); 621 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 622 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR)) 623 log_err("u-> HZ[UCNV_MBCS] \n"); 624 625 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 626 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR)) 627 log_err("u-> HZ [UCNV_MBCS] \n"); 628 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 629 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR)) 630 log_err("u-> HZ [UCNV_MBCS] \n"); 631 } 632 #endif 633 } 634 635 #if !UCONFIG_NO_LEGACY_CONVERSION 636 /*test different convertToUnicode error behaviours*/ 637 static void TestToUnicodeErrorBehaviour() 638 { 639 log_verbose("Testing error conditions for DBCS\n"); 640 { 641 uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04}; 642 const UChar expected[] = { 0x00a1 }; 643 644 if(!convertToU(sampleText, sizeof(sampleText), 645 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_ZERO_ERROR )) 646 log_err("DBCS (ibm-1363)->Unicode did not match.\n"); 647 if(!convertToU(sampleText, sizeof(sampleText), 648 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_ZERO_ERROR )) 649 log_err("DBCS (ibm-1363)->Unicode with flush = false did not match.\n"); 650 } 651 log_verbose("Testing error conditions for SBCS\n"); 652 { 653 uint8_t sampleText[] = { 0xa2, 0xFF}; 654 const UChar expected[] = { 0x00c2 }; 655 656 /* uint8_t sampleText2[] = { 0xa2, 0x70 }; 657 const UChar expected2[] = { 0x0073 };*/ 658 659 if(!convertToU(sampleText, sizeof(sampleText), 660 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR )) 661 log_err("SBCS (ibm-1051)->Unicode did not match.\n"); 662 if(!convertToU(sampleText, sizeof(sampleText), 663 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR )) 664 log_err("SBCS (ibm-1051)->Unicode with flush = false did not match.\n"); 665 666 } 667 } 668 669 static void TestGetNextErrorBehaviour(){ 670 /*Test for unassigned character*/ 671 #define INPUT_SIZE 1 672 static const char input1[INPUT_SIZE]={ 0x70 }; 673 const char* source=(const char*)input1; 674 UErrorCode err=U_ZERO_ERROR; 675 UChar32 c=0; 676 UConverter *cnv=ucnv_open("ibm-424", &err); 677 if(U_FAILURE(err)) { 678 log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err)); 679 return; 680 } 681 c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err); 682 if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){ 683 log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c); 684 } 685 ucnv_close(cnv); 686 } 687 #endif 688 689 #define MAX_UTF16_LEN 2 690 #define MAX_UTF8_LEN 4 691 692 /*Regression test for utf8 converter*/ 693 static void TestRegressionUTF8(){ 694 UChar32 currCh = 0; 695 int32_t offset8; 696 int32_t offset16; 697 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 698 uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH); 699 700 while (currCh <= UNICODE_LIMIT) { 701 offset16 = 0; 702 offset8 = 0; 703 while(currCh <= UNICODE_LIMIT 704 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 705 && offset8 < (MAX_LENGTH - MAX_UTF8_LEN)) 706 { 707 if (currCh == SURROGATE_HIGH_START) { 708 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 709 } 710 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 711 UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh); 712 currCh++; 713 } 714 if(!convertFromU(standardForm, offset16, 715 utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 716 log_err("Unicode->UTF8 did not match.\n"); 717 } 718 if(!convertToU(utf8, offset8, 719 standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 720 log_err("UTF8->Unicode did not match.\n"); 721 } 722 } 723 724 free(standardForm); 725 free(utf8); 726 727 { 728 static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 }; 729 static const UChar expected[] = { 0x0301, 0x0300 }; 730 UConverter *conv8; 731 UErrorCode err = U_ZERO_ERROR; 732 UChar pivotBuffer[100]; 733 const UChar* const pivEnd = pivotBuffer + 100; 734 const char* srcBeg; 735 const char* srcEnd; 736 UChar* pivBeg; 737 738 conv8 = ucnv_open("UTF-8", &err); 739 740 srcBeg = src8; 741 pivBeg = pivotBuffer; 742 srcEnd = src8 + 3; 743 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 744 if (srcBeg != srcEnd) { 745 log_err("Did not consume whole buffer on first call.\n"); 746 } 747 748 srcEnd = src8 + 4; 749 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 750 if (srcBeg != srcEnd) { 751 log_err("Did not consume whole buffer on second call.\n"); 752 } 753 754 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 755 log_err("Did not get expected results for UTF-8.\n"); 756 } 757 ucnv_close(conv8); 758 } 759 } 760 761 #define MAX_UTF32_LEN 1 762 763 static void TestRegressionUTF32(){ 764 UChar32 currCh = 0; 765 int32_t offset32; 766 int32_t offset16; 767 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 768 UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32)); 769 770 while (currCh <= UNICODE_LIMIT) { 771 offset16 = 0; 772 offset32 = 0; 773 while(currCh <= UNICODE_LIMIT 774 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 775 && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN)) 776 { 777 if (currCh == SURROGATE_HIGH_START) { 778 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 779 } 780 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 781 UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh); 782 currCh++; 783 } 784 if(!convertFromU(standardForm, offset16, 785 (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 786 log_err("Unicode->UTF32 did not match.\n"); 787 } 788 if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32), 789 standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 790 log_err("UTF32->Unicode did not match.\n"); 791 } 792 } 793 free(standardForm); 794 free(utf32); 795 796 { 797 /* Check for lone surrogate error handling. */ 798 static const UChar sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 }; 799 static const UChar sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 }; 800 static const uint8_t expectedUTF32BE[] = { 801 0x00, 0x00, 0x00, 0x31, 802 0x00, 0x00, 0xff, 0xfd, 803 0x00, 0x00, 0x00, 0x32 804 }; 805 static const uint8_t expectedUTF32LE[] = { 806 0x31, 0x00, 0x00, 0x00, 807 0xfd, 0xff, 0x00, 0x00, 808 0x32, 0x00, 0x00, 0x00 809 }; 810 static const int32_t offsetsUTF32[] = { 811 0x00, 0x00, 0x00, 0x00, 812 0x01, 0x01, 0x01, 0x01, 813 0x02, 0x02, 0x02, 0x02 814 }; 815 816 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 817 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 818 log_err("u->UTF-32BE\n"); 819 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 820 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 821 log_err("u->UTF-32BE\n"); 822 823 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 824 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 825 log_err("u->UTF-32LE\n"); 826 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 827 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 828 log_err("u->UTF-32LE\n"); 829 } 830 831 { 832 static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 }; 833 static const UChar expected[] = { 0x0031, 0x0030 }; 834 UConverter *convBE; 835 UErrorCode err = U_ZERO_ERROR; 836 UChar pivotBuffer[100]; 837 const UChar* const pivEnd = pivotBuffer + 100; 838 const char* srcBeg; 839 const char* srcEnd; 840 UChar* pivBeg; 841 842 convBE = ucnv_open("UTF-32BE", &err); 843 844 srcBeg = srcBE; 845 pivBeg = pivotBuffer; 846 srcEnd = srcBE + 5; 847 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 848 if (srcBeg != srcEnd) { 849 log_err("Did not consume whole buffer on first call.\n"); 850 } 851 852 srcEnd = srcBE + 8; 853 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 854 if (srcBeg != srcEnd) { 855 log_err("Did not consume whole buffer on second call.\n"); 856 } 857 858 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 859 log_err("Did not get expected results for UTF-32BE.\n"); 860 } 861 ucnv_close(convBE); 862 } 863 { 864 static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 }; 865 static const UChar expected[] = { 0x0031, 0x0030 }; 866 UConverter *convLE; 867 UErrorCode err = U_ZERO_ERROR; 868 UChar pivotBuffer[100]; 869 const UChar* const pivEnd = pivotBuffer + 100; 870 const char* srcBeg; 871 const char* srcEnd; 872 UChar* pivBeg; 873 874 convLE = ucnv_open("UTF-32LE", &err); 875 876 srcBeg = srcLE; 877 pivBeg = pivotBuffer; 878 srcEnd = srcLE + 5; 879 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 880 if (srcBeg != srcEnd) { 881 log_err("Did not consume whole buffer on first call.\n"); 882 } 883 884 srcEnd = srcLE + 8; 885 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 886 if (srcBeg != srcEnd) { 887 log_err("Did not consume whole buffer on second call.\n"); 888 } 889 890 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 891 log_err("Did not get expected results for UTF-32LE.\n"); 892 } 893 ucnv_close(convLE); 894 } 895 } 896 897 /*Walk through the available converters*/ 898 static void TestAvailableConverters(){ 899 UErrorCode status=U_ZERO_ERROR; 900 UConverter *conv=NULL; 901 int32_t i=0; 902 for(i=0; i < ucnv_countAvailable(); i++){ 903 status=U_ZERO_ERROR; 904 conv=ucnv_open(ucnv_getAvailableName(i), &status); 905 if(U_FAILURE(status)){ 906 log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n", 907 ucnv_getAvailableName(i), myErrorName(status)); 908 continue; 909 } 910 ucnv_close(conv); 911 } 912 913 } 914 915 static void TestFlushInternalBuffer(){ 916 TestWithBufferSize(MAX_LENGTH, 1); 917 TestWithBufferSize(1, 1); 918 TestWithBufferSize(1, MAX_LENGTH); 919 TestWithBufferSize(MAX_LENGTH, MAX_LENGTH); 920 } 921 922 static void TestWithBufferSize(int32_t insize, int32_t outsize){ 923 924 gInBufferSize =insize; 925 gOutBufferSize = outsize; 926 927 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 928 { 929 UChar sampleText[] = 930 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E }; 931 const uint8_t expectedUTF8[] = 932 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 933 int32_t toUTF8Offs[] = 934 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07}; 935 /* int32_t fmUTF8Offs[] = 936 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/ 937 938 /*UTF-8*/ 939 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 940 expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE)) 941 log_err("u-> UTF8 did not match.\n"); 942 } 943 944 #if !UCONFIG_NO_LEGACY_CONVERSION 945 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 946 { 947 UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 948 const uint8_t toIBM943[]= { 0x61, 949 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 950 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 951 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 952 0x61 }; 953 int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 954 955 if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 956 toIBM943, sizeof(toIBM943), "ibm-943", 957 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE)) 958 log_err("u-> ibm-943 with subst with value did not match.\n"); 959 } 960 #endif 961 962 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 963 { 964 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 965 0xe0, 0x80, 0x61}; 966 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 967 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 968 969 if(!testConvertToU(sampleText1, sizeof(sampleText1), 970 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE)) 971 log_err("utf8->u with substitute did not match.\n");; 972 } 973 974 #if !UCONFIG_NO_LEGACY_CONVERSION 975 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 976 /*to Unicode*/ 977 { 978 const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 979 0x81, 0xad, /*unassigned*/ 980 0x89, 0xd3 }; 981 UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 982 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 983 0x7B87}; 984 int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 985 986 if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU), 987 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 988 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE)) 989 log_err("ibm-943->u with substitute with value did not match.\n"); 990 991 } 992 #endif 993 } 994 995 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 996 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 997 { 998 999 int32_t i=0; 1000 char *p=0; 1001 const UChar *src; 1002 char buffer[MAX_LENGTH]; 1003 int32_t offsetBuffer[MAX_LENGTH]; 1004 int32_t *offs=0; 1005 char *targ; 1006 char *targetLimit; 1007 UChar *sourceLimit=0; 1008 UErrorCode status = U_ZERO_ERROR; 1009 UConverter *conv = 0; 1010 conv = ucnv_open(codepage, &status); 1011 if(U_FAILURE(status)) 1012 { 1013 log_data_err("Couldn't open converter %s\n",codepage); 1014 return TRUE; 1015 } 1016 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1017 1018 for(i=0; i<MAX_LENGTH; i++){ 1019 buffer[i]=(char)0xF0; 1020 offsetBuffer[i]=0xFF; 1021 } 1022 1023 src=source; 1024 sourceLimit=(UChar*)src+(sourceLen); 1025 targ=buffer; 1026 targetLimit=targ+MAX_LENGTH; 1027 offs=offsetBuffer; 1028 ucnv_fromUnicode (conv, 1029 (char **)&targ, 1030 (const char *)targetLimit, 1031 &src, 1032 sourceLimit, 1033 expectOffsets ? offs : NULL, 1034 doFlush, 1035 &status); 1036 ucnv_close(conv); 1037 if(status != expectedStatus){ 1038 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1039 return FALSE; 1040 } 1041 1042 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1043 sourceLen, targ-buffer); 1044 1045 if(expectLen != targ-buffer) 1046 { 1047 log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1048 log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1049 printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer)); 1050 printSeqErr((const unsigned char*)expect, expectLen); 1051 return FALSE; 1052 } 1053 1054 if(memcmp(buffer, expect, expectLen)){ 1055 log_err("String does not match. FROM Unicode to codePage%s\n", codepage); 1056 log_info("\nGot:"); 1057 printSeqErr((const unsigned char *)buffer, expectLen); 1058 log_info("\nExpected:"); 1059 printSeqErr((const unsigned char *)expect, expectLen); 1060 return FALSE; 1061 } 1062 else { 1063 log_verbose("Matches!\n"); 1064 } 1065 1066 if (expectOffsets != 0){ 1067 log_verbose("comparing %d offsets..\n", targ-buffer); 1068 if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){ 1069 log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage); 1070 log_info("\nGot : "); 1071 printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer)); 1072 for(p=buffer;p<targ;p++) 1073 log_info("%d, ", offsetBuffer[p-buffer]); 1074 log_info("\nExpected: "); 1075 for(i=0; i< (targ-buffer); i++) 1076 log_info("%d,", expectOffsets[i]); 1077 } 1078 } 1079 1080 return TRUE; 1081 } 1082 1083 1084 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 1085 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1086 { 1087 UErrorCode status = U_ZERO_ERROR; 1088 UConverter *conv = 0; 1089 int32_t i=0; 1090 UChar *p=0; 1091 const char* src; 1092 UChar buffer[MAX_LENGTH]; 1093 int32_t offsetBuffer[MAX_LENGTH]; 1094 int32_t *offs=0; 1095 UChar *targ; 1096 UChar *targetLimit; 1097 uint8_t *sourceLimit=0; 1098 1099 1100 1101 conv = ucnv_open(codepage, &status); 1102 if(U_FAILURE(status)) 1103 { 1104 log_data_err("Couldn't open converter %s\n",codepage); 1105 return TRUE; 1106 } 1107 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1108 1109 1110 1111 for(i=0; i<MAX_LENGTH; i++){ 1112 buffer[i]=0xFFFE; 1113 offsetBuffer[i]=-1; 1114 } 1115 1116 src=(const char *)source; 1117 sourceLimit=(uint8_t*)(src+(sourceLen)); 1118 targ=buffer; 1119 targetLimit=targ+MAX_LENGTH; 1120 offs=offsetBuffer; 1121 1122 1123 1124 ucnv_toUnicode (conv, 1125 &targ, 1126 targetLimit, 1127 (const char **)&src, 1128 (const char *)sourceLimit, 1129 expectOffsets ? offs : NULL, 1130 doFlush, 1131 &status); 1132 1133 ucnv_close(conv); 1134 if(status != expectedStatus){ 1135 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1136 return FALSE; 1137 } 1138 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1139 sourceLen, targ-buffer); 1140 1141 1142 1143 1144 log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2); 1145 1146 if (expectOffsets != 0) { 1147 if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){ 1148 1149 log_err("did not get the expected offsets from %s To UNICODE\n", codepage); 1150 log_info("\nGot : "); 1151 for(p=buffer;p<targ;p++) 1152 log_info("%d, ", offsetBuffer[p-buffer]); 1153 log_info("\nExpected: "); 1154 for(i=0; i<(targ-buffer); i++) 1155 log_info("%d, ", expectOffsets[i]); 1156 log_info("\nGot result:"); 1157 for(i=0; i<(targ-buffer); i++) 1158 log_info("0x%04X,", buffer[i]); 1159 log_info("\nFrom Input:"); 1160 for(i=0; i<(src-(const char *)source); i++) 1161 log_info("0x%02X,", (unsigned char)source[i]); 1162 log_info("\n"); 1163 } 1164 } 1165 if(memcmp(buffer, expect, expectLen*2)){ 1166 log_err("String does not match. from codePage %s TO Unicode\n", codepage); 1167 log_info("\nGot:"); 1168 printUSeqErr(buffer, expectLen); 1169 log_info("\nExpected:"); 1170 printUSeqErr(expect, expectLen); 1171 return FALSE; 1172 } 1173 else { 1174 log_verbose("Matches!\n"); 1175 } 1176 1177 return TRUE; 1178 } 1179 1180 1181 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1182 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset) 1183 { 1184 UErrorCode status = U_ZERO_ERROR; 1185 UConverter *conv = 0; 1186 char junkout[MAX_LENGTH]; /* FIX */ 1187 int32_t junokout[MAX_LENGTH]; /* FIX */ 1188 char *p; 1189 const UChar *src; 1190 char *end; 1191 char *targ; 1192 int32_t *offs; 1193 int i; 1194 int32_t realBufferSize; 1195 char *realBufferEnd; 1196 const UChar *realSourceEnd; 1197 const UChar *sourceLimit; 1198 UBool checkOffsets = TRUE; 1199 UBool doFlush; 1200 1201 UConverterFromUCallback oldAction = NULL; 1202 const void* oldContext = NULL; 1203 1204 for(i=0;i<MAX_LENGTH;i++) 1205 junkout[i] = (char)0xF0; 1206 for(i=0;i<MAX_LENGTH;i++) 1207 junokout[i] = 0xFF; 1208 1209 setNuConvTestName(codepage, "FROM"); 1210 1211 log_verbose("\n========= %s\n", gNuConvTestName); 1212 1213 conv = ucnv_open(codepage, &status); 1214 if(U_FAILURE(status)) 1215 { 1216 log_data_err("Couldn't open converter %s\n",codepage); 1217 return TRUE; 1218 } 1219 1220 log_verbose("Converter opened..\n"); 1221 /*----setting the callback routine----*/ 1222 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1223 if (U_FAILURE(status)) { 1224 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1225 } 1226 /*------------------------*/ 1227 1228 src = source; 1229 targ = junkout; 1230 offs = junokout; 1231 1232 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1233 realBufferEnd = junkout + realBufferSize; 1234 realSourceEnd = source + sourceLen; 1235 1236 if ( gOutBufferSize != realBufferSize ) 1237 checkOffsets = FALSE; 1238 1239 if( gInBufferSize != MAX_LENGTH ) 1240 checkOffsets = FALSE; 1241 1242 do 1243 { 1244 end = nct_min(targ + gOutBufferSize, realBufferEnd); 1245 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 1246 1247 doFlush = (UBool)(sourceLimit == realSourceEnd); 1248 1249 if(targ == realBufferEnd) 1250 { 1251 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 1252 return FALSE; 1253 } 1254 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 1255 1256 1257 status = U_ZERO_ERROR; 1258 if(gInBufferSize ==999 && gOutBufferSize==999) 1259 doFlush = FALSE; 1260 ucnv_fromUnicode (conv, 1261 (char **)&targ, 1262 (const char *)end, 1263 &src, 1264 sourceLimit, 1265 offs, 1266 doFlush, /* flush if we're at the end of the input data */ 1267 &status); 1268 if(testReset) 1269 ucnv_resetToUnicode(conv); 1270 if(gInBufferSize ==999 && gOutBufferSize==999) 1271 ucnv_resetToUnicode(conv); 1272 1273 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 1274 1275 if(U_FAILURE(status)) { 1276 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1277 return FALSE; 1278 } 1279 1280 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1281 sourceLen, targ-junkout); 1282 if(VERBOSITY) 1283 { 1284 char junk[999]; 1285 char offset_str[999]; 1286 char *ptr; 1287 1288 junk[0] = 0; 1289 offset_str[0] = 0; 1290 for(ptr = junkout;ptr<targ;ptr++) 1291 { 1292 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr); 1293 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]); 1294 } 1295 1296 log_verbose(junk); 1297 printSeq((const unsigned char *)expect, expectLen); 1298 if ( checkOffsets ) 1299 { 1300 log_verbose("\nOffsets:"); 1301 log_verbose(offset_str); 1302 } 1303 log_verbose("\n"); 1304 } 1305 ucnv_close(conv); 1306 1307 1308 if(expectLen != targ-junkout) 1309 { 1310 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1311 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1312 log_info("\nGot:"); 1313 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1314 log_info("\nExpected:"); 1315 printSeqErr((const unsigned char*)expect, expectLen); 1316 return FALSE; 1317 } 1318 1319 if (checkOffsets && (expectOffsets != 0) ) 1320 { 1321 log_verbose("comparing %d offsets..\n", targ-junkout); 1322 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 1323 log_err("did not get the expected offsets. %s", gNuConvTestName); 1324 log_err("Got : "); 1325 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1326 for(p=junkout;p<targ;p++) 1327 log_err("%d, ", junokout[p-junkout]); 1328 log_err("\nExpected: "); 1329 for(i=0; i<(targ-junkout); i++) 1330 log_err("%d,", expectOffsets[i]); 1331 } 1332 } 1333 1334 log_verbose("comparing..\n"); 1335 if(!memcmp(junkout, expect, expectLen)) 1336 { 1337 log_verbose("Matches!\n"); 1338 return TRUE; 1339 } 1340 else 1341 { 1342 log_err("String does not match. %s\n", gNuConvTestName); 1343 printUSeqErr(source, sourceLen); 1344 log_info("\nGot:"); 1345 printSeqErr((const unsigned char *)junkout, expectLen); 1346 log_info("\nExpected:"); 1347 printSeqErr((const unsigned char *)expect, expectLen); 1348 1349 return FALSE; 1350 } 1351 } 1352 1353 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 1354 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset) 1355 { 1356 UErrorCode status = U_ZERO_ERROR; 1357 UConverter *conv = 0; 1358 UChar junkout[MAX_LENGTH]; /* FIX */ 1359 int32_t junokout[MAX_LENGTH]; /* FIX */ 1360 const char *src; 1361 const char *realSourceEnd; 1362 const char *srcLimit; 1363 UChar *p; 1364 UChar *targ; 1365 UChar *end; 1366 int32_t *offs; 1367 int i; 1368 UBool checkOffsets = TRUE; 1369 int32_t realBufferSize; 1370 UChar *realBufferEnd; 1371 UBool doFlush; 1372 1373 UConverterToUCallback oldAction = NULL; 1374 const void* oldContext = NULL; 1375 1376 1377 for(i=0;i<MAX_LENGTH;i++) 1378 junkout[i] = 0xFFFE; 1379 1380 for(i=0;i<MAX_LENGTH;i++) 1381 junokout[i] = -1; 1382 1383 setNuConvTestName(codepage, "TO"); 1384 1385 log_verbose("\n========= %s\n", gNuConvTestName); 1386 1387 conv = ucnv_open(codepage, &status); 1388 if(U_FAILURE(status)) 1389 { 1390 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 1391 return TRUE; 1392 } 1393 1394 log_verbose("Converter opened..\n"); 1395 /*----setting the callback routine----*/ 1396 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1397 if (U_FAILURE(status)) { 1398 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1399 } 1400 /*-------------------------------------*/ 1401 1402 src = (const char *)source; 1403 targ = junkout; 1404 offs = junokout; 1405 1406 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1407 realBufferEnd = junkout + realBufferSize; 1408 realSourceEnd = src + sourcelen; 1409 1410 if ( gOutBufferSize != realBufferSize ) 1411 checkOffsets = FALSE; 1412 1413 if( gInBufferSize != MAX_LENGTH ) 1414 checkOffsets = FALSE; 1415 1416 do 1417 { 1418 end = nct_min( targ + gOutBufferSize, realBufferEnd); 1419 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 1420 1421 if(targ == realBufferEnd) 1422 { 1423 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 1424 return FALSE; 1425 } 1426 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 1427 1428 /* oldTarg = targ; */ 1429 1430 status = U_ZERO_ERROR; 1431 doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE); 1432 1433 ucnv_toUnicode (conv, 1434 &targ, 1435 end, 1436 (const char **)&src, 1437 (const char *)srcLimit, 1438 offs, 1439 doFlush, /* flush if we're at the end of hte source data */ 1440 &status); 1441 if(testReset) 1442 ucnv_resetFromUnicode(conv); 1443 if(gInBufferSize ==999 && gOutBufferSize==999) 1444 ucnv_resetToUnicode(conv); 1445 /* offs += (targ-oldTarg); */ 1446 1447 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 1448 1449 if(U_FAILURE(status)) 1450 { 1451 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1452 return FALSE; 1453 } 1454 1455 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 1456 sourcelen, targ-junkout); 1457 if(VERBOSITY) 1458 { 1459 char junk[999]; 1460 char offset_str[999]; 1461 1462 UChar *ptr; 1463 1464 junk[0] = 0; 1465 offset_str[0] = 0; 1466 1467 for(ptr = junkout;ptr<targ;ptr++) 1468 { 1469 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 1470 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 1471 } 1472 1473 log_verbose(junk); 1474 1475 if ( checkOffsets ) 1476 { 1477 log_verbose("\nOffsets:"); 1478 log_verbose(offset_str); 1479 } 1480 log_verbose("\n"); 1481 } 1482 ucnv_close(conv); 1483 1484 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 1485 1486 if (checkOffsets && (expectOffsets != 0)) 1487 { 1488 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 1489 1490 log_err("did not get the expected offsets. %s",gNuConvTestName); 1491 for(p=junkout;p<targ;p++) 1492 log_err("%d, ", junokout[p-junkout]); 1493 log_err("\nExpected: "); 1494 for(i=0; i<(targ-junkout); i++) 1495 log_err("%d,", expectOffsets[i]); 1496 log_err(""); 1497 for(i=0; i<(targ-junkout); i++) 1498 log_err("%X,", junkout[i]); 1499 log_err(""); 1500 for(i=0; i<(src-(const char *)source); i++) 1501 log_err("%X,", (unsigned char)source[i]); 1502 } 1503 } 1504 1505 if(!memcmp(junkout, expect, expectlen*2)) 1506 { 1507 log_verbose("Matches!\n"); 1508 return TRUE; 1509 } 1510 else 1511 { 1512 log_err("String does not match. %s\n", gNuConvTestName); 1513 log_verbose("String does not match. %s\n", gNuConvTestName); 1514 log_info("\nGot:"); 1515 printUSeq(junkout, expectlen); 1516 log_info("\nExpected:"); 1517 printUSeq(expect, expectlen); 1518 return FALSE; 1519 } 1520 } 1521 1522 1523 static void TestResetBehaviour(void){ 1524 #if !UCONFIG_NO_LEGACY_CONVERSION 1525 log_verbose("Testing Reset for DBCS and MBCS\n"); 1526 { 1527 static const UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 1528 static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 1529 static const int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 1530 1531 1532 static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8}; 1533 static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7}; 1534 static const int32_t offsets1[] = { 0,2,4,6}; 1535 1536 /*DBCS*/ 1537 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1538 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1539 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1540 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1541 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1542 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1543 1544 if(!testConvertToU(expected1, sizeof(expected1), 1545 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1546 offsets1, TRUE)) 1547 log_err("ibm-1363 -> did not match.\n"); 1548 /*MBCS*/ 1549 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1550 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1551 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1552 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1553 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1554 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1555 1556 if(!testConvertToU(expected1, sizeof(expected1), 1557 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1558 offsets1, TRUE)) 1559 log_err("ibm-1363 -> did not match.\n"); 1560 1561 } 1562 1563 log_verbose("Testing Reset for ISO-2022-jp\n"); 1564 { 1565 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1566 1567 static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1568 0x31,0x1A, 0x32}; 1569 1570 1571 static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 1572 1573 1574 static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1575 static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1576 0x31,0x1A, 0x32}; 1577 static const int32_t offsets1[] = { 3,5,10,11,12}; 1578 1579 /*iso-2022-jp*/ 1580 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1581 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1582 log_err("u-> not match.\n"); 1583 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1584 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1585 log_err("u-> not match.\n"); 1586 1587 if(!testConvertToU(expected1, sizeof(expected1), 1588 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1589 offsets1, TRUE)) 1590 log_err("iso-2022-jp -> did not match.\n"); 1591 1592 } 1593 1594 log_verbose("Testing Reset for ISO-2022-cn\n"); 1595 { 1596 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1597 1598 static const uint8_t expected[] = { 1599 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1600 0x36, 0x21, 1601 0x0f, 0x31, 1602 0x1A, 1603 0x32 1604 }; 1605 1606 1607 static const int32_t offsets[] = { 1608 0, 0, 0, 0, 0, 0, 0, 1609 1, 1, 1610 2, 2, 1611 3, 1612 5, }; 1613 1614 UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1615 static const uint8_t expected1[] = { 1616 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1617 0x36, 0x21, 1618 0x1B, 0x24, 0x29, 0x47, 0x24, 0x22, 1619 0x0f, 0x1A, 1620 0x32 1621 }; 1622 static const int32_t offsets1[] = { 5,7,13,16,17}; 1623 1624 /*iso-2022-CN*/ 1625 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1626 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1627 log_err("u-> not match.\n"); 1628 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1629 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1630 log_err("u-> not match.\n"); 1631 1632 if(!testConvertToU(expected1, sizeof(expected1), 1633 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1634 offsets1, TRUE)) 1635 log_err("iso-2022-cn -> did not match.\n"); 1636 } 1637 1638 log_verbose("Testing Reset for ISO-2022-kr\n"); 1639 { 1640 UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1641 1642 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 1643 0x0E, 0x6C, 0x69, 1644 0x0f, 0x1A, 1645 0x0e, 0x6F, 0x4B, 1646 0x0F, 0x31, 1647 0x1A, 1648 0x32 }; 1649 1650 static const int32_t offsets[] = {-1, -1, -1, -1, 1651 0, 0, 0, 1652 1, 1, 1653 3, 3, 3, 1654 4, 4, 1655 5, 1656 7, 1657 }; 1658 static const UChar sampleText1[] = { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032}; 1659 1660 static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43, 1661 0x0E, 0x6C, 0x69, 1662 0x0f, 0x41, 1663 0x0e, 0x6F, 0x4B, 1664 0x0F, 0x31, 1665 0x42, 1666 0x32 }; 1667 1668 static const int32_t offsets1[] = { 1669 5, 8, 10, 1670 13, 14, 15 1671 1672 }; 1673 /*iso-2022-kr*/ 1674 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1675 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1676 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1677 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1678 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1679 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1680 if(!testConvertToU(expected1, sizeof(expected1), 1681 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1682 offsets1, TRUE)) 1683 log_err("iso-2022-kr -> did not match.\n"); 1684 } 1685 1686 log_verbose("Testing Reset for HZ\n"); 1687 { 1688 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1689 1690 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 1691 0x7E, 0x7D, 0x1A, 1692 0x7E, 0x7B, 0x36, 0x21, 1693 0x7E, 0x7D, 0x31, 1694 0x1A, 1695 0x32 }; 1696 1697 1698 static const int32_t offsets[] = {0,0,0,0, 1699 1,1,1, 1700 3,3,3,3, 1701 4,4,4, 1702 5, 1703 7,}; 1704 static const UChar sampleText1[] = { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032}; 1705 1706 static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B, 1707 0x7E, 0x7D, 0x35, 1708 0x7E, 0x7B, 0x36, 0x21, 1709 0x7E, 0x7D, 0x31, 1710 0x41, 1711 0x32 }; 1712 1713 1714 static const int32_t offsets1[] = {2,6,9,13,14,15 1715 }; 1716 1717 /*hz*/ 1718 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1719 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1720 log_err("u-> not match.\n"); 1721 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1722 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1723 log_err("u-> not match.\n"); 1724 if(!testConvertToU(expected1, sizeof(expected1), 1725 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1726 offsets1, TRUE)) 1727 log_err("hz -> did not match.\n"); 1728 } 1729 #endif 1730 1731 /*UTF-8*/ 1732 log_verbose("Testing for UTF8\n"); 1733 { 1734 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 1735 int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 1736 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 1737 0x04, 0x06 }; 1738 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 1739 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 1740 1741 1742 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 1743 /*UTF-8*/ 1744 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1745 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1746 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1747 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1748 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1749 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1750 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1751 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1752 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1753 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1754 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1755 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1756 if(!testConvertToU(expected, sizeof(expected), 1757 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1758 log_err("UTF8 -> did not match.\n"); 1759 if(!testConvertToU(expected, sizeof(expected), 1760 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1761 log_err("UTF8 -> did not match.\n"); 1762 if(!testConvertToU(expected, sizeof(expected), 1763 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1764 log_err("UTF8 -> did not match.\n"); 1765 if(!testConvertToU(expected, sizeof(expected), 1766 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1767 log_err("UTF8 -> did not match.\n"); 1768 1769 } 1770 1771 } 1772 1773 /* Test that U_TRUNCATED_CHAR_FOUND is set. */ 1774 static void 1775 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) { 1776 UConverter *cnv; 1777 1778 UChar buffer[2]; 1779 UChar *target, *targetLimit; 1780 const char *source, *sourceLimit; 1781 1782 UErrorCode errorCode; 1783 1784 errorCode=U_ZERO_ERROR; 1785 cnv=ucnv_open(cnvName, &errorCode); 1786 if(U_FAILURE(errorCode)) { 1787 log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode)); 1788 return; 1789 } 1790 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 1791 if(U_FAILURE(errorCode)) { 1792 log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n", 1793 cnvName, u_errorName(errorCode)); 1794 ucnv_close(cnv); 1795 return; 1796 } 1797 1798 source=(const char *)bytes; 1799 sourceLimit=source+length; 1800 target=buffer; 1801 targetLimit=buffer+LENGTHOF(buffer); 1802 1803 /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */ 1804 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode); 1805 if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) { 1806 log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n", 1807 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1808 } 1809 1810 errorCode=U_ZERO_ERROR; 1811 source=sourceLimit; 1812 target=buffer; 1813 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1814 if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) { 1815 log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n", 1816 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer)); 1817 } 1818 1819 /* 2. input bytes with flush=TRUE */ 1820 ucnv_resetToUnicode(cnv); 1821 1822 errorCode=U_ZERO_ERROR; 1823 source=(const char *)bytes; 1824 target=buffer; 1825 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1826 if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) { 1827 log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n", 1828 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1829 } 1830 1831 1832 ucnv_close(cnv); 1833 } 1834 1835 static void 1836 TestTruncated() { 1837 static const struct { 1838 const char *cnvName; 1839 uint8_t bytes[8]; /* partial input bytes resulting in no output */ 1840 int32_t length; 1841 } testCases[]={ 1842 { "IMAP-mailbox-name", { 0x26 }, 1 }, /* & */ 1843 { "IMAP-mailbox-name", { 0x26, 0x42 }, 2 }, /* &B */ 1844 { "IMAP-mailbox-name", { 0x26, 0x42, 0x42 }, 3 }, /* &BB */ 1845 { "IMAP-mailbox-name", { 0x26, 0x41, 0x41 }, 3 }, /* &AA */ 1846 1847 { "UTF-7", { 0x2b, 0x42 }, 2 }, /* +B */ 1848 { "UTF-8", { 0xd1 }, 1 }, 1849 1850 { "UTF-16BE", { 0x4e }, 1 }, 1851 { "UTF-16LE", { 0x4e }, 1 }, 1852 { "UTF-16", { 0x4e }, 1 }, 1853 { "UTF-16", { 0xff }, 1 }, 1854 { "UTF-16", { 0xfe, 0xff, 0x4e }, 3 }, 1855 1856 { "UTF-32BE", { 0, 0, 0x4e }, 3 }, 1857 { "UTF-32LE", { 0x4e }, 1 }, 1858 { "UTF-32", { 0, 0, 0x4e }, 3 }, 1859 { "UTF-32", { 0xff }, 1 }, 1860 { "UTF-32", { 0, 0, 0xfe, 0xff, 0 }, 5 }, 1861 { "SCSU", { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */ 1862 1863 #if !UCONFIG_NO_LEGACY_CONVERSION 1864 { "BOCU-1", { 0xd5 }, 1 }, 1865 1866 { "Shift-JIS", { 0xe0 }, 1 }, 1867 1868 { "ibm-939", { 0x0e, 0x41 }, 2 } /* SO 0x41 */ 1869 #else 1870 { "BOCU-1", { 0xd5 }, 1 ,} 1871 #endif 1872 }; 1873 int32_t i; 1874 1875 for(i=0; i<LENGTHOF(testCases); ++i) { 1876 doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length); 1877 } 1878 } 1879 1880 typedef struct NameRange { 1881 const char *name; 1882 UChar32 start, end, start2, end2, notStart, notEnd; 1883 } NameRange; 1884 1885 static void 1886 TestUnicodeSet() { 1887 UErrorCode errorCode; 1888 UConverter *cnv; 1889 USet *set; 1890 const char *name; 1891 int32_t i, count; 1892 1893 static const char *const completeSetNames[]={ 1894 "UTF-7", 1895 "UTF-8", 1896 "UTF-16", 1897 "UTF-16BE", 1898 "UTF-16LE", 1899 "UTF-32", 1900 "UTF-32BE", 1901 "UTF-32LE", 1902 "SCSU", 1903 "BOCU-1", 1904 "CESU-8", 1905 #if !UCONFIG_NO_LEGACY_CONVERSION 1906 "gb18030", 1907 #endif 1908 "IMAP-mailbox-name" 1909 }; 1910 1911 static const char *const lmbcsNames[]={ 1912 #if !UCONFIG_NO_LEGACY_CONVERSION 1913 "LMBCS-1", 1914 "LMBCS-2", 1915 "LMBCS-3", 1916 "LMBCS-4", 1917 "LMBCS-5", 1918 "LMBCS-6", 1919 "LMBCS-8", 1920 "LMBCS-11", 1921 "LMBCS-16", 1922 "LMBCS-17", 1923 "LMBCS-18", 1924 "LMBCS-19" 1925 #endif 1926 }; 1927 1928 static const NameRange nameRanges[]={ 1929 { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1930 #if !UCONFIG_NO_LEGACY_CONVERSION 1931 { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1932 #endif 1933 { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff }, 1934 #if !UCONFIG_NO_LEGACY_CONVERSION 1935 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }, 1936 { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff }, 1937 /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */ 1938 { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff } 1939 #else 1940 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff } 1941 #endif 1942 }; 1943 1944 /* open an empty set */ 1945 set=uset_open(1, 0); 1946 1947 count=ucnv_countAvailable(); 1948 for(i=0; i<count; ++i) { 1949 errorCode=U_ZERO_ERROR; 1950 name=ucnv_getAvailableName(i); 1951 cnv=ucnv_open(name, &errorCode); 1952 if(U_FAILURE(errorCode)) { 1953 log_data_err("error: unable to open converter %s - %s\n", 1954 name, u_errorName(errorCode)); 1955 continue; 1956 } 1957 1958 uset_clear(set); 1959 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1960 if(U_FAILURE(errorCode)) { 1961 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 1962 name, u_errorName(errorCode)); 1963 } else if(uset_size(set)==0) { 1964 log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name); 1965 } 1966 1967 ucnv_close(cnv); 1968 } 1969 1970 /* test converters that are known to convert all of Unicode (except maybe for surrogates) */ 1971 for(i=0; i<LENGTHOF(completeSetNames); ++i) { 1972 errorCode=U_ZERO_ERROR; 1973 name=completeSetNames[i]; 1974 cnv=ucnv_open(name, &errorCode); 1975 if(U_FAILURE(errorCode)) { 1976 log_data_err("error: unable to open converter %s - %s\n", 1977 name, u_errorName(errorCode)); 1978 continue; 1979 } 1980 1981 uset_clear(set); 1982 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1983 if(U_FAILURE(errorCode)) { 1984 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 1985 name, u_errorName(errorCode)); 1986 } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) { 1987 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name); 1988 } 1989 1990 ucnv_close(cnv); 1991 } 1992 1993 /* test LMBCS variants which convert all of Unicode except for U+F6xx */ 1994 for(i=0; i<LENGTHOF(lmbcsNames); ++i) { 1995 errorCode=U_ZERO_ERROR; 1996 name=lmbcsNames[i]; 1997 cnv=ucnv_open(name, &errorCode); 1998 if(U_FAILURE(errorCode)) { 1999 log_data_err("error: unable to open converter %s - %s\n", 2000 name, u_errorName(errorCode)); 2001 continue; 2002 } 2003 2004 uset_clear(set); 2005 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2006 if(U_FAILURE(errorCode)) { 2007 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2008 name, u_errorName(errorCode)); 2009 } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) { 2010 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name); 2011 } 2012 2013 ucnv_close(cnv); 2014 } 2015 2016 /* test specific sets */ 2017 for(i=0; i<LENGTHOF(nameRanges); ++i) { 2018 errorCode=U_ZERO_ERROR; 2019 name=nameRanges[i].name; 2020 cnv=ucnv_open(name, &errorCode); 2021 if(U_FAILURE(errorCode)) { 2022 log_data_err("error: unable to open converter %s - %s\n", 2023 name, u_errorName(errorCode)); 2024 continue; 2025 } 2026 2027 uset_clear(set); 2028 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2029 if(U_FAILURE(errorCode)) { 2030 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2031 name, u_errorName(errorCode)); 2032 } else if( 2033 !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) || 2034 (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2)) 2035 ) { 2036 log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name); 2037 } else if(nameRanges[i].notStart>=0) { 2038 /* simulate containsAny() with the C API */ 2039 uset_complement(set); 2040 if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) { 2041 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name); 2042 } 2043 } 2044 2045 ucnv_close(cnv); 2046 } 2047 2048 errorCode = U_ZERO_ERROR; 2049 ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode); 2050 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2051 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2052 } 2053 errorCode = U_PARSE_ERROR; 2054 /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */ 2055 ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode); 2056 if (errorCode != U_PARSE_ERROR) { 2057 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2058 } 2059 2060 uset_close(set); 2061 } 2062