1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2013, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /***************************************************************************** 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda 7/7/2000 Converter Tests for extended code coverage 13 ****************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include "unicode/uloc.h" 19 #include "unicode/ucnv.h" 20 #include "unicode/utypes.h" 21 #include "unicode/ustring.h" 22 #include "unicode/uset.h" 23 #include "cintltst.h" 24 25 #define MAX_LENGTH 999 26 27 #define UNICODE_LIMIT 0x10FFFF 28 #define SURROGATE_HIGH_START 0xD800 29 #define SURROGATE_LOW_END 0xDFFF 30 31 static int32_t gInBufferSize = 0; 32 static int32_t gOutBufferSize = 0; 33 static char gNuConvTestName[1024]; 34 35 #define nct_min(x,y) ((x<y) ? 
x : y) 36 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 37 38 static void printSeq(const unsigned char* a, int len); 39 static void printSeqErr(const unsigned char* a, int len); 40 static void printUSeq(const UChar* a, int len); 41 static void printUSeqErr(const UChar* a, int len); 42 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 43 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 44 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 45 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 46 47 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 48 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset); 49 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 50 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset); 51 52 static void setNuConvTestName(const char *codepage, const char *direction) 53 { 54 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 55 codepage, 56 direction, 57 (int)gInBufferSize, 58 (int)gOutBufferSize); 59 } 60 61 62 static void TestSurrogateBehaviour(void); 63 static void TestErrorBehaviour(void); 64 65 #if !UCONFIG_NO_LEGACY_CONVERSION 66 static void TestToUnicodeErrorBehaviour(void); 67 static void TestGetNextErrorBehaviour(void); 68 #endif 69 70 static void TestRegressionUTF8(void); 71 static void TestRegressionUTF32(void); 72 static void TestAvailableConverters(void); 73 static void TestFlushInternalBuffer(void); /*for improved code coverage in ucnv_cnv.c*/ 74 static void TestResetBehaviour(void); 75 static void TestTruncated(void); 76 static void TestUnicodeSet(void); 77 78 static void 
TestWithBufferSize(int32_t osize, int32_t isize); 79 80 81 static void printSeq(const unsigned char* a, int len) 82 { 83 int i=0; 84 log_verbose("\n{"); 85 while (i<len) 86 log_verbose("0x%02X ", a[i++]); 87 log_verbose("}\n"); 88 } 89 90 static void printUSeq(const UChar* a, int len) 91 { 92 int i=0; 93 log_verbose("\n{"); 94 while (i<len) 95 log_verbose("%0x04X ", a[i++]); 96 log_verbose("}\n"); 97 } 98 99 static void printSeqErr(const unsigned char* a, int len) 100 { 101 int i=0; 102 fprintf(stderr, "\n{"); 103 while (i<len) fprintf(stderr, "0x%02X ", a[i++]); 104 fprintf(stderr, "}\n"); 105 } 106 107 static void printUSeqErr(const UChar* a, int len) 108 { 109 int i=0; 110 fprintf(stderr, "\n{"); 111 while (i<len) 112 fprintf(stderr, "0x%04X ", a[i++]); 113 fprintf(stderr,"}\n"); 114 } 115 116 void addExtraTests(TestNode** root); 117 118 void addExtraTests(TestNode** root) 119 { 120 addTest(root, &TestSurrogateBehaviour, "tsconv/ncnvtst/TestSurrogateBehaviour"); 121 addTest(root, &TestErrorBehaviour, "tsconv/ncnvtst/TestErrorBehaviour"); 122 123 #if !UCONFIG_NO_LEGACY_CONVERSION 124 addTest(root, &TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour"); 125 addTest(root, &TestGetNextErrorBehaviour, "tsconv/ncnvtst/TestGetNextErrorBehaviour"); 126 #endif 127 128 addTest(root, &TestAvailableConverters, "tsconv/ncnvtst/TestAvailableConverters"); 129 addTest(root, &TestFlushInternalBuffer, "tsconv/ncnvtst/TestFlushInternalBuffer"); 130 addTest(root, &TestResetBehaviour, "tsconv/ncnvtst/TestResetBehaviour"); 131 addTest(root, &TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8"); 132 addTest(root, &TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32"); 133 addTest(root, &TestTruncated, "tsconv/ncnvtst/TestTruncated"); 134 addTest(root, &TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet"); 135 } 136 137 /*test surrogate behaviour*/ 138 static void TestSurrogateBehaviour(){ 139 log_verbose("Testing for SBCS and LATIN_1\n"); 140 { 141 UChar 
sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032}; 142 const uint8_t expected[] = {0x31, 0x1a, 0x32}; 143 144 #if !UCONFIG_NO_LEGACY_CONVERSION 145 /*SBCS*/ 146 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 147 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR)) 148 log_err("u-> ibm-920 [UCNV_SBCS] not match.\n"); 149 #endif 150 151 /*LATIN_1*/ 152 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 153 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR )) 154 log_err("u-> LATIN_1 not match.\n"); 155 156 } 157 158 #if !UCONFIG_NO_LEGACY_CONVERSION 159 log_verbose("Testing for DBCS and MBCS\n"); 160 { 161 UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 162 const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 163 int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 164 165 /*DBCS*/ 166 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 167 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 168 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 169 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 170 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR)) 171 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 172 /*MBCS*/ 173 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 174 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 175 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 176 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 177 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR)) 178 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 179 } 180 181 log_verbose("Testing for ISO-2022-jp\n"); 182 { 183 UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 184 185 const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 186 0x31,0x1A, 0x32}; 187 188 189 int32_t offsets[] = 
{0,0,0,0,0,1,1,2,2,2,2,3,5 }; 190 191 // iso-2022-jp android-change 192 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 193 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR)) 194 log_err("u-> not match.\n"); 195 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 196 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR)) 197 log_err("u-> not match.\n"); 198 } 199 200 /* BEGIN android-removed */ 201 /* To save space, Android does not build full ISO-2022-CN tables. 202 We skip the tests for ISO-2022-CN. */ 203 /* 204 log_verbose("Testing for ISO-2022-cn\n"); 205 { 206 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 207 208 static const uint8_t expected[] = { 209 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 210 0x36, 0x21, 211 0x0F, 0x31, 212 0x1A, 213 0x32 214 }; 215 216 217 218 static const int32_t offsets[] = { 219 0, 0, 0, 0, 0, 0, 0, 220 1, 1, 221 2, 2, 222 3, 223 5, }; 224 225 // iso-2022-CN android-change 226 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 227 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR)) 228 log_err("u-> not match.\n"); 229 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 230 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR)) 231 log_err("u-> not match.\n"); 232 } 233 */ 234 /* END android-removed */ 235 236 log_verbose("Testing for ISO-2022-kr\n"); 237 { 238 static const UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 239 240 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 241 0x0E, 0x6C, 0x69, 242 0x0f, 0x1A, 243 0x0e, 0x6F, 0x4B, 244 0x0F, 0x31, 245 0x1A, 246 0x32 }; 247 248 static const int32_t offsets[] = {-1, -1, -1, -1, 249 0, 0, 0, 250 1, 1, 251 3, 3, 3, 252 4, 4, 253 5, 254 7, 255 }; 256 257 // iso-2022-kr android-change 258 if(!convertFromU(sampleText, 
sizeof(sampleText)/sizeof(sampleText[0]), 259 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR)) 260 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 261 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 262 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR)) 263 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 264 } 265 266 log_verbose("Testing for HZ\n"); 267 { 268 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 269 270 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 271 0x7E, 0x7D, 0x1A, 272 0x7E, 0x7B, 0x36, 0x21, 273 0x7E, 0x7D, 0x31, 274 0x1A, 275 0x32 }; 276 277 278 static const int32_t offsets[] = {0,0,0,0, 279 1,1,1, 280 3,3,3,3, 281 4,4,4, 282 5, 283 7,}; 284 285 /*hz*/ 286 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 287 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR)) 288 log_err("u-> HZ not match.\n"); 289 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 290 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR)) 291 log_err("u-> HZ not match.\n"); 292 } 293 #endif 294 295 /*UTF-8*/ 296 log_verbose("Testing for UTF8\n"); 297 { 298 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 299 static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 300 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 301 0x04, 0x06 }; 302 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 303 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 304 305 306 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 307 /*UTF-8*/ 308 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 309 expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR )) 310 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 311 if(!convertFromU(sampleText, 
sizeof(sampleText)/sizeof(sampleText[0]), 312 expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR )) 313 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 314 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 315 expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR )) 316 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 317 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 318 expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR )) 319 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 320 321 if(!convertToU(expected, sizeof(expected), 322 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR )) 323 log_err("UTF8 -> u did not match.\n"); 324 if(!convertToU(expected, sizeof(expected), 325 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR )) 326 log_err("UTF8 -> u did not match.\n"); 327 if(!convertToU(expected, sizeof(expected), 328 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR )) 329 log_err("UTF8 ->u did not match.\n"); 330 if(!convertToU(expected, sizeof(expected), 331 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR )) 332 log_err("UTF8 -> u did not match.\n"); 333 334 } 335 } 336 337 /*test various error behaviours*/ 338 static void TestErrorBehaviour(){ 339 log_verbose("Testing for SBCS and LATIN_1\n"); 340 { 341 static const UChar sampleText[] = { 0x0031, 0xd801}; 342 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 343 static const uint8_t expected0[] = { 0x31}; 344 static const uint8_t expected[] = { 0x31, 0x1a}; 345 static const uint8_t expected2[] = { 0x31, 0x1a, 0x32}; 346 347 #if !UCONFIG_NO_LEGACY_CONVERSION 348 /*SBCS*/ 349 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 350 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 351 
log_err("u-> ibm-920 [UCNV_SBCS] \n"); 352 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 353 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR)) 354 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 355 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 356 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 357 log_err("u-> ibm-920 [UCNV_SBCS] did not match\n"); 358 #endif 359 360 /*LATIN_1*/ 361 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 362 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 363 log_err("u-> LATIN_1 is supposed to fail\n"); 364 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 365 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR)) 366 log_err("u-> LATIN_1 is supposed to fail\n"); 367 368 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 369 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 370 log_err("u-> LATIN_1 did not match\n"); 371 } 372 373 #if !UCONFIG_NO_LEGACY_CONVERSION 374 log_verbose("Testing for DBCS and MBCS\n"); 375 { 376 static const UChar sampleText[] = { 0x00a1, 0xd801}; 377 static const uint8_t expected[] = { 0xa2, 0xae}; 378 static const int32_t offsets[] = { 0x00, 0x00}; 379 static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0}; 380 static const int32_t offsetsSUB[] = { 0x00, 0x00, 0x01, 0x01}; 381 382 static const UChar sampleText2[] = { 0x00a1, 0xd801, 0x00a4}; 383 static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 384 static const int32_t offsets2[] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02}; 385 386 static const UChar sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01}; 387 static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0}; 388 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x01, 0x02, 0x02}; 389 390 static const UChar sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; 391 static const uint8_t 
expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; 392 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 }; 393 394 /*DBCS*/ 395 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 396 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 397 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 398 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 399 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 400 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 401 402 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 403 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR)) 404 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 405 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 406 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 407 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 408 409 410 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 411 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 412 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 413 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 414 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR)) 415 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 416 417 /*MBCS*/ 418 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 419 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 420 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 421 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 422 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 423 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 424 425 if(!convertFromU(sampleText2, 
sizeof(sampleText2)/sizeof(sampleText2[0]), 426 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 427 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 428 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 429 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 430 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 431 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 432 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR)) 433 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 434 435 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 436 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR)) 437 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 438 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 439 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR)) 440 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 441 442 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 443 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR)) 444 log_err("u-> euc-jp [UCNV_MBCS] \n"); 445 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 446 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR)) 447 log_err("u-> euc-jp [UCNV_MBCS] \n"); 448 } 449 450 // iso-2022-jp 451 log_verbose("Testing for iso-2022-jp\n"); 452 { 453 static const UChar sampleText[] = { 0x0031, 0xd801}; 454 static const uint8_t expected[] = { 0x31}; 455 static const uint8_t expectedSUB[] = { 0x31, 0x1a}; 456 static const int32_t offsets[] = { 0x00, 1}; 457 458 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 459 static const uint8_t expected2[] = { 0x31,0x1A,0x32}; 460 static const int32_t offsets2[] = { 0x00,0x01,0x02}; 461 462 static const UChar 
sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 463 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a}; 464 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 }; 465 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 466 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR)) 467 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 468 // Google Patch: Change expected result code from U_AMBIGUOUS_ALIAS_WARNING to U_ZERO_ERROR. 469 // Introduced with ICU 51.1. 470 // Markus says this warning can occur when the set of available converters is changed, 471 // and that it's not worth looking into in further detail. 472 // Note: public ICU was U_ZERO_ERROR prior to ICU 51. 473 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 474 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR)) 475 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 476 // End of Google Patch. 
477 478 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 479 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR)) 480 log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n"); 481 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 482 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 483 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 484 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 485 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 486 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 487 488 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 489 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 490 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 491 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 492 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 493 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 494 } 495 496 /* BEGIN android-removed */ 497 /* To save space, Android does not build full ISO-2022-CN tables. 498 We skip the tests for ISO-2022-CN. 
*/ 499 /* 500 // iso-2022-cn android-change 501 log_verbose("Testing for iso-2022-cn\n"); 502 { 503 static const UChar sampleText[] = { 0x0031, 0xd801}; 504 static const uint8_t expected[] = { 0x31}; 505 static const uint8_t expectedSUB[] = { 0x31, 0x1A}; 506 static const int32_t offsets[] = { 0x00, 1}; 507 508 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 509 static const uint8_t expected2[] = { 0x31, 0x1A,0x32}; 510 static const int32_t offsets2[] = { 0x00, 0x01,0x02}; 511 512 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 513 static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A}; 514 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x02 }; 515 516 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 517 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a }; 518 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 }; 519 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 520 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR)) 521 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 522 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 523 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR)) 524 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 525 526 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 527 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR)) 528 log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n"); 529 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 530 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 531 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 532 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 533 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 534 log_err("u-> 
iso-2022-cn [UCNV_DBCS] did not match\n"); 535 536 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 537 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR)) 538 log_err("u->iso-2022-cn [UCNV_MBCS] \n"); 539 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 540 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR)) 541 log_err("u-> iso-2022-cn[UCNV_MBCS] \n"); 542 543 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 544 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR)) 545 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 546 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 547 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR)) 548 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 549 } 550 */ 551 /* END android-removed */ 552 553 // iso-2022-kr android-change 554 log_verbose("Testing for iso-2022-kr\n"); 555 { 556 static const UChar sampleText[] = { 0x0031, 0xd801}; 557 static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31}; 558 static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A}; 559 static const int32_t offsets[] = { -1, -1, -1, -1, 0x00, 1}; 560 561 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 562 static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32}; 563 static const int32_t offsets2[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02}; 564 565 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 566 static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A }; 567 static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 }; 568 569 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 570 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, 
U_ZERO_ERROR)) 571 log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); 572 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 573 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR)) 574 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 575 576 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 577 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR)) 578 log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n"); 579 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 580 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 581 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 582 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 583 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 584 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 585 586 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 587 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR)) 588 log_err("u->iso-2022-kr [UCNV_MBCS] \n"); 589 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 590 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR)) 591 log_err("u-> iso-2022-kr[UCNV_MBCS] \n"); 592 } 593 594 /*HZ*/ 595 log_verbose("Testing for HZ\n"); 596 { 597 static const UChar sampleText[] = { 0x0031, 0xd801}; 598 static const uint8_t expected[] = { 0x7e, 0x7d, 0x31}; 599 static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A}; 600 static const int32_t offsets[] = { 0x00, 0x00, 0x00, 1}; 601 602 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 603 static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31, 0x1A, 0x32 }; 604 static const int32_t offsets2[] = { 0x00, 0x00, 0x00, 0x01, 0x02 }; 605 606 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 607 static const 
uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50, 0x1A }; 608 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x02}; 609 610 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 611 static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a }; 612 static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 }; 613 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 614 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR)) 615 log_err("u-> HZ [UCNV_MBCS] \n"); 616 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 617 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR)) 618 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 619 620 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 621 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR)) 622 log_err("u->HZ[UCNV_DBCS] did not match\n"); 623 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 624 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 625 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 626 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 627 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 628 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 629 630 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 631 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR)) 632 log_err("u->HZ [UCNV_MBCS] \n"); 633 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 634 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR)) 635 log_err("u-> HZ[UCNV_MBCS] \n"); 636 637 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 638 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, 
U_ZERO_ERROR)) 639 log_err("u-> HZ [UCNV_MBCS] \n"); 640 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 641 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR)) 642 log_err("u-> HZ [UCNV_MBCS] \n"); 643 } 644 #endif 645 } 646 647 #if !UCONFIG_NO_LEGACY_CONVERSION 648 /*test different convertToUnicode error behaviours*/ 649 static void TestToUnicodeErrorBehaviour() 650 { 651 log_verbose("Testing error conditions for DBCS\n"); 652 { 653 uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04}; 654 const UChar expected[] = { 0x00a1 }; 655 656 if(!convertToU(sampleText, sizeof(sampleText), 657 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_AMBIGUOUS_ALIAS_WARNING )) 658 log_err("DBCS (ibm-1363)->Unicode did not match.\n"); 659 if(!convertToU(sampleText, sizeof(sampleText), 660 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING )) 661 log_err("DBCS (ibm-1363)->Unicode with flush = false did not match.\n"); 662 } 663 log_verbose("Testing error conditions for SBCS\n"); 664 { 665 uint8_t sampleText[] = { 0xa2, 0xFF}; 666 const UChar expected[] = { 0x00c2 }; 667 668 /* uint8_t sampleText2[] = { 0xa2, 0x70 }; 669 const UChar expected2[] = { 0x0073 };*/ 670 671 if(!convertToU(sampleText, sizeof(sampleText), 672 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR )) 673 log_err("SBCS (ibm-1051)->Unicode did not match.\n"); 674 if(!convertToU(sampleText, sizeof(sampleText), 675 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR )) 676 log_err("SBCS (ibm-1051)->Unicode with flush = false did not match.\n"); 677 678 } 679 } 680 681 static void TestGetNextErrorBehaviour(){ 682 /*Test for unassigned character*/ 683 #define INPUT_SIZE 1 684 static const char input1[INPUT_SIZE]={ 0x70 }; 685 const char* source=(const char*)input1; 686 UErrorCode err=U_ZERO_ERROR; 687 UChar32 c=0; 688 UConverter 
*cnv=ucnv_open("ibm-424", &err); 689 if(U_FAILURE(err)) { 690 log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err)); 691 return; 692 } 693 c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err); 694 if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){ 695 log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c); 696 } 697 ucnv_close(cnv); 698 } 699 #endif 700 701 #define MAX_UTF16_LEN 2 702 #define MAX_UTF8_LEN 4 703 704 /*Regression test for utf8 converter*/ 705 static void TestRegressionUTF8(){ 706 UChar32 currCh = 0; 707 int32_t offset8; 708 int32_t offset16; 709 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 710 uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH); 711 712 while (currCh <= UNICODE_LIMIT) { 713 offset16 = 0; 714 offset8 = 0; 715 while(currCh <= UNICODE_LIMIT 716 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 717 && offset8 < (MAX_LENGTH - MAX_UTF8_LEN)) 718 { 719 if (currCh == SURROGATE_HIGH_START) { 720 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 721 } 722 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 723 UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh); 724 currCh++; 725 } 726 if(!convertFromU(standardForm, offset16, 727 utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 728 log_err("Unicode->UTF8 did not match.\n"); 729 } 730 if(!convertToU(utf8, offset8, 731 standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 732 log_err("UTF8->Unicode did not match.\n"); 733 } 734 } 735 736 free(standardForm); 737 free(utf8); 738 739 { 740 static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 }; 741 static const UChar expected[] = { 0x0301, 0x0300 }; 742 UConverter *conv8; 743 UErrorCode err = U_ZERO_ERROR; 744 UChar pivotBuffer[100]; 745 const UChar* const pivEnd = pivotBuffer + 100; 746 const char* srcBeg; 747 const char* srcEnd; 748 UChar* pivBeg; 749 
750 conv8 = ucnv_open("UTF-8", &err); 751 752 srcBeg = src8; 753 pivBeg = pivotBuffer; 754 srcEnd = src8 + 3; 755 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 756 if (srcBeg != srcEnd) { 757 log_err("Did not consume whole buffer on first call.\n"); 758 } 759 760 srcEnd = src8 + 4; 761 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 762 if (srcBeg != srcEnd) { 763 log_err("Did not consume whole buffer on second call.\n"); 764 } 765 766 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 767 log_err("Did not get expected results for UTF-8.\n"); 768 } 769 ucnv_close(conv8); 770 } 771 } 772 773 #define MAX_UTF32_LEN 1 774 775 static void TestRegressionUTF32(){ 776 UChar32 currCh = 0; 777 int32_t offset32; 778 int32_t offset16; 779 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 780 UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32)); 781 782 while (currCh <= UNICODE_LIMIT) { 783 offset16 = 0; 784 offset32 = 0; 785 while(currCh <= UNICODE_LIMIT 786 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 787 && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN)) 788 { 789 if (currCh == SURROGATE_HIGH_START) { 790 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 791 } 792 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 793 UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh); 794 currCh++; 795 } 796 if(!convertFromU(standardForm, offset16, 797 (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 798 log_err("Unicode->UTF32 did not match.\n"); 799 } 800 if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32), 801 standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 802 log_err("UTF32->Unicode did not match.\n"); 803 } 804 } 805 free(standardForm); 806 free(utf32); 807 808 { 809 /* Check for lone surrogate error handling. 
*/ 810 static const UChar sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 }; 811 static const UChar sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 }; 812 static const uint8_t expectedUTF32BE[] = { 813 0x00, 0x00, 0x00, 0x31, 814 0x00, 0x00, 0xff, 0xfd, 815 0x00, 0x00, 0x00, 0x32 816 }; 817 static const uint8_t expectedUTF32LE[] = { 818 0x31, 0x00, 0x00, 0x00, 819 0xfd, 0xff, 0x00, 0x00, 820 0x32, 0x00, 0x00, 0x00 821 }; 822 static const int32_t offsetsUTF32[] = { 823 0x00, 0x00, 0x00, 0x00, 824 0x01, 0x01, 0x01, 0x01, 825 0x02, 0x02, 0x02, 0x02 826 }; 827 828 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 829 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 830 log_err("u->UTF-32BE\n"); 831 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 832 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 833 log_err("u->UTF-32BE\n"); 834 835 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 836 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 837 log_err("u->UTF-32LE\n"); 838 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 839 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 840 log_err("u->UTF-32LE\n"); 841 } 842 843 { 844 static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 }; 845 static const UChar expected[] = { 0x0031, 0x0030 }; 846 UConverter *convBE; 847 UErrorCode err = U_ZERO_ERROR; 848 UChar pivotBuffer[100]; 849 const UChar* const pivEnd = pivotBuffer + 100; 850 const char* srcBeg; 851 const char* srcEnd; 852 UChar* pivBeg; 853 854 convBE = ucnv_open("UTF-32BE", &err); 855 856 srcBeg = srcBE; 857 pivBeg = pivotBuffer; 858 srcEnd = srcBE + 5; 859 
ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 860 if (srcBeg != srcEnd) { 861 log_err("Did not consume whole buffer on first call.\n"); 862 } 863 864 srcEnd = srcBE + 8; 865 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 866 if (srcBeg != srcEnd) { 867 log_err("Did not consume whole buffer on second call.\n"); 868 } 869 870 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 871 log_err("Did not get expected results for UTF-32BE.\n"); 872 } 873 ucnv_close(convBE); 874 } 875 { 876 static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 }; 877 static const UChar expected[] = { 0x0031, 0x0030 }; 878 UConverter *convLE; 879 UErrorCode err = U_ZERO_ERROR; 880 UChar pivotBuffer[100]; 881 const UChar* const pivEnd = pivotBuffer + 100; 882 const char* srcBeg; 883 const char* srcEnd; 884 UChar* pivBeg; 885 886 convLE = ucnv_open("UTF-32LE", &err); 887 888 srcBeg = srcLE; 889 pivBeg = pivotBuffer; 890 srcEnd = srcLE + 5; 891 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 892 if (srcBeg != srcEnd) { 893 log_err("Did not consume whole buffer on first call.\n"); 894 } 895 896 srcEnd = srcLE + 8; 897 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 898 if (srcBeg != srcEnd) { 899 log_err("Did not consume whole buffer on second call.\n"); 900 } 901 902 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 903 log_err("Did not get expected results for UTF-32LE.\n"); 904 } 905 ucnv_close(convLE); 906 } 907 } 908 909 /*Walk through the available converters*/ 910 static void TestAvailableConverters(){ 911 UErrorCode status=U_ZERO_ERROR; 912 UConverter *conv=NULL; 913 int32_t i=0; 914 for(i=0; i < ucnv_countAvailable(); i++){ 915 status=U_ZERO_ERROR; 916 conv=ucnv_open(ucnv_getAvailableName(i), &status); 917 if(U_FAILURE(status)){ 918 log_err("ERROR: converter creation 
failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n", 919 ucnv_getAvailableName(i), myErrorName(status)); 920 continue; 921 } 922 ucnv_close(conv); 923 } 924 925 } 926 927 static void TestFlushInternalBuffer(){ 928 TestWithBufferSize(MAX_LENGTH, 1); 929 TestWithBufferSize(1, 1); 930 TestWithBufferSize(1, MAX_LENGTH); 931 TestWithBufferSize(MAX_LENGTH, MAX_LENGTH); 932 } 933 934 static void TestWithBufferSize(int32_t insize, int32_t outsize){ 935 936 gInBufferSize =insize; 937 gOutBufferSize = outsize; 938 939 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 940 { 941 UChar sampleText[] = 942 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E }; 943 const uint8_t expectedUTF8[] = 944 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 945 int32_t toUTF8Offs[] = 946 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07}; 947 /* int32_t fmUTF8Offs[] = 948 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/ 949 950 /*UTF-8*/ 951 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 952 expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE)) 953 log_err("u-> UTF8 did not match.\n"); 954 } 955 956 #if !UCONFIG_NO_LEGACY_CONVERSION 957 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 958 { 959 UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 960 const uint8_t toIBM943[]= { 0x61, 961 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 962 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 963 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 964 0x61 }; 965 int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 966 967 if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 968 toIBM943, sizeof(toIBM943), "ibm-943", 969 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE)) 970 log_err("u-> ibm-943 with 
subst with value did not match.\n"); 971 } 972 #endif 973 974 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 975 { 976 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 977 0xe0, 0x80, 0x61}; 978 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 979 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 980 981 if(!testConvertToU(sampleText1, sizeof(sampleText1), 982 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE)) 983 log_err("utf8->u with substitute did not match.\n");; 984 } 985 986 #if !UCONFIG_NO_LEGACY_CONVERSION 987 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 988 /*to Unicode*/ 989 { 990 const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 991 0x81, 0xad, /*unassigned*/ 992 0x89, 0xd3 }; 993 UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 994 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 995 0x7B87}; 996 int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 997 998 if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU), 999 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 1000 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE)) 1001 log_err("ibm-943->u with substitute with value did not match.\n"); 1002 1003 } 1004 #endif 1005 } 1006 1007 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1008 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1009 { 1010 1011 int32_t i=0; 1012 char *p=0; 1013 const UChar *src; 1014 char buffer[MAX_LENGTH]; 1015 int32_t offsetBuffer[MAX_LENGTH]; 1016 int32_t *offs=0; 1017 char *targ; 1018 char *targetLimit; 1019 UChar *sourceLimit=0; 1020 UErrorCode status = U_ZERO_ERROR; 1021 UConverter *conv = 0; 1022 conv = ucnv_open(codepage, &status); 1023 if(U_FAILURE(status)) 1024 { 1025 log_data_err("Couldn't open converter %s\n",codepage); 1026 return 
TRUE; 1027 } 1028 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1029 1030 for(i=0; i<MAX_LENGTH; i++){ 1031 buffer[i]=(char)0xF0; 1032 offsetBuffer[i]=0xFF; 1033 } 1034 1035 src=source; 1036 sourceLimit=(UChar*)src+(sourceLen); 1037 targ=buffer; 1038 targetLimit=targ+MAX_LENGTH; 1039 offs=offsetBuffer; 1040 ucnv_fromUnicode (conv, 1041 (char **)&targ, 1042 (const char *)targetLimit, 1043 &src, 1044 sourceLimit, 1045 expectOffsets ? offs : NULL, 1046 doFlush, 1047 &status); 1048 ucnv_close(conv); 1049 if(status != expectedStatus){ 1050 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1051 return FALSE; 1052 } 1053 1054 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1055 sourceLen, targ-buffer); 1056 1057 if(expectLen != targ-buffer) 1058 { 1059 log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1060 log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1061 printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer)); 1062 printSeqErr((const unsigned char*)expect, expectLen); 1063 return FALSE; 1064 } 1065 1066 if(memcmp(buffer, expect, expectLen)){ 1067 log_err("String does not match. FROM Unicode to codePage%s\n", codepage); 1068 log_info("\nGot:"); 1069 printSeqErr((const unsigned char *)buffer, expectLen); 1070 log_info("\nExpected:"); 1071 printSeqErr((const unsigned char *)expect, expectLen); 1072 return FALSE; 1073 } 1074 else { 1075 log_verbose("Matches!\n"); 1076 } 1077 1078 if (expectOffsets != 0){ 1079 log_verbose("comparing %d offsets..\n", targ-buffer); 1080 if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){ 1081 log_err("did not get the expected offsets. 
for FROM Unicode to %s\n", codepage); 1082 log_info("\nGot : "); 1083 printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer)); 1084 for(p=buffer;p<targ;p++) 1085 log_info("%d, ", offsetBuffer[p-buffer]); 1086 log_info("\nExpected: "); 1087 for(i=0; i< (targ-buffer); i++) 1088 log_info("%d,", expectOffsets[i]); 1089 } 1090 } 1091 1092 return TRUE; 1093 } 1094 1095 1096 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 1097 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1098 { 1099 UErrorCode status = U_ZERO_ERROR; 1100 UConverter *conv = 0; 1101 int32_t i=0; 1102 UChar *p=0; 1103 const char* src; 1104 UChar buffer[MAX_LENGTH]; 1105 int32_t offsetBuffer[MAX_LENGTH]; 1106 int32_t *offs=0; 1107 UChar *targ; 1108 UChar *targetLimit; 1109 uint8_t *sourceLimit=0; 1110 1111 1112 1113 conv = ucnv_open(codepage, &status); 1114 if(U_FAILURE(status)) 1115 { 1116 log_data_err("Couldn't open converter %s\n",codepage); 1117 return TRUE; 1118 } 1119 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1120 1121 1122 1123 for(i=0; i<MAX_LENGTH; i++){ 1124 buffer[i]=0xFFFE; 1125 offsetBuffer[i]=-1; 1126 } 1127 1128 src=(const char *)source; 1129 sourceLimit=(uint8_t*)(src+(sourceLen)); 1130 targ=buffer; 1131 targetLimit=targ+MAX_LENGTH; 1132 offs=offsetBuffer; 1133 1134 1135 1136 ucnv_toUnicode (conv, 1137 &targ, 1138 targetLimit, 1139 (const char **)&src, 1140 (const char *)sourceLimit, 1141 expectOffsets ? offs : NULL, 1142 doFlush, 1143 &status); 1144 1145 ucnv_close(conv); 1146 if(status != expectedStatus){ 1147 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1148 return FALSE; 1149 } 1150 log_verbose("\nConversion done [%d uchars in -> %d chars out]. 
\nResult :", 1151 sourceLen, targ-buffer); 1152 1153 1154 1155 1156 log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2); 1157 1158 if (expectOffsets != 0) { 1159 if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){ 1160 1161 log_err("did not get the expected offsets from %s To UNICODE\n", codepage); 1162 log_info("\nGot : "); 1163 for(p=buffer;p<targ;p++) 1164 log_info("%d, ", offsetBuffer[p-buffer]); 1165 log_info("\nExpected: "); 1166 for(i=0; i<(targ-buffer); i++) 1167 log_info("%d, ", expectOffsets[i]); 1168 log_info("\nGot result:"); 1169 for(i=0; i<(targ-buffer); i++) 1170 log_info("0x%04X,", buffer[i]); 1171 log_info("\nFrom Input:"); 1172 for(i=0; i<(src-(const char *)source); i++) 1173 log_info("0x%02X,", (unsigned char)source[i]); 1174 log_info("\n"); 1175 } 1176 } 1177 if(memcmp(buffer, expect, expectLen*2)){ 1178 log_err("String does not match. from codePage %s TO Unicode\n", codepage); 1179 log_info("\nGot:"); 1180 printUSeqErr(buffer, expectLen); 1181 log_info("\nExpected:"); 1182 printUSeqErr(expect, expectLen); 1183 return FALSE; 1184 } 1185 else { 1186 log_verbose("Matches!\n"); 1187 } 1188 1189 return TRUE; 1190 } 1191 1192 1193 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1194 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset) 1195 { 1196 UErrorCode status = U_ZERO_ERROR; 1197 UConverter *conv = 0; 1198 char junkout[MAX_LENGTH]; /* FIX */ 1199 int32_t junokout[MAX_LENGTH]; /* FIX */ 1200 char *p; 1201 const UChar *src; 1202 char *end; 1203 char *targ; 1204 int32_t *offs; 1205 int i; 1206 int32_t realBufferSize; 1207 char *realBufferEnd; 1208 const UChar *realSourceEnd; 1209 const UChar *sourceLimit; 1210 UBool checkOffsets = TRUE; 1211 UBool doFlush; 1212 1213 UConverterFromUCallback oldAction = NULL; 1214 const void* oldContext = NULL; 1215 1216 for(i=0;i<MAX_LENGTH;i++) 1217 junkout[i] = 
(char)0xF0; 1218 for(i=0;i<MAX_LENGTH;i++) 1219 junokout[i] = 0xFF; 1220 1221 setNuConvTestName(codepage, "FROM"); 1222 1223 log_verbose("\n========= %s\n", gNuConvTestName); 1224 1225 conv = ucnv_open(codepage, &status); 1226 if(U_FAILURE(status)) 1227 { 1228 log_data_err("Couldn't open converter %s\n",codepage); 1229 return TRUE; 1230 } 1231 1232 log_verbose("Converter opened..\n"); 1233 /*----setting the callback routine----*/ 1234 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1235 if (U_FAILURE(status)) { 1236 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1237 } 1238 /*------------------------*/ 1239 1240 src = source; 1241 targ = junkout; 1242 offs = junokout; 1243 1244 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1245 realBufferEnd = junkout + realBufferSize; 1246 realSourceEnd = source + sourceLen; 1247 1248 if ( gOutBufferSize != realBufferSize ) 1249 checkOffsets = FALSE; 1250 1251 if( gInBufferSize != MAX_LENGTH ) 1252 checkOffsets = FALSE; 1253 1254 do 1255 { 1256 end = nct_min(targ + gOutBufferSize, realBufferEnd); 1257 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 1258 1259 doFlush = (UBool)(sourceLimit == realSourceEnd); 1260 1261 if(targ == realBufferEnd) 1262 { 1263 log_err("Error, overflowed the real buffer while about to call fromUnicode! 
targ=%08lx %s", targ, gNuConvTestName); 1264 return FALSE; 1265 } 1266 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 1267 1268 1269 status = U_ZERO_ERROR; 1270 if(gInBufferSize ==999 && gOutBufferSize==999) 1271 doFlush = FALSE; 1272 ucnv_fromUnicode (conv, 1273 (char **)&targ, 1274 (const char *)end, 1275 &src, 1276 sourceLimit, 1277 offs, 1278 doFlush, /* flush if we're at the end of the input data */ 1279 &status); 1280 if(testReset) 1281 ucnv_resetToUnicode(conv); 1282 if(gInBufferSize ==999 && gOutBufferSize==999) 1283 ucnv_resetToUnicode(conv); 1284 1285 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 1286 1287 if(U_FAILURE(status)) { 1288 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1289 return FALSE; 1290 } 1291 1292 log_verbose("\nConversion done [%d uchars in -> %d chars out]. 
\nResult :", 1293 sourceLen, targ-junkout); 1294 if(getTestOption(VERBOSITY_OPTION)) 1295 { 1296 char junk[999]; 1297 char offset_str[999]; 1298 char *ptr; 1299 1300 junk[0] = 0; 1301 offset_str[0] = 0; 1302 for(ptr = junkout;ptr<targ;ptr++) 1303 { 1304 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr); 1305 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]); 1306 } 1307 1308 log_verbose(junk); 1309 printSeq((const unsigned char *)expect, expectLen); 1310 if ( checkOffsets ) 1311 { 1312 log_verbose("\nOffsets:"); 1313 log_verbose(offset_str); 1314 } 1315 log_verbose("\n"); 1316 } 1317 ucnv_close(conv); 1318 1319 1320 if(expectLen != targ-junkout) 1321 { 1322 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1323 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1324 log_info("\nGot:"); 1325 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1326 log_info("\nExpected:"); 1327 printSeqErr((const unsigned char*)expect, expectLen); 1328 return FALSE; 1329 } 1330 1331 if (checkOffsets && (expectOffsets != 0) ) 1332 { 1333 log_verbose("comparing %d offsets..\n", targ-junkout); 1334 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 1335 log_err("did not get the expected offsets. %s", gNuConvTestName); 1336 log_err("Got : "); 1337 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1338 for(p=junkout;p<targ;p++) 1339 log_err("%d, ", junokout[p-junkout]); 1340 log_err("\nExpected: "); 1341 for(i=0; i<(targ-junkout); i++) 1342 log_err("%d,", expectOffsets[i]); 1343 } 1344 } 1345 1346 log_verbose("comparing..\n"); 1347 if(!memcmp(junkout, expect, expectLen)) 1348 { 1349 log_verbose("Matches!\n"); 1350 return TRUE; 1351 } 1352 else 1353 { 1354 log_err("String does not match. 
%s\n", gNuConvTestName); 1355 printUSeqErr(source, sourceLen); 1356 log_info("\nGot:"); 1357 printSeqErr((const unsigned char *)junkout, expectLen); 1358 log_info("\nExpected:"); 1359 printSeqErr((const unsigned char *)expect, expectLen); 1360 1361 return FALSE; 1362 } 1363 } 1364 1365 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 1366 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset) 1367 { 1368 UErrorCode status = U_ZERO_ERROR; 1369 UConverter *conv = 0; 1370 UChar junkout[MAX_LENGTH]; /* FIX */ 1371 int32_t junokout[MAX_LENGTH]; /* FIX */ 1372 const char *src; 1373 const char *realSourceEnd; 1374 const char *srcLimit; 1375 UChar *p; 1376 UChar *targ; 1377 UChar *end; 1378 int32_t *offs; 1379 int i; 1380 UBool checkOffsets = TRUE; 1381 int32_t realBufferSize; 1382 UChar *realBufferEnd; 1383 UBool doFlush; 1384 1385 UConverterToUCallback oldAction = NULL; 1386 const void* oldContext = NULL; 1387 1388 1389 for(i=0;i<MAX_LENGTH;i++) 1390 junkout[i] = 0xFFFE; 1391 1392 for(i=0;i<MAX_LENGTH;i++) 1393 junokout[i] = -1; 1394 1395 setNuConvTestName(codepage, "TO"); 1396 1397 log_verbose("\n========= %s\n", gNuConvTestName); 1398 1399 conv = ucnv_open(codepage, &status); 1400 if(U_FAILURE(status)) 1401 { 1402 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 1403 return TRUE; 1404 } 1405 1406 log_verbose("Converter opened..\n"); 1407 /*----setting the callback routine----*/ 1408 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1409 if (U_FAILURE(status)) { 1410 log_err("FAILURE in setting the callback Function! 
%s\n", myErrorName(status)); 1411 } 1412 /*-------------------------------------*/ 1413 1414 src = (const char *)source; 1415 targ = junkout; 1416 offs = junokout; 1417 1418 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1419 realBufferEnd = junkout + realBufferSize; 1420 realSourceEnd = src + sourcelen; 1421 1422 if ( gOutBufferSize != realBufferSize ) 1423 checkOffsets = FALSE; 1424 1425 if( gInBufferSize != MAX_LENGTH ) 1426 checkOffsets = FALSE; 1427 1428 do 1429 { 1430 end = nct_min( targ + gOutBufferSize, realBufferEnd); 1431 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 1432 1433 if(targ == realBufferEnd) 1434 { 1435 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 1436 return FALSE; 1437 } 1438 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 1439 1440 /* oldTarg = targ; */ 1441 1442 status = U_ZERO_ERROR; 1443 doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE); 1444 1445 ucnv_toUnicode (conv, 1446 &targ, 1447 end, 1448 (const char **)&src, 1449 (const char *)srcLimit, 1450 offs, 1451 doFlush, /* flush if we're at the end of hte source data */ 1452 &status); 1453 if(testReset) 1454 ucnv_resetFromUnicode(conv); 1455 if(gInBufferSize ==999 && gOutBufferSize==999) 1456 ucnv_resetToUnicode(conv); 1457 /* offs += (targ-oldTarg); */ 1458 1459 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 1460 1461 if(U_FAILURE(status)) 1462 { 1463 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1464 return FALSE; 1465 } 1466 1467 log_verbose("\nConversion done. 
%d bytes -> %d chars.\nResult :", 1468 sourcelen, targ-junkout); 1469 if(getTestOption(VERBOSITY_OPTION)) 1470 { 1471 char junk[999]; 1472 char offset_str[999]; 1473 1474 UChar *ptr; 1475 1476 junk[0] = 0; 1477 offset_str[0] = 0; 1478 1479 for(ptr = junkout;ptr<targ;ptr++) 1480 { 1481 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 1482 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 1483 } 1484 1485 log_verbose(junk); 1486 1487 if ( checkOffsets ) 1488 { 1489 log_verbose("\nOffsets:"); 1490 log_verbose(offset_str); 1491 } 1492 log_verbose("\n"); 1493 } 1494 ucnv_close(conv); 1495 1496 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 1497 1498 if (checkOffsets && (expectOffsets != 0)) 1499 { 1500 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 1501 1502 log_err("did not get the expected offsets. %s",gNuConvTestName); 1503 for(p=junkout;p<targ;p++) 1504 log_err("%d, ", junokout[p-junkout]); 1505 log_err("\nExpected: "); 1506 for(i=0; i<(targ-junkout); i++) 1507 log_err("%d,", expectOffsets[i]); 1508 log_err(""); 1509 for(i=0; i<(targ-junkout); i++) 1510 log_err("%X,", junkout[i]); 1511 log_err(""); 1512 for(i=0; i<(src-(const char *)source); i++) 1513 log_err("%X,", (unsigned char)source[i]); 1514 } 1515 } 1516 1517 if(!memcmp(junkout, expect, expectlen*2)) 1518 { 1519 log_verbose("Matches!\n"); 1520 return TRUE; 1521 } 1522 else 1523 { 1524 log_err("String does not match. %s\n", gNuConvTestName); 1525 log_verbose("String does not match. 
%s\n", gNuConvTestName); 1526 log_info("\nGot:"); 1527 printUSeq(junkout, expectlen); 1528 log_info("\nExpected:"); 1529 printUSeq(expect, expectlen); 1530 return FALSE; 1531 } 1532 } 1533 1534 1535 static void TestResetBehaviour(void){ 1536 #if !UCONFIG_NO_LEGACY_CONVERSION 1537 log_verbose("Testing Reset for DBCS and MBCS\n"); 1538 { 1539 static const UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 1540 static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 1541 static const int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 1542 1543 1544 static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8}; 1545 static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7}; 1546 static const int32_t offsets1[] = { 0,2,4,6}; 1547 1548 /*DBCS*/ 1549 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1550 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1551 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1552 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1553 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1554 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1555 1556 if(!testConvertToU(expected1, sizeof(expected1), 1557 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1558 offsets1, TRUE)) 1559 log_err("ibm-1363 -> did not match.\n"); 1560 /*MBCS*/ 1561 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1562 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1563 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1564 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1565 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1566 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1567 1568 
if(!testConvertToU(expected1, sizeof(expected1), 1569 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1570 offsets1, TRUE)) 1571 log_err("ibm-1363 -> did not match.\n"); 1572 1573 } 1574 1575 log_verbose("Testing Reset for ISO-2022-jp\n"); 1576 { 1577 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1578 1579 static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1580 0x31,0x1A, 0x32}; 1581 1582 1583 static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 1584 1585 1586 static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1587 static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1588 0x31,0x1A, 0x32}; 1589 static const int32_t offsets1[] = { 3,5,10,11,12}; 1590 1591 // iso-2022-jp android-change 1592 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1593 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1594 log_err("u-> not match.\n"); 1595 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1596 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1597 log_err("u-> not match.\n"); 1598 1599 if(!testConvertToU(expected1, sizeof(expected1), 1600 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1601 offsets1, TRUE)) 1602 log_err("iso-2022-jp -> did not match.\n"); 1603 1604 } 1605 1606 /* BEGIN android-removed */ 1607 /* To save space, Android does not build full ISO-2022-CN tables. 1608 We skip the tests for ISO-2022-CN. 
*/ 1609 /* 1610 log_verbose("Testing Reset for ISO-2022-cn\n"); 1611 { 1612 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1613 1614 static const uint8_t expected[] = { 1615 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1616 0x36, 0x21, 1617 0x0f, 0x31, 1618 0x1A, 1619 0x32 1620 }; 1621 1622 1623 static const int32_t offsets[] = { 1624 0, 0, 0, 0, 0, 0, 0, 1625 1, 1, 1626 2, 2, 1627 3, 1628 5, }; 1629 1630 UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1631 static const uint8_t expected1[] = { 1632 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1633 0x36, 0x21, 1634 0x1B, 0x24, 0x29, 0x47, 0x24, 0x22, 1635 0x0f, 0x1A, 1636 0x32 1637 }; 1638 static const int32_t offsets1[] = { 5,7,13,16,17}; 1639 1640 // iso-2022-CN android-change 1641 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1642 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1643 log_err("u-> not match.\n"); 1644 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1645 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1646 log_err("u-> not match.\n"); 1647 1648 if(!testConvertToU(expected1, sizeof(expected1), 1649 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1650 offsets1, TRUE)) 1651 log_err("iso-2022-cn -> did not match.\n"); 1652 } 1653 */ 1654 /* END android-removed */ 1655 1656 log_verbose("Testing Reset for ISO-2022-kr\n"); 1657 { 1658 UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1659 1660 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 1661 0x0E, 0x6C, 0x69, 1662 0x0f, 0x1A, 1663 0x0e, 0x6F, 0x4B, 1664 0x0F, 0x31, 1665 0x1A, 1666 0x32 }; 1667 1668 static const int32_t offsets[] = {-1, -1, -1, -1, 1669 0, 0, 0, 1670 1, 1, 1671 3, 3, 3, 1672 4, 4, 1673 5, 1674 7, 1675 }; 1676 static const UChar sampleText1[] = { 
0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032}; 1677 1678 static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43, 1679 0x0E, 0x6C, 0x69, 1680 0x0f, 0x41, 1681 0x0e, 0x6F, 0x4B, 1682 0x0F, 0x31, 1683 0x42, 1684 0x32 }; 1685 1686 static const int32_t offsets1[] = { 1687 5, 8, 10, 1688 13, 14, 15 1689 1690 }; 1691 // iso-2022-kr android-change 1692 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1693 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1694 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1695 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1696 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1697 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1698 if(!testConvertToU(expected1, sizeof(expected1), 1699 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1700 offsets1, TRUE)) 1701 log_err("iso-2022-kr -> did not match.\n"); 1702 } 1703 1704 1705 log_verbose("Testing Reset for HZ\n"); 1706 { 1707 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1708 1709 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 1710 0x7E, 0x7D, 0x1A, 1711 0x7E, 0x7B, 0x36, 0x21, 1712 0x7E, 0x7D, 0x31, 1713 0x1A, 1714 0x32 }; 1715 1716 1717 static const int32_t offsets[] = {0,0,0,0, 1718 1,1,1, 1719 3,3,3,3, 1720 4,4,4, 1721 5, 1722 7,}; 1723 static const UChar sampleText1[] = { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032}; 1724 1725 static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B, 1726 0x7E, 0x7D, 0x35, 1727 0x7E, 0x7B, 0x36, 0x21, 1728 0x7E, 0x7D, 0x31, 1729 0x41, 1730 0x32 }; 1731 1732 1733 static const int32_t offsets1[] = {2,6,9,13,14,15 1734 }; 1735 1736 /*hz*/ 1737 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1738 expected, sizeof(expected), "HZ", 
UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1739 log_err("u-> not match.\n"); 1740 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1741 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1742 log_err("u-> not match.\n"); 1743 if(!testConvertToU(expected1, sizeof(expected1), 1744 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1745 offsets1, TRUE)) 1746 log_err("hz -> did not match.\n"); 1747 } 1748 #endif 1749 1750 /*UTF-8*/ 1751 log_verbose("Testing for UTF8\n"); 1752 { 1753 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 1754 int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 1755 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 1756 0x04, 0x06 }; 1757 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 1758 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 1759 1760 1761 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 1762 /*UTF-8*/ 1763 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1764 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1765 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1766 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1767 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1768 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1769 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1770 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1771 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1772 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1773 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1774 log_err("u-> UTF8 with offsets and flush true did 
not match.\n"); 1775 if(!testConvertToU(expected, sizeof(expected), 1776 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1777 log_err("UTF8 -> did not match.\n"); 1778 if(!testConvertToU(expected, sizeof(expected), 1779 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1780 log_err("UTF8 -> did not match.\n"); 1781 if(!testConvertToU(expected, sizeof(expected), 1782 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1783 log_err("UTF8 -> did not match.\n"); 1784 if(!testConvertToU(expected, sizeof(expected), 1785 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1786 log_err("UTF8 -> did not match.\n"); 1787 1788 } 1789 1790 }

/*
 * Test that U_TRUNCATED_CHAR_FOUND is set.
 *
 * Opens the converter cnvName, installs UCNV_TO_U_CALLBACK_STOP and feeds it
 * bytes[0..length), which is expected to be a partial character that produces
 * no output. Three checks are made:
 *   1a. all bytes with flush=FALSE: no failure, all input consumed, no output;
 *   1b. no further input with flush=TRUE: U_TRUNCATED_CHAR_FOUND, no output;
 *   2.  after ucnv_resetToUnicode(), all bytes with flush=TRUE:
 *       U_TRUNCATED_CHAR_FOUND, all input consumed, no output.
 *
 * Mismatches are reported with log_err(). If the converter cannot be opened or
 * the callback cannot be set, that is reported with log_data_err() and the
 * function returns without running the checks.
 *
 * @param cnvName converter name passed to ucnv_open()
 * @param bytes   the partial input byte sequence
 * @param length  number of bytes in bytes[]
 */
static void
doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
    UConverter *cnv;

    UChar buffer[2];
    UChar *target, *targetLimit;
    const char *source, *sourceLimit;

    UErrorCode errorCode;

    errorCode=U_ZERO_ERROR;
    cnv=ucnv_open(cnvName, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
        return;
    }
    ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
                     cnvName, u_errorName(errorCode));
        ucnv_close(cnv);
        return;
    }

    source=(const char *)bytes;
    sourceLimit=source+length;
    target=buffer;
    targetLimit=buffer+LENGTHOF(buffer);

    /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */
    ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode);
    if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
        /* the arguments are passed through "...": cast each one to the int that %d expects */
        log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n",
                cnvName, (int)length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
    }

    errorCode=U_ZERO_ERROR;
    source=sourceLimit;
    target=buffer;
    ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
    if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
        log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
                cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
    }

    /* 2. input bytes with flush=TRUE */
    ucnv_resetToUnicode(cnv);

    errorCode=U_ZERO_ERROR;
    source=(const char *)bytes;
    target=buffer;
    ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode);
    if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
        log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
                cnvName, (int)length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
    }

    ucnv_close(cnv);
}

/*
 * Runs doTestTruncated() once for every entry of a fixed table of
 * (converter name, partial byte sequence, length) test cases.
 * The Shift-JIS and ibm-939 cases are compiled in only when
 * UCONFIG_NO_LEGACY_CONVERSION is not set.
 */
static void
TestTruncated(void) {
    static const struct {
        const char *cnvName;
        uint8_t bytes[8]; /* partial input bytes resulting in no output */
        int32_t length;
    } testCases[]={
        { "IMAP-mailbox-name",   { 0x26 }, 1 }, /* & */
        { "IMAP-mailbox-name",   { 0x26, 0x42 }, 2 }, /* &B */
        { "IMAP-mailbox-name",   { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
        { "IMAP-mailbox-name",   { 0x26, 0x41, 0x41 }, 3 }, /* &AA */

        { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
        { "UTF-8",      { 0xd1 }, 1 },

        { "UTF-16BE",   { 0x4e }, 1 },
        { "UTF-16LE",   { 0x4e }, 1 },
        { "UTF-16",     { 0x4e }, 1 },
        { "UTF-16",     { 0xff }, 1 },
        { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },

        { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
        { "UTF-32LE",   { 0x4e }, 1 },
        { "UTF-32",     { 0, 0, 0x4e }, 3 },
        { "UTF-32",     { 0xff }, 1 },
        { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
        { "SCSU",       { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */

#if !UCONFIG_NO_LEGACY_CONVERSION
        { "BOCU-1",     { 0xd5 }, 1 },

        { "Shift-JIS",  { 0xe0 }, 1 },

        { "ibm-939",    { 0x0e, 0x41 }, 2 } /* SO 0x41 */
#else
        { "BOCU-1",     { 0xd5 }, 1 }
#endif
    };
    int32_t i;

    for(i=0; i<LENGTHOF(testCases); ++i) {
        doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length);
    }
}

/*
 * One expectation about a converter's roundtrip set, as used by TestUnicodeSet():
 *   start..end       must be contained in the set;
 *   start2..end2     must be contained as well, unless start2 is negative;
 *   notStart..notEnd must be entirely absent from the set, unless notStart is negative.
 */
typedef struct NameRange {
    const char *name;
    UChar32 start, end, start2, end2, notStart, notEnd;
} NameRange;

/*
 * Tests ucnv_getUnicodeSet() with UCNV_ROUNDTRIP_SET:
 *   - every available converter must yield a non-empty set;
 *   - the converters in completeSetNames must cover U+0000..U+D7FF and
 *     U+E000..U+10FFFF;
 *   - the LMBCS variants must cover U+0000..U+F5FF and U+F700..U+10FFFF
 *     (only when legacy conversion is compiled in);
 *   - the converters in nameRanges must contain/exclude the listed ranges;
 *   - a NULL converter must yield U_ILLEGAL_ARGUMENT_ERROR, and an incoming
 *     failure code must be left unchanged.
 * Converters that cannot be opened are reported with log_data_err() and skipped.
 */
static void
TestUnicodeSet(void) {
    UErrorCode errorCode;
    UConverter *cnv;
    USet *set;
    const char *name;
    int32_t i, count;

    static const char *const completeSetNames[]={
        "UTF-7",
        "UTF-8",
        "UTF-16",
        "UTF-16BE",
        "UTF-16LE",
        "UTF-32",
        "UTF-32BE",
        "UTF-32LE",
        "SCSU",
        "BOCU-1",
        "CESU-8",
#if !UCONFIG_NO_LEGACY_CONVERSION
        "gb18030",
#endif
        "IMAP-mailbox-name"
    };
#if !UCONFIG_NO_LEGACY_CONVERSION
    static const char *const lmbcsNames[]={
        "LMBCS-1",
        "LMBCS-2",
        "LMBCS-3",
        "LMBCS-4",
        "LMBCS-5",
        "LMBCS-6",
        "LMBCS-8",
        "LMBCS-11",
        "LMBCS-16",
        "LMBCS-17",
        "LMBCS-18",
        "LMBCS-19"
    };
#endif

    static const NameRange nameRanges[]={
        { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
#if !UCONFIG_NO_LEGACY_CONVERSION
        { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
#endif
        { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
#if !UCONFIG_NO_LEGACY_CONVERSION
        { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
        { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
        /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
        { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
#else
        { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
#endif
    };

    /* open an empty set (start>end) */
    set=uset_open(1, 0);
    if(set==NULL) {
        /* every call below needs the set, so nothing can be tested without it */
        log_err("error: uset_open() failed - unable to allocate a USet\n");
        return;
    }

    /* every available converter must report a non-empty roundtrip set */
    count=ucnv_countAvailable();
    for(i=0; i<count; ++i) {
        errorCode=U_ZERO_ERROR;
        name=ucnv_getAvailableName(i);
        cnv=ucnv_open(name, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_data_err("error: unable to open converter %s - %s\n",
                         name, u_errorName(errorCode));
            continue;
        }

        uset_clear(set);
        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
                    name, u_errorName(errorCode));
        } else if(uset_size(set)==0) {
            log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
        }

        ucnv_close(cnv);
    }

    /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
    for(i=0; i<LENGTHOF(completeSetNames); ++i) {
        errorCode=U_ZERO_ERROR;
        name=completeSetNames[i];
        cnv=ucnv_open(name, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_data_err("error: unable to open converter %s - %s\n",
                         name, u_errorName(errorCode));
            continue;
        }

        uset_clear(set);
        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
                    name, u_errorName(errorCode));
        } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
            log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
        }

        ucnv_close(cnv);
    }

#if !UCONFIG_NO_LEGACY_CONVERSION
    /* test LMBCS variants which convert all of Unicode except for U+F6xx */
    for(i=0; i<LENGTHOF(lmbcsNames); ++i) {
        errorCode=U_ZERO_ERROR;
        name=lmbcsNames[i];
        cnv=ucnv_open(name, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_data_err("error: unable to open converter %s - %s\n",
                         name, u_errorName(errorCode));
            continue;
        }

        uset_clear(set);
        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
                    name, u_errorName(errorCode));
        } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
            log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
        }

        ucnv_close(cnv);
    }
#endif

    /* test specific sets */
    for(i=0; i<LENGTHOF(nameRanges); ++i) {
        errorCode=U_ZERO_ERROR;
        name=nameRanges[i].name;
        cnv=ucnv_open(name, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_data_err("error: unable to open converter %s - %s\n",
                         name, u_errorName(errorCode));
            continue;
        }

        uset_clear(set);
        ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
        if(U_FAILURE(errorCode)) {
            log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
                    name, u_errorName(errorCode));
        } else if(
            !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
            (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
        ) {
            log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
        } else if(nameRanges[i].notStart>=0) {
            /*
             * simulate containsAny() with the C API:
             * the set contains none of the range exactly when its complement contains all of it
             */
            uset_complement(set);
            if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
                log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
            }
        }

        ucnv_close(cnv);
    }

    /* a NULL converter must be rejected */
    errorCode = U_ZERO_ERROR;
    ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
    if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
        log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
    }
    errorCode = U_PARSE_ERROR;
    /* Make sure that it does nothing if an error is passed in. Difficult to test for properly. */
    ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
    if (errorCode != U_PARSE_ERROR) {
        log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
    }

    uset_close(set);
}