1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /***************************************************************************** 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda 7/7/2000 Converter Tests for extended code coverage 13 ****************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include "unicode/uloc.h" 19 #include "unicode/ucnv.h" 20 #include "unicode/utypes.h" 21 #include "unicode/ustring.h" 22 #include "unicode/uset.h" 23 #include "cintltst.h" 24 25 #define MAX_LENGTH 999 26 27 #define UNICODE_LIMIT 0x10FFFF 28 #define SURROGATE_HIGH_START 0xD800 29 #define SURROGATE_LOW_END 0xDFFF 30 31 static int32_t gInBufferSize = 0; 32 static int32_t gOutBufferSize = 0; 33 static char gNuConvTestName[1024]; 34 35 #define nct_min(x,y) ((x<y) ? x : y) 36 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 37 38 static void printSeq(const unsigned char* a, int len); 39 static void printSeqErr(const unsigned char* a, int len); 40 static void printUSeq(const UChar* a, int len); 41 static void printUSeqErr(const UChar* a, int len); 42 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 43 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 44 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 45 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 46 47 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 48 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset); 49 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 50 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset); 51 52 static void setNuConvTestName(const char *codepage, const char *direction) 53 { 54 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 55 codepage, 56 direction, 57 (int)gInBufferSize, 58 (int)gOutBufferSize); 59 } 60 61 62 static void TestSurrogateBehaviour(void); 63 static void TestErrorBehaviour(void); 64 65 #if !UCONFIG_NO_LEGACY_CONVERSION 66 static void TestToUnicodeErrorBehaviour(void); 67 static void TestGetNextErrorBehaviour(void); 68 #endif 69 70 static void TestRegressionUTF8(void); 71 static void TestRegressionUTF32(void); 72 static void TestAvailableConverters(void); 73 static void TestFlushInternalBuffer(void); /*for improved code coverage in ucnv_cnv.c*/ 74 static void TestResetBehaviour(void); 75 static void TestTruncated(void); 76 static void TestUnicodeSet(void); 77 78 static void TestWithBufferSize(int32_t osize, int32_t isize); 79 80 81 static void printSeq(const unsigned char* a, int len) 82 { 83 int i=0; 84 log_verbose("\n{"); 85 while (i<len) 86 log_verbose("0x%02X ", a[i++]); 87 log_verbose("}\n"); 88 } 89 90 static void printUSeq(const UChar* a, int len) 91 { 92 int i=0; 93 log_verbose("\n{"); 94 while (i<len) 95 log_verbose("%0x04X ", a[i++]); 96 log_verbose("}\n"); 97 } 98 99 static void printSeqErr(const unsigned char* a, int len) 100 { 101 int i=0; 102 fprintf(stderr, "\n{"); 103 while (i<len) fprintf(stderr, "0x%02X ", a[i++]); 104 fprintf(stderr, "}\n"); 105 } 106 107 static void printUSeqErr(const UChar* a, int len) 108 { 109 int i=0; 110 fprintf(stderr, "\n{"); 111 while (i<len) 112 fprintf(stderr, "0x%04X ", a[i++]); 113 fprintf(stderr,"}\n"); 114 } 115 116 void addExtraTests(TestNode** root); 117 118 void addExtraTests(TestNode** root) 119 { 120 addTest(root, &TestSurrogateBehaviour, "tsconv/ncnvtst/TestSurrogateBehaviour"); 121 addTest(root, &TestErrorBehaviour, "tsconv/ncnvtst/TestErrorBehaviour"); 122 123 #if !UCONFIG_NO_LEGACY_CONVERSION 124 addTest(root, &TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour"); 125 addTest(root, &TestGetNextErrorBehaviour, "tsconv/ncnvtst/TestGetNextErrorBehaviour"); 126 #endif 127 128 addTest(root, &TestAvailableConverters, "tsconv/ncnvtst/TestAvailableConverters"); 129 addTest(root, &TestFlushInternalBuffer, "tsconv/ncnvtst/TestFlushInternalBuffer"); 130 addTest(root, &TestResetBehaviour, "tsconv/ncnvtst/TestResetBehaviour"); 131 addTest(root, &TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8"); 132 addTest(root, &TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32"); 133 addTest(root, &TestTruncated, "tsconv/ncnvtst/TestTruncated"); 134 addTest(root, &TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet"); 135 } 136 137 /*test surrogate behaviour*/ 138 static void TestSurrogateBehaviour(){ 139 log_verbose("Testing for SBCS and LATIN_1\n"); 140 { 141 UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032}; 142 const uint8_t expected[] = {0x31, 0x1a, 0x32}; 143 144 #if !UCONFIG_NO_LEGACY_CONVERSION 145 /*SBCS*/ 146 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 147 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR)) 148 log_err("u-> ibm-920 [UCNV_SBCS] not match.\n"); 149 #endif 150 151 /*LATIN_1*/ 152 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 153 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR )) 154 log_err("u-> LATIN_1 not match.\n"); 155 156 } 157 158 #if !UCONFIG_NO_LEGACY_CONVERSION 159 log_verbose("Testing for DBCS and MBCS\n"); 160 { 161 UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 162 const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 163 int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 164 165 /*DBCS*/ 166 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 167 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 168 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 169 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 170 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR)) 171 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 172 /*MBCS*/ 173 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 174 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 175 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 176 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 177 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR)) 178 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 179 } 180 181 182 /* BEGIN android-removed */ 183 /* To save space, Android does not build full ISO2022 CJK tables. 184 We skip the tests for ISO-2022. */ 185 /* 186 log_verbose("Testing for ISO-2022-jp\n"); 187 { 188 UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 189 190 const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 191 0x31,0x1A, 0x32}; 192 193 194 int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 195 196 // iso-2022-jp 197 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 198 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR)) 199 log_err("u-> not match.\n"); 200 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 201 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR)) 202 log_err("u-> not match.\n"); 203 } 204 205 log_verbose("Testing for ISO-2022-cn\n"); 206 { 207 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 208 209 static const uint8_t expected[] = { 210 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 211 0x36, 0x21, 212 0x0F, 0x31, 213 0x1A, 214 0x32 215 }; 216 217 218 219 static const int32_t offsets[] = { 220 0, 0, 0, 0, 0, 0, 0, 221 1, 1, 222 2, 2, 223 3, 224 5, }; 225 226 // iso-2022-CN 227 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 228 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR)) 229 log_err("u-> not match.\n"); 230 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 231 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR)) 232 log_err("u-> not match.\n"); 233 } 234 235 log_verbose("Testing for ISO-2022-kr\n"); 236 { 237 static const UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 238 239 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 240 0x0E, 0x6C, 0x69, 241 0x0f, 0x1A, 242 0x0e, 0x6F, 0x4B, 243 0x0F, 0x31, 244 0x1A, 245 0x32 }; 246 247 static const int32_t offsets[] = {-1, -1, -1, -1, 248 0, 0, 0, 249 1, 1, 250 3, 3, 3, 251 4, 4, 252 5, 253 7, 254 }; 255 256 // iso-2022-kr 257 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 258 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR)) 259 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 260 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 261 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR)) 262 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 263 } 264 */ 265 /* END android-removed */ 266 log_verbose("Testing for HZ\n"); 267 { 268 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 269 270 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 271 0x7E, 0x7D, 0x1A, 272 0x7E, 0x7B, 0x36, 0x21, 273 0x7E, 0x7D, 0x31, 274 0x1A, 275 0x32 }; 276 277 278 static const int32_t offsets[] = {0,0,0,0, 279 1,1,1, 280 3,3,3,3, 281 4,4,4, 282 5, 283 7,}; 284 285 /*hz*/ 286 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 287 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR)) 288 log_err("u-> HZ not match.\n"); 289 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 290 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR)) 291 log_err("u-> HZ not match.\n"); 292 } 293 #endif 294 295 /*UTF-8*/ 296 log_verbose("Testing for UTF8\n"); 297 { 298 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 299 static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 300 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 301 0x04, 0x06 }; 302 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 303 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 304 305 306 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 307 /*UTF-8*/ 308 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 309 expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR )) 310 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 311 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 312 expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR )) 313 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 314 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 315 expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR )) 316 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 317 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 318 expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR )) 319 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 320 321 if(!convertToU(expected, sizeof(expected), 322 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, TRUE, U_ZERO_ERROR )) 323 log_err("UTF8 -> u did not match.\n"); 324 if(!convertToU(expected, sizeof(expected), 325 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", 0, FALSE, U_ZERO_ERROR )) 326 log_err("UTF8 -> u did not match.\n"); 327 if(!convertToU(expected, sizeof(expected), 328 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR )) 329 log_err("UTF8 ->u did not match.\n"); 330 if(!convertToU(expected, sizeof(expected), 331 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR )) 332 log_err("UTF8 -> u did not match.\n"); 333 334 } 335 } 336 337 /*test various error behaviours*/ 338 static void TestErrorBehaviour(){ 339 log_verbose("Testing for SBCS and LATIN_1\n"); 340 { 341 static const UChar sampleText[] = { 0x0031, 0xd801}; 342 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 343 static const uint8_t expected0[] = { 0x31}; 344 static const uint8_t expected[] = { 0x31, 0x1a}; 345 static const uint8_t expected2[] = { 0x31, 0x1a, 0x32}; 346 347 #if !UCONFIG_NO_LEGACY_CONVERSION 348 /*SBCS*/ 349 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 350 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 351 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 352 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 353 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR)) 354 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 355 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 356 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 357 log_err("u-> ibm-920 [UCNV_SBCS] did not match\n"); 358 #endif 359 360 /*LATIN_1*/ 361 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 362 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 363 log_err("u-> LATIN_1 is supposed to fail\n"); 364 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 365 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR)) 366 log_err("u-> LATIN_1 is supposed to fail\n"); 367 368 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 369 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 370 log_err("u-> LATIN_1 did not match\n"); 371 } 372 373 #if !UCONFIG_NO_LEGACY_CONVERSION 374 log_verbose("Testing for DBCS and MBCS\n"); 375 { 376 static const UChar sampleText[] = { 0x00a1, 0xd801}; 377 static const uint8_t expected[] = { 0xa2, 0xae}; 378 static const int32_t offsets[] = { 0x00, 0x00}; 379 static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0}; 380 static const int32_t offsetsSUB[] = { 0x00, 0x00, 0x01, 0x01}; 381 382 static const UChar sampleText2[] = { 0x00a1, 0xd801, 0x00a4}; 383 static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 384 static const int32_t offsets2[] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02}; 385 386 static const UChar sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01}; 387 static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0}; 388 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x01, 0x02, 0x02}; 389 390 /* BEGIN android-changed */ 391 /* Android uses a different EUC-JP table. We change one character, 392 * choosing a mapping that is common to both tables. */ 393 static const UChar sampleText4MBCS[] = { 0x0061, 0x9ED1, 0xdc01}; 394 static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xf4, 0xf8, 0xf4, 0xfe}; 395 /* static const UChar sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; */ 396 /* static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; */ 397 /* END android-changed */ 398 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 }; 399 400 /*DBCS*/ 401 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 402 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 403 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 404 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 405 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 406 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 407 408 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 409 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR)) 410 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 411 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 412 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_ZERO_ERROR)) 413 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 414 415 416 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 417 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 418 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 419 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 420 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR)) 421 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 422 423 /*MBCS*/ 424 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 425 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 426 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 427 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 428 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 429 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 430 431 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 432 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 433 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 434 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 435 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 436 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 437 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 438 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR)) 439 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 440 441 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 442 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR)) 443 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 444 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 445 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR)) 446 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 447 448 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 449 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 450 log_err("u-> euc-jp [UCNV_MBCS] \n"); 451 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 452 expected4MBCS, sizeof(expected4MBCS), "euc-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 453 log_err("u-> euc-jp [UCNV_MBCS] \n"); 454 } 455 /* BEGIN android-removed */ 456 /* To save space, Android does not build full ISO2022 CJK tables. 457 We skip the tests for ISO-2022. */ 458 /* 459 // iso-2022-jp 460 log_verbose("Testing for iso-2022-jp\n"); 461 { 462 static const UChar sampleText[] = { 0x0031, 0xd801}; 463 static const uint8_t expected[] = { 0x31}; 464 static const uint8_t expectedSUB[] = { 0x31, 0x1a}; 465 static const int32_t offsets[] = { 0x00, 1}; 466 467 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 468 static const uint8_t expected2[] = { 0x31,0x1A,0x32}; 469 static const int32_t offsets2[] = { 0x00,0x01,0x02}; 470 471 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 472 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a}; 473 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 }; 474 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 475 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR)) 476 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 477 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 478 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_ZERO_ERROR)) 479 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 480 481 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 482 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR)) 483 log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n"); 484 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 485 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 486 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 487 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 488 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 489 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 490 491 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 492 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 493 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 494 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 495 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 496 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 497 } 498 499 // iso-2022-cn 500 log_verbose("Testing for iso-2022-cn\n"); 501 { 502 static const UChar sampleText[] = { 0x0031, 0xd801}; 503 static const uint8_t expected[] = { 0x31}; 504 static const uint8_t expectedSUB[] = { 0x31, 0x1A}; 505 static const int32_t offsets[] = { 0x00, 1}; 506 507 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 508 static const uint8_t expected2[] = { 0x31, 0x1A,0x32}; 509 static const int32_t offsets2[] = { 0x00, 0x01,0x02}; 510 511 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 512 static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A}; 513 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x02 }; 514 515 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 516 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a }; 517 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 }; 518 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 519 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR)) 520 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 521 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 522 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR)) 523 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 524 525 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 526 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR)) 527 log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n"); 528 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 529 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 530 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 531 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 532 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 533 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 534 535 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 536 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR)) 537 log_err("u->iso-2022-cn [UCNV_MBCS] \n"); 538 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 539 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR)) 540 log_err("u-> iso-2022-cn[UCNV_MBCS] \n"); 541 542 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 543 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR)) 544 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 545 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 546 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR)) 547 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 548 } 549 550 // iso-2022-kr 551 log_verbose("Testing for iso-2022-kr\n"); 552 { 553 static const UChar sampleText[] = { 0x0031, 0xd801}; 554 static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31}; 555 static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A}; 556 static const int32_t offsets[] = { -1, -1, -1, -1, 0x00, 1}; 557 558 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 559 static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32}; 560 static const int32_t offsets2[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02}; 561 562 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 563 static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A }; 564 static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 }; 565 566 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 567 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR)) 568 log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); 569 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 570 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR)) 571 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 572 573 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 574 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR)) 575 log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n"); 576 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 577 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 578 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 579 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 580 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 581 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 582 583 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 584 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR)) 585 log_err("u->iso-2022-kr [UCNV_MBCS] \n"); 586 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 587 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR)) 588 log_err("u-> iso-2022-kr[UCNV_MBCS] \n"); 589 } 590 */ 591 /* END android-removed */ 592 593 /*HZ*/ 594 log_verbose("Testing for HZ\n"); 595 { 596 static const UChar sampleText[] = { 0x0031, 0xd801}; 597 static const uint8_t expected[] = { 0x7e, 0x7d, 0x31}; 598 static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A}; 599 static const int32_t offsets[] = { 0x00, 0x00, 0x00, 1}; 600 601 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 602 static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31, 0x1A, 0x32 }; 603 static const int32_t offsets2[] = { 0x00, 0x00, 0x00, 0x01, 0x02 }; 604 605 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 606 static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50, 0x1A }; 607 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x02}; 608 609 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 610 static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a }; 611 static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 }; 612 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 613 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR)) 614 log_err("u-> HZ [UCNV_MBCS] \n"); 615 if(!convertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 616 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR)) 617 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 618 619 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 620 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR)) 621 log_err("u->HZ[UCNV_DBCS] did not match\n"); 622 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 623 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 624 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 625 if(!convertFromU(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]), 626 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 627 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 628 629 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 630 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR)) 631 log_err("u->HZ [UCNV_MBCS] \n"); 632 if(!convertFromU(sampleText3MBCS, sizeof(sampleText3MBCS)/sizeof(sampleText3MBCS[0]), 633 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR)) 634 log_err("u-> HZ[UCNV_MBCS] \n"); 635 636 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 637 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR)) 638 log_err("u-> HZ [UCNV_MBCS] \n"); 639 if(!convertFromU(sampleText4MBCS, sizeof(sampleText4MBCS)/sizeof(sampleText4MBCS[0]), 640 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR)) 641 log_err("u-> HZ [UCNV_MBCS] \n"); 642 } 643 #endif 644 } 645 646 #if !UCONFIG_NO_LEGACY_CONVERSION 647 /*test different convertToUnicode error behaviours*/ 648 static void TestToUnicodeErrorBehaviour() 649 { 650 log_verbose("Testing error conditions for DBCS\n"); 651 { 652 uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04}; 653 const UChar expected[] = { 0x00a1 }; 654 655 if(!convertToU(sampleText, sizeof(sampleText), 656 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, TRUE, U_ZERO_ERROR )) 657 log_err("DBCS (ibm-1363)->Unicode did not match.\n"); 658 if(!convertToU(sampleText, sizeof(sampleText), 659 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1363", 0, FALSE, U_ZERO_ERROR )) 660 log_err("DBCS (ibm-1363)->Unicode with flush = false did not match.\n"); 661 } 662 log_verbose("Testing error conditions for SBCS\n"); 663 { 664 uint8_t sampleText[] = { 0xa2, 0xFF}; 665 const UChar expected[] = { 0x00c2 }; 666 667 /* uint8_t sampleText2[] = { 0xa2, 0x70 }; 668 const UChar expected2[] = { 0x0073 };*/ 669 670 if(!convertToU(sampleText, sizeof(sampleText), 671 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, TRUE, U_ZERO_ERROR )) 672 log_err("SBCS (ibm-1051)->Unicode did not match.\n"); 673 if(!convertToU(sampleText, sizeof(sampleText), 674 expected, sizeof(expected)/sizeof(expected[0]), "ibm-1051", 0, FALSE, U_ZERO_ERROR )) 675 log_err("SBCS (ibm-1051)->Unicode with flush = false did not match.\n"); 676 677 } 678 } 679 680 static void TestGetNextErrorBehaviour(){ 681 /*Test for unassigned character*/ 682 #define INPUT_SIZE 1 683 static const char input1[INPUT_SIZE]={ 0x70 }; 684 const char* source=(const char*)input1; 685 UErrorCode err=U_ZERO_ERROR; 686 UChar32 c=0; 687 UConverter *cnv=ucnv_open("ibm-424", &err); 688 if(U_FAILURE(err)) { 689 log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err)); 690 return; 691 } 692 c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err); 693 if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){ 694 log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c); 695 } 696 ucnv_close(cnv); 697 } 698 #endif 699 700 #define MAX_UTF16_LEN 2 701 #define MAX_UTF8_LEN 4 702 703 /*Regression test for utf8 converter*/ 704 static void TestRegressionUTF8(){ 705 UChar32 currCh = 0; 706 int32_t offset8; 707 int32_t offset16; 708 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 709 uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH); 710 711 while (currCh <= UNICODE_LIMIT) { 712 offset16 = 0; 713 offset8 = 0; 714 while(currCh <= UNICODE_LIMIT 715 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 716 && offset8 < (MAX_LENGTH - MAX_UTF8_LEN)) 717 { 718 if (currCh == SURROGATE_HIGH_START) { 719 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 720 } 721 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 722 UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh); 723 currCh++; 724 } 725 if(!convertFromU(standardForm, offset16, 726 utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 727 log_err("Unicode->UTF8 did not match.\n"); 728 } 729 if(!convertToU(utf8, offset8, 730 standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 731 log_err("UTF8->Unicode did not match.\n"); 732 } 733 } 734 735 free(standardForm); 736 free(utf8); 737 738 { 739 static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 }; 740 static const UChar expected[] = { 0x0301, 0x0300 }; 741 UConverter *conv8; 742 UErrorCode err = U_ZERO_ERROR; 743 UChar pivotBuffer[100]; 744 const UChar* const pivEnd = pivotBuffer + 100; 745 const char* srcBeg; 746 const char* srcEnd; 747 UChar* pivBeg; 748 749 conv8 = ucnv_open("UTF-8", &err); 750 751 srcBeg = src8; 752 pivBeg = pivotBuffer; 753 srcEnd = src8 + 3; 754 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 755 if (srcBeg != srcEnd) { 756 log_err("Did not consume whole buffer on first call.\n"); 757 } 758 759 srcEnd = src8 + 4; 760 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 761 if (srcBeg != srcEnd) { 762 log_err("Did not consume whole buffer on second call.\n"); 763 } 764 765 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 766 log_err("Did not get expected results for UTF-8.\n"); 767 } 768 ucnv_close(conv8); 769 } 770 } 771 772 #define MAX_UTF32_LEN 1 773 774 static void TestRegressionUTF32(){ 775 UChar32 currCh = 0; 776 int32_t offset32; 777 int32_t offset16; 778 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 779 UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32)); 780 781 while (currCh <= UNICODE_LIMIT) { 782 offset16 = 0; 783 offset32 = 0; 784 while(currCh <= UNICODE_LIMIT 785 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 786 && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN)) 787 { 788 if (currCh == SURROGATE_HIGH_START) { 789 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 790 } 791 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 792 UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh); 793 currCh++; 794 } 795 if(!convertFromU(standardForm, offset16, 796 (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 797 log_err("Unicode->UTF32 did not match.\n"); 798 } 799 if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32), 800 standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 801 log_err("UTF32->Unicode did not match.\n"); 802 } 803 } 804 free(standardForm); 805 free(utf32); 806 807 { 808 /* Check for lone surrogate error handling. */ 809 static const UChar sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 }; 810 static const UChar sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 }; 811 static const uint8_t expectedUTF32BE[] = { 812 0x00, 0x00, 0x00, 0x31, 813 0x00, 0x00, 0xff, 0xfd, 814 0x00, 0x00, 0x00, 0x32 815 }; 816 static const uint8_t expectedUTF32LE[] = { 817 0x31, 0x00, 0x00, 0x00, 818 0xfd, 0xff, 0x00, 0x00, 819 0x32, 0x00, 0x00, 0x00 820 }; 821 static const int32_t offsetsUTF32[] = { 822 0x00, 0x00, 0x00, 0x00, 823 0x01, 0x01, 0x01, 0x01, 824 0x02, 0x02, 0x02, 0x02 825 }; 826 827 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 828 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 829 log_err("u->UTF-32BE\n"); 830 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 831 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 832 log_err("u->UTF-32BE\n"); 833 834 if(!convertFromU(sampleBadStartSurrogate, sizeof(sampleBadStartSurrogate)/sizeof(sampleBadStartSurrogate[0]), 835 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 836 log_err("u->UTF-32LE\n"); 837 if(!convertFromU(sampleBadEndSurrogate, sizeof(sampleBadEndSurrogate)/sizeof(sampleBadEndSurrogate[0]), 838 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 839 log_err("u->UTF-32LE\n"); 840 } 841 842 { 843 static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 }; 844 static const UChar expected[] = { 0x0031, 0x0030 }; 845 UConverter *convBE; 846 UErrorCode err = U_ZERO_ERROR; 847 UChar pivotBuffer[100]; 848 const UChar* const pivEnd = pivotBuffer + 100; 849 const char* srcBeg; 850 const char* srcEnd; 851 UChar* pivBeg; 852 853 convBE = ucnv_open("UTF-32BE", &err); 854 855 srcBeg = srcBE; 856 pivBeg = pivotBuffer; 857 srcEnd = srcBE + 5; 858 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 859 if (srcBeg != srcEnd) { 860 log_err("Did not consume whole buffer on first call.\n"); 861 } 862 863 srcEnd = srcBE + 8; 864 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 865 if (srcBeg != srcEnd) { 866 log_err("Did not consume whole buffer on second call.\n"); 867 } 868 869 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 870 log_err("Did not get expected results for UTF-32BE.\n"); 871 } 872 ucnv_close(convBE); 873 } 874 { 875 static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 }; 876 static const UChar expected[] = { 0x0031, 0x0030 }; 877 UConverter *convLE; 878 UErrorCode err = U_ZERO_ERROR; 879 UChar pivotBuffer[100]; 880 const UChar* const pivEnd = pivotBuffer + 100; 881 const char* srcBeg; 882 const char* srcEnd; 883 UChar* pivBeg; 884 885 convLE = ucnv_open("UTF-32LE", &err); 886 887 srcBeg = srcLE; 888 pivBeg = pivotBuffer; 889 srcEnd = srcLE + 5; 890 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 891 if (srcBeg != srcEnd) { 892 log_err("Did not consume whole buffer on first call.\n"); 893 } 894 895 srcEnd = srcLE + 8; 896 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 897 if (srcBeg != srcEnd) { 898 log_err("Did not consume whole buffer on second call.\n"); 899 } 900 901 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 902 log_err("Did not get expected results for UTF-32LE.\n"); 903 } 904 ucnv_close(convLE); 905 } 906 } 907 908 /*Walk through the available converters*/ 909 static void TestAvailableConverters(){ 910 UErrorCode status=U_ZERO_ERROR; 911 UConverter *conv=NULL; 912 int32_t i=0; 913 for(i=0; i < ucnv_countAvailable(); i++){ 914 status=U_ZERO_ERROR; 915 conv=ucnv_open(ucnv_getAvailableName(i), &status); 916 if(U_FAILURE(status)){ 917 log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n", 918 ucnv_getAvailableName(i), myErrorName(status)); 919 continue; 920 } 921 ucnv_close(conv); 922 } 923 924 } 925 926 static void TestFlushInternalBuffer(){ 927 TestWithBufferSize(MAX_LENGTH, 1); 928 TestWithBufferSize(1, 1); 929 TestWithBufferSize(1, MAX_LENGTH); 930 TestWithBufferSize(MAX_LENGTH, MAX_LENGTH); 931 } 932 933 static void TestWithBufferSize(int32_t insize, int32_t outsize){ 934 935 gInBufferSize =insize; 936 gOutBufferSize = outsize; 937 938 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 939 { 940 UChar sampleText[] = 941 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E }; 942 const uint8_t expectedUTF8[] = 943 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 944 int32_t toUTF8Offs[] = 945 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07}; 946 /* int32_t fmUTF8Offs[] = 947 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/ 948 949 /*UTF-8*/ 950 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 951 expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE)) 952 log_err("u-> UTF8 did not match.\n"); 953 } 954 955 #if !UCONFIG_NO_LEGACY_CONVERSION 956 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 957 { 958 UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 959 const uint8_t toIBM943[]= { 0x61, 960 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 961 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 962 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 963 0x61 }; 964 int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 965 966 if(!testConvertFromU(inputTest, sizeof(inputTest)/sizeof(inputTest[0]), 967 toIBM943, sizeof(toIBM943), "ibm-943", 968 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE)) 969 log_err("u-> ibm-943 with subst with value did not match.\n"); 970 } 971 #endif 972 973 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 974 { 975 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 976 0xe0, 0x80, 0x61}; 977 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 978 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 979 980 if(!testConvertToU(sampleText1, sizeof(sampleText1), 981 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE)) 982 log_err("utf8->u with substitute did not match.\n");; 983 } 984 985 #if !UCONFIG_NO_LEGACY_CONVERSION 986 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 987 /*to Unicode*/ 988 { 989 const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 990 0x81, 0xad, /*unassigned*/ 991 0x89, 0xd3 }; 992 UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 993 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 994 0x7B87}; 995 int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 996 997 if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU), 998 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943", 999 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE)) 1000 log_err("ibm-943->u with substitute with value did not match.\n"); 1001 1002 } 1003 #endif 1004 } 1005 1006 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1007 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1008 { 1009 1010 int32_t i=0; 1011 char *p=0; 1012 const UChar *src; 1013 char buffer[MAX_LENGTH]; 1014 int32_t offsetBuffer[MAX_LENGTH]; 1015 int32_t *offs=0; 1016 char *targ; 1017 char *targetLimit; 1018 UChar *sourceLimit=0; 1019 UErrorCode status = U_ZERO_ERROR; 1020 UConverter *conv = 0; 1021 conv = ucnv_open(codepage, &status); 1022 if(U_FAILURE(status)) 1023 { 1024 log_data_err("Couldn't open converter %s\n",codepage); 1025 return TRUE; 1026 } 1027 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1028 1029 for(i=0; i<MAX_LENGTH; i++){ 1030 buffer[i]=(char)0xF0; 1031 offsetBuffer[i]=0xFF; 1032 } 1033 1034 src=source; 1035 sourceLimit=(UChar*)src+(sourceLen); 1036 targ=buffer; 1037 targetLimit=targ+MAX_LENGTH; 1038 offs=offsetBuffer; 1039 ucnv_fromUnicode (conv, 1040 (char **)&targ, 1041 (const char *)targetLimit, 1042 &src, 1043 sourceLimit, 1044 expectOffsets ? offs : NULL, 1045 doFlush, 1046 &status); 1047 ucnv_close(conv); 1048 if(status != expectedStatus){ 1049 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1050 return FALSE; 1051 } 1052 1053 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1054 sourceLen, targ-buffer); 1055 1056 if(expectLen != targ-buffer) 1057 { 1058 log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1059 log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1060 printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer)); 1061 printSeqErr((const unsigned char*)expect, expectLen); 1062 return FALSE; 1063 } 1064 1065 if(memcmp(buffer, expect, expectLen)){ 1066 log_err("String does not match. FROM Unicode to codePage%s\n", codepage); 1067 log_info("\nGot:"); 1068 printSeqErr((const unsigned char *)buffer, expectLen); 1069 log_info("\nExpected:"); 1070 printSeqErr((const unsigned char *)expect, expectLen); 1071 return FALSE; 1072 } 1073 else { 1074 log_verbose("Matches!\n"); 1075 } 1076 1077 if (expectOffsets != 0){ 1078 log_verbose("comparing %d offsets..\n", targ-buffer); 1079 if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){ 1080 log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage); 1081 log_info("\nGot : "); 1082 printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer)); 1083 for(p=buffer;p<targ;p++) 1084 log_info("%d, ", offsetBuffer[p-buffer]); 1085 log_info("\nExpected: "); 1086 for(i=0; i< (targ-buffer); i++) 1087 log_info("%d,", expectOffsets[i]); 1088 } 1089 } 1090 1091 return TRUE; 1092 } 1093 1094 1095 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 1096 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1097 { 1098 UErrorCode status = U_ZERO_ERROR; 1099 UConverter *conv = 0; 1100 int32_t i=0; 1101 UChar *p=0; 1102 const char* src; 1103 UChar buffer[MAX_LENGTH]; 1104 int32_t offsetBuffer[MAX_LENGTH]; 1105 int32_t *offs=0; 1106 UChar *targ; 1107 UChar *targetLimit; 1108 uint8_t *sourceLimit=0; 1109 1110 1111 1112 conv = ucnv_open(codepage, &status); 1113 if(U_FAILURE(status)) 1114 { 1115 log_data_err("Couldn't open converter %s\n",codepage); 1116 return TRUE; 1117 } 1118 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1119 1120 1121 1122 for(i=0; i<MAX_LENGTH; i++){ 1123 buffer[i]=0xFFFE; 1124 offsetBuffer[i]=-1; 1125 } 1126 1127 src=(const char *)source; 1128 sourceLimit=(uint8_t*)(src+(sourceLen)); 1129 targ=buffer; 1130 targetLimit=targ+MAX_LENGTH; 1131 offs=offsetBuffer; 1132 1133 1134 1135 ucnv_toUnicode (conv, 1136 &targ, 1137 targetLimit, 1138 (const char **)&src, 1139 (const char *)sourceLimit, 1140 expectOffsets ? offs : NULL, 1141 doFlush, 1142 &status); 1143 1144 ucnv_close(conv); 1145 if(status != expectedStatus){ 1146 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1147 return FALSE; 1148 } 1149 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1150 sourceLen, targ-buffer); 1151 1152 1153 1154 1155 log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2); 1156 1157 if (expectOffsets != 0) { 1158 if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){ 1159 1160 log_err("did not get the expected offsets from %s To UNICODE\n", codepage); 1161 log_info("\nGot : "); 1162 for(p=buffer;p<targ;p++) 1163 log_info("%d, ", offsetBuffer[p-buffer]); 1164 log_info("\nExpected: "); 1165 for(i=0; i<(targ-buffer); i++) 1166 log_info("%d, ", expectOffsets[i]); 1167 log_info("\nGot result:"); 1168 for(i=0; i<(targ-buffer); i++) 1169 log_info("0x%04X,", buffer[i]); 1170 log_info("\nFrom Input:"); 1171 for(i=0; i<(src-(const char *)source); i++) 1172 log_info("0x%02X,", (unsigned char)source[i]); 1173 log_info("\n"); 1174 } 1175 } 1176 if(memcmp(buffer, expect, expectLen*2)){ 1177 log_err("String does not match. from codePage %s TO Unicode\n", codepage); 1178 log_info("\nGot:"); 1179 printUSeqErr(buffer, expectLen); 1180 log_info("\nExpected:"); 1181 printUSeqErr(expect, expectLen); 1182 return FALSE; 1183 } 1184 else { 1185 log_verbose("Matches!\n"); 1186 } 1187 1188 return TRUE; 1189 } 1190 1191 1192 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1193 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset) 1194 { 1195 UErrorCode status = U_ZERO_ERROR; 1196 UConverter *conv = 0; 1197 char junkout[MAX_LENGTH]; /* FIX */ 1198 int32_t junokout[MAX_LENGTH]; /* FIX */ 1199 char *p; 1200 const UChar *src; 1201 char *end; 1202 char *targ; 1203 int32_t *offs; 1204 int i; 1205 int32_t realBufferSize; 1206 char *realBufferEnd; 1207 const UChar *realSourceEnd; 1208 const UChar *sourceLimit; 1209 UBool checkOffsets = TRUE; 1210 UBool doFlush; 1211 1212 UConverterFromUCallback oldAction = NULL; 1213 const void* oldContext = NULL; 1214 1215 for(i=0;i<MAX_LENGTH;i++) 1216 junkout[i] = (char)0xF0; 1217 for(i=0;i<MAX_LENGTH;i++) 1218 junokout[i] = 0xFF; 1219 1220 setNuConvTestName(codepage, "FROM"); 1221 1222 log_verbose("\n========= %s\n", gNuConvTestName); 1223 1224 conv = ucnv_open(codepage, &status); 1225 if(U_FAILURE(status)) 1226 { 1227 log_data_err("Couldn't open converter %s\n",codepage); 1228 return TRUE; 1229 } 1230 1231 log_verbose("Converter opened..\n"); 1232 /*----setting the callback routine----*/ 1233 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1234 if (U_FAILURE(status)) { 1235 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1236 } 1237 /*------------------------*/ 1238 1239 src = source; 1240 targ = junkout; 1241 offs = junokout; 1242 1243 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1244 realBufferEnd = junkout + realBufferSize; 1245 realSourceEnd = source + sourceLen; 1246 1247 if ( gOutBufferSize != realBufferSize ) 1248 checkOffsets = FALSE; 1249 1250 if( gInBufferSize != MAX_LENGTH ) 1251 checkOffsets = FALSE; 1252 1253 do 1254 { 1255 end = nct_min(targ + gOutBufferSize, realBufferEnd); 1256 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 1257 1258 doFlush = (UBool)(sourceLimit == realSourceEnd); 1259 1260 if(targ == realBufferEnd) 1261 { 1262 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 1263 return FALSE; 1264 } 1265 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 1266 1267 1268 status = U_ZERO_ERROR; 1269 if(gInBufferSize ==999 && gOutBufferSize==999) 1270 doFlush = FALSE; 1271 ucnv_fromUnicode (conv, 1272 (char **)&targ, 1273 (const char *)end, 1274 &src, 1275 sourceLimit, 1276 offs, 1277 doFlush, /* flush if we're at the end of the input data */ 1278 &status); 1279 if(testReset) 1280 ucnv_resetToUnicode(conv); 1281 if(gInBufferSize ==999 && gOutBufferSize==999) 1282 ucnv_resetToUnicode(conv); 1283 1284 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 1285 1286 if(U_FAILURE(status)) { 1287 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1288 return FALSE; 1289 } 1290 1291 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1292 sourceLen, targ-junkout); 1293 if(VERBOSITY) 1294 { 1295 char junk[999]; 1296 char offset_str[999]; 1297 char *ptr; 1298 1299 junk[0] = 0; 1300 offset_str[0] = 0; 1301 for(ptr = junkout;ptr<targ;ptr++) 1302 { 1303 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr); 1304 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]); 1305 } 1306 1307 log_verbose(junk); 1308 printSeq((const unsigned char *)expect, expectLen); 1309 if ( checkOffsets ) 1310 { 1311 log_verbose("\nOffsets:"); 1312 log_verbose(offset_str); 1313 } 1314 log_verbose("\n"); 1315 } 1316 ucnv_close(conv); 1317 1318 1319 if(expectLen != targ-junkout) 1320 { 1321 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1322 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1323 log_info("\nGot:"); 1324 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1325 log_info("\nExpected:"); 1326 printSeqErr((const unsigned char*)expect, expectLen); 1327 return FALSE; 1328 } 1329 1330 if (checkOffsets && (expectOffsets != 0) ) 1331 { 1332 log_verbose("comparing %d offsets..\n", targ-junkout); 1333 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 1334 log_err("did not get the expected offsets. %s", gNuConvTestName); 1335 log_err("Got : "); 1336 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1337 for(p=junkout;p<targ;p++) 1338 log_err("%d, ", junokout[p-junkout]); 1339 log_err("\nExpected: "); 1340 for(i=0; i<(targ-junkout); i++) 1341 log_err("%d,", expectOffsets[i]); 1342 } 1343 } 1344 1345 log_verbose("comparing..\n"); 1346 if(!memcmp(junkout, expect, expectLen)) 1347 { 1348 log_verbose("Matches!\n"); 1349 return TRUE; 1350 } 1351 else 1352 { 1353 log_err("String does not match. %s\n", gNuConvTestName); 1354 printUSeqErr(source, sourceLen); 1355 log_info("\nGot:"); 1356 printSeqErr((const unsigned char *)junkout, expectLen); 1357 log_info("\nExpected:"); 1358 printSeqErr((const unsigned char *)expect, expectLen); 1359 1360 return FALSE; 1361 } 1362 } 1363 1364 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 1365 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset) 1366 { 1367 UErrorCode status = U_ZERO_ERROR; 1368 UConverter *conv = 0; 1369 UChar junkout[MAX_LENGTH]; /* FIX */ 1370 int32_t junokout[MAX_LENGTH]; /* FIX */ 1371 const char *src; 1372 const char *realSourceEnd; 1373 const char *srcLimit; 1374 UChar *p; 1375 UChar *targ; 1376 UChar *end; 1377 int32_t *offs; 1378 int i; 1379 UBool checkOffsets = TRUE; 1380 int32_t realBufferSize; 1381 UChar *realBufferEnd; 1382 UBool doFlush; 1383 1384 UConverterToUCallback oldAction = NULL; 1385 const void* oldContext = NULL; 1386 1387 1388 for(i=0;i<MAX_LENGTH;i++) 1389 junkout[i] = 0xFFFE; 1390 1391 for(i=0;i<MAX_LENGTH;i++) 1392 junokout[i] = -1; 1393 1394 setNuConvTestName(codepage, "TO"); 1395 1396 log_verbose("\n========= %s\n", gNuConvTestName); 1397 1398 conv = ucnv_open(codepage, &status); 1399 if(U_FAILURE(status)) 1400 { 1401 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 1402 return TRUE; 1403 } 1404 1405 log_verbose("Converter opened..\n"); 1406 /*----setting the callback routine----*/ 1407 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1408 if (U_FAILURE(status)) { 1409 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1410 } 1411 /*-------------------------------------*/ 1412 1413 src = (const char *)source; 1414 targ = junkout; 1415 offs = junokout; 1416 1417 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 1418 realBufferEnd = junkout + realBufferSize; 1419 realSourceEnd = src + sourcelen; 1420 1421 if ( gOutBufferSize != realBufferSize ) 1422 checkOffsets = FALSE; 1423 1424 if( gInBufferSize != MAX_LENGTH ) 1425 checkOffsets = FALSE; 1426 1427 do 1428 { 1429 end = nct_min( targ + gOutBufferSize, realBufferEnd); 1430 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 1431 1432 if(targ == realBufferEnd) 1433 { 1434 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 1435 return FALSE; 1436 } 1437 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 1438 1439 /* oldTarg = targ; */ 1440 1441 status = U_ZERO_ERROR; 1442 doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE); 1443 1444 ucnv_toUnicode (conv, 1445 &targ, 1446 end, 1447 (const char **)&src, 1448 (const char *)srcLimit, 1449 offs, 1450 doFlush, /* flush if we're at the end of hte source data */ 1451 &status); 1452 if(testReset) 1453 ucnv_resetFromUnicode(conv); 1454 if(gInBufferSize ==999 && gOutBufferSize==999) 1455 ucnv_resetToUnicode(conv); 1456 /* offs += (targ-oldTarg); */ 1457 1458 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 1459 1460 if(U_FAILURE(status)) 1461 { 1462 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1463 return FALSE; 1464 } 1465 1466 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 1467 sourcelen, targ-junkout); 1468 if(VERBOSITY) 1469 { 1470 char junk[999]; 1471 char offset_str[999]; 1472 1473 UChar *ptr; 1474 1475 junk[0] = 0; 1476 offset_str[0] = 0; 1477 1478 for(ptr = junkout;ptr<targ;ptr++) 1479 { 1480 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 1481 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 1482 } 1483 1484 log_verbose(junk); 1485 1486 if ( checkOffsets ) 1487 { 1488 log_verbose("\nOffsets:"); 1489 log_verbose(offset_str); 1490 } 1491 log_verbose("\n"); 1492 } 1493 ucnv_close(conv); 1494 1495 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 1496 1497 if (checkOffsets && (expectOffsets != 0)) 1498 { 1499 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 1500 1501 log_err("did not get the expected offsets. %s",gNuConvTestName); 1502 for(p=junkout;p<targ;p++) 1503 log_err("%d, ", junokout[p-junkout]); 1504 log_err("\nExpected: "); 1505 for(i=0; i<(targ-junkout); i++) 1506 log_err("%d,", expectOffsets[i]); 1507 log_err(""); 1508 for(i=0; i<(targ-junkout); i++) 1509 log_err("%X,", junkout[i]); 1510 log_err(""); 1511 for(i=0; i<(src-(const char *)source); i++) 1512 log_err("%X,", (unsigned char)source[i]); 1513 } 1514 } 1515 1516 if(!memcmp(junkout, expect, expectlen*2)) 1517 { 1518 log_verbose("Matches!\n"); 1519 return TRUE; 1520 } 1521 else 1522 { 1523 log_err("String does not match. %s\n", gNuConvTestName); 1524 log_verbose("String does not match. %s\n", gNuConvTestName); 1525 log_info("\nGot:"); 1526 printUSeq(junkout, expectlen); 1527 log_info("\nExpected:"); 1528 printUSeq(expect, expectlen); 1529 return FALSE; 1530 } 1531 } 1532 1533 1534 static void TestResetBehaviour(void){ 1535 #if !UCONFIG_NO_LEGACY_CONVERSION 1536 log_verbose("Testing Reset for DBCS and MBCS\n"); 1537 { 1538 static const UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 1539 static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 1540 static const int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 1541 1542 1543 static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8}; 1544 static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7}; 1545 static const int32_t offsets1[] = { 0,2,4,6}; 1546 1547 /*DBCS*/ 1548 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1549 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1550 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1551 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1552 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1553 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1554 1555 if(!testConvertToU(expected1, sizeof(expected1), 1556 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1557 offsets1, TRUE)) 1558 log_err("ibm-1363 -> did not match.\n"); 1559 /*MBCS*/ 1560 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1561 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1562 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1563 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1564 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1565 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1566 1567 if(!testConvertToU(expected1, sizeof(expected1), 1568 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1569 offsets1, TRUE)) 1570 log_err("ibm-1363 -> did not match.\n"); 1571 1572 } 1573 1574 /* BEGIN android-removed */ 1575 /* To save space, Android does not build full ISO2022 CJK tables. 1576 We skip the tests for ISO-2022. */ 1577 /* 1578 log_verbose("Testing Reset for ISO-2022-jp\n"); 1579 { 1580 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1581 1582 static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1583 0x31,0x1A, 0x32}; 1584 1585 1586 static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 1587 1588 1589 static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1590 static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1591 0x31,0x1A, 0x32}; 1592 static const int32_t offsets1[] = { 3,5,10,11,12}; 1593 1594 // iso-2022-jp 1595 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1596 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1597 log_err("u-> not match.\n"); 1598 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1599 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1600 log_err("u-> not match.\n"); 1601 1602 if(!testConvertToU(expected1, sizeof(expected1), 1603 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1604 offsets1, TRUE)) 1605 log_err("iso-2022-jp -> did not match.\n"); 1606 1607 } 1608 1609 log_verbose("Testing Reset for ISO-2022-cn\n"); 1610 { 1611 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1612 1613 static const uint8_t expected[] = { 1614 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1615 0x36, 0x21, 1616 0x0f, 0x31, 1617 0x1A, 1618 0x32 1619 }; 1620 1621 1622 static const int32_t offsets[] = { 1623 0, 0, 0, 0, 0, 0, 0, 1624 1, 1, 1625 2, 2, 1626 3, 1627 5, }; 1628 1629 UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1630 static const uint8_t expected1[] = { 1631 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1632 0x36, 0x21, 1633 0x1B, 0x24, 0x29, 0x47, 0x24, 0x22, 1634 0x0f, 0x1A, 1635 0x32 1636 }; 1637 static const int32_t offsets1[] = { 5,7,13,16,17}; 1638 1639 // iso-2022-CN 1640 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1641 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1642 log_err("u-> not match.\n"); 1643 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1644 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1645 log_err("u-> not match.\n"); 1646 1647 if(!testConvertToU(expected1, sizeof(expected1), 1648 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1649 offsets1, TRUE)) 1650 log_err("iso-2022-cn -> did not match.\n"); 1651 } 1652 1653 log_verbose("Testing Reset for ISO-2022-kr\n"); 1654 { 1655 UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1656 1657 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 1658 0x0E, 0x6C, 0x69, 1659 0x0f, 0x1A, 1660 0x0e, 0x6F, 0x4B, 1661 0x0F, 0x31, 1662 0x1A, 1663 0x32 }; 1664 1665 static const int32_t offsets[] = {-1, -1, -1, -1, 1666 0, 0, 0, 1667 1, 1, 1668 3, 3, 3, 1669 4, 4, 1670 5, 1671 7, 1672 }; 1673 static const UChar sampleText1[] = { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032}; 1674 1675 static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43, 1676 0x0E, 0x6C, 0x69, 1677 0x0f, 0x41, 1678 0x0e, 0x6F, 0x4B, 1679 0x0F, 0x31, 1680 0x42, 1681 0x32 }; 1682 1683 static const int32_t offsets1[] = { 1684 5, 8, 10, 1685 13, 14, 15 1686 1687 }; 1688 // iso-2022-kr 1689 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1690 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1691 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1692 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1693 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1694 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1695 if(!testConvertToU(expected1, sizeof(expected1), 1696 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1697 offsets1, TRUE)) 1698 log_err("iso-2022-kr -> did not match.\n"); 1699 } 1700 */ 1701 /* END android-removed */ 1702 1703 log_verbose("Testing Reset for HZ\n"); 1704 { 1705 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1706 1707 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 1708 0x7E, 0x7D, 0x1A, 1709 0x7E, 0x7B, 0x36, 0x21, 1710 0x7E, 0x7D, 0x31, 1711 0x1A, 1712 0x32 }; 1713 1714 1715 static const int32_t offsets[] = {0,0,0,0, 1716 1,1,1, 1717 3,3,3,3, 1718 4,4,4, 1719 5, 1720 7,}; 1721 static const UChar sampleText1[] = { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032}; 1722 1723 static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B, 1724 0x7E, 0x7D, 0x35, 1725 0x7E, 0x7B, 0x36, 0x21, 1726 0x7E, 0x7D, 0x31, 1727 0x41, 1728 0x32 }; 1729 1730 1731 static const int32_t offsets1[] = {2,6,9,13,14,15 1732 }; 1733 1734 /*hz*/ 1735 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1736 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1737 log_err("u-> not match.\n"); 1738 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1739 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1740 log_err("u-> not match.\n"); 1741 if(!testConvertToU(expected1, sizeof(expected1), 1742 sampleText1, sizeof(sampleText1)/sizeof(sampleText1[0]), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1743 offsets1, TRUE)) 1744 log_err("hz -> did not match.\n"); 1745 } 1746 #endif 1747 1748 /*UTF-8*/ 1749 log_verbose("Testing for UTF8\n"); 1750 { 1751 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 1752 int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 1753 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 1754 0x04, 0x06 }; 1755 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 1756 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 1757 1758 1759 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 1760 /*UTF-8*/ 1761 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1762 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1763 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1764 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1765 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1766 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1767 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1768 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1769 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1770 if(!testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 1771 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1772 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1773 if(!testConvertToU(expected, sizeof(expected), 1774 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1775 log_err("UTF8 -> did not match.\n"); 1776 if(!testConvertToU(expected, sizeof(expected), 1777 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1778 log_err("UTF8 -> did not match.\n"); 1779 if(!testConvertToU(expected, sizeof(expected), 1780 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1781 log_err("UTF8 -> did not match.\n"); 1782 if(!testConvertToU(expected, sizeof(expected), 1783 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1784 log_err("UTF8 -> did not match.\n"); 1785 1786 } 1787 1788 } 1789 1790 /* Test that U_TRUNCATED_CHAR_FOUND is set. */ 1791 static void 1792 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) { 1793 UConverter *cnv; 1794 1795 UChar buffer[2]; 1796 UChar *target, *targetLimit; 1797 const char *source, *sourceLimit; 1798 1799 UErrorCode errorCode; 1800 1801 errorCode=U_ZERO_ERROR; 1802 cnv=ucnv_open(cnvName, &errorCode); 1803 if(U_FAILURE(errorCode)) { 1804 log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode)); 1805 return; 1806 } 1807 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 1808 if(U_FAILURE(errorCode)) { 1809 log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n", 1810 cnvName, u_errorName(errorCode)); 1811 ucnv_close(cnv); 1812 return; 1813 } 1814 1815 source=(const char *)bytes; 1816 sourceLimit=source+length; 1817 target=buffer; 1818 targetLimit=buffer+LENGTHOF(buffer); 1819 1820 /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */ 1821 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode); 1822 if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) { 1823 log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n", 1824 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1825 } 1826 1827 errorCode=U_ZERO_ERROR; 1828 source=sourceLimit; 1829 target=buffer; 1830 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1831 if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) { 1832 log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n", 1833 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer)); 1834 } 1835 1836 /* 2. input bytes with flush=TRUE */ 1837 ucnv_resetToUnicode(cnv); 1838 1839 errorCode=U_ZERO_ERROR; 1840 source=(const char *)bytes; 1841 target=buffer; 1842 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1843 if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) { 1844 log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n", 1845 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1846 } 1847 1848 1849 ucnv_close(cnv); 1850 } 1851 1852 static void 1853 TestTruncated() { 1854 static const struct { 1855 const char *cnvName; 1856 uint8_t bytes[8]; /* partial input bytes resulting in no output */ 1857 int32_t length; 1858 } testCases[]={ 1859 { "IMAP-mailbox-name", { 0x26 }, 1 }, /* & */ 1860 { "IMAP-mailbox-name", { 0x26, 0x42 }, 2 }, /* &B */ 1861 { "IMAP-mailbox-name", { 0x26, 0x42, 0x42 }, 3 }, /* &BB */ 1862 { "IMAP-mailbox-name", { 0x26, 0x41, 0x41 }, 3 }, /* &AA */ 1863 1864 { "UTF-7", { 0x2b, 0x42 }, 2 }, /* +B */ 1865 { "UTF-8", { 0xd1 }, 1 }, 1866 1867 { "UTF-16BE", { 0x4e }, 1 }, 1868 { "UTF-16LE", { 0x4e }, 1 }, 1869 { "UTF-16", { 0x4e }, 1 }, 1870 { "UTF-16", { 0xff }, 1 }, 1871 { "UTF-16", { 0xfe, 0xff, 0x4e }, 3 }, 1872 1873 { "UTF-32BE", { 0, 0, 0x4e }, 3 }, 1874 { "UTF-32LE", { 0x4e }, 1 }, 1875 { "UTF-32", { 0, 0, 0x4e }, 3 }, 1876 { "UTF-32", { 0xff }, 1 }, 1877 { "UTF-32", { 0, 0, 0xfe, 0xff, 0 }, 5 }, 1878 { "SCSU", { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */ 1879 1880 #if !UCONFIG_NO_LEGACY_CONVERSION 1881 { "BOCU-1", { 0xd5 }, 1 }, 1882 1883 { "Shift-JIS", { 0xe0 }, 1 }, 1884 1885 { "ibm-939", { 0x0e, 0x41 }, 2 } /* SO 0x41 */ 1886 #else 1887 { "BOCU-1", { 0xd5 }, 1 ,} 1888 #endif 1889 }; 1890 int32_t i; 1891 1892 for(i=0; i<LENGTHOF(testCases); ++i) { 1893 doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length); 1894 } 1895 } 1896 1897 typedef struct NameRange { 1898 const char *name; 1899 UChar32 start, end, start2, end2, notStart, notEnd; 1900 } NameRange; 1901 1902 static void 1903 TestUnicodeSet() { 1904 UErrorCode errorCode; 1905 UConverter *cnv; 1906 USet *set; 1907 const char *name; 1908 int32_t i, count; 1909 1910 static const char *const completeSetNames[]={ 1911 "UTF-7", 1912 "UTF-8", 1913 "UTF-16", 1914 "UTF-16BE", 1915 "UTF-16LE", 1916 "UTF-32", 1917 "UTF-32BE", 1918 "UTF-32LE", 1919 "SCSU", 1920 "BOCU-1", 1921 "CESU-8", 1922 #if !UCONFIG_NO_LEGACY_CONVERSION 1923 "gb18030", 1924 #endif 1925 "IMAP-mailbox-name" 1926 }; 1927 #if !UCONFIG_NO_LEGACY_CONVERSION 1928 static const char *const lmbcsNames[]={ 1929 "LMBCS-1", 1930 "LMBCS-2", 1931 "LMBCS-3", 1932 "LMBCS-4", 1933 "LMBCS-5", 1934 "LMBCS-6", 1935 "LMBCS-8", 1936 "LMBCS-11", 1937 "LMBCS-16", 1938 "LMBCS-17", 1939 "LMBCS-18", 1940 "LMBCS-19" 1941 }; 1942 #endif 1943 1944 static const NameRange nameRanges[]={ 1945 { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1946 #if !UCONFIG_NO_LEGACY_CONVERSION 1947 { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1948 #endif 1949 { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff }, 1950 #if !UCONFIG_NO_LEGACY_CONVERSION 1951 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }, 1952 { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff }, 1953 /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */ 1954 { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff } 1955 #else 1956 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff } 1957 #endif 1958 }; 1959 1960 /* open an empty set */ 1961 set=uset_open(1, 0); 1962 1963 count=ucnv_countAvailable(); 1964 for(i=0; i<count; ++i) { 1965 errorCode=U_ZERO_ERROR; 1966 name=ucnv_getAvailableName(i); 1967 cnv=ucnv_open(name, &errorCode); 1968 if(U_FAILURE(errorCode)) { 1969 log_data_err("error: unable to open converter %s - %s\n", 1970 name, u_errorName(errorCode)); 1971 continue; 1972 } 1973 1974 uset_clear(set); 1975 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1976 if(U_FAILURE(errorCode)) { 1977 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 1978 name, u_errorName(errorCode)); 1979 } else if(uset_size(set)==0) { 1980 log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name); 1981 } 1982 1983 ucnv_close(cnv); 1984 } 1985 1986 /* test converters that are known to convert all of Unicode (except maybe for surrogates) */ 1987 for(i=0; i<LENGTHOF(completeSetNames); ++i) { 1988 errorCode=U_ZERO_ERROR; 1989 name=completeSetNames[i]; 1990 cnv=ucnv_open(name, &errorCode); 1991 if(U_FAILURE(errorCode)) { 1992 log_data_err("error: unable to open converter %s - %s\n", 1993 name, u_errorName(errorCode)); 1994 continue; 1995 } 1996 1997 uset_clear(set); 1998 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1999 if(U_FAILURE(errorCode)) { 2000 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2001 name, u_errorName(errorCode)); 2002 } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) { 2003 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name); 2004 } 2005 2006 ucnv_close(cnv); 2007 } 2008 2009 #if !UCONFIG_NO_LEGACY_CONVERSION 2010 /* test LMBCS variants which convert all of Unicode except for U+F6xx */ 2011 for(i=0; i<LENGTHOF(lmbcsNames); ++i) { 2012 errorCode=U_ZERO_ERROR; 2013 name=lmbcsNames[i]; 2014 cnv=ucnv_open(name, &errorCode); 2015 if(U_FAILURE(errorCode)) { 2016 log_data_err("error: unable to open converter %s - %s\n", 2017 name, u_errorName(errorCode)); 2018 continue; 2019 } 2020 2021 uset_clear(set); 2022 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2023 if(U_FAILURE(errorCode)) { 2024 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2025 name, u_errorName(errorCode)); 2026 } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) { 2027 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name); 2028 } 2029 2030 ucnv_close(cnv); 2031 } 2032 #endif 2033 2034 /* test specific sets */ 2035 for(i=0; i<LENGTHOF(nameRanges); ++i) { 2036 errorCode=U_ZERO_ERROR; 2037 name=nameRanges[i].name; 2038 cnv=ucnv_open(name, &errorCode); 2039 if(U_FAILURE(errorCode)) { 2040 log_data_err("error: unable to open converter %s - %s\n", 2041 name, u_errorName(errorCode)); 2042 continue; 2043 } 2044 2045 uset_clear(set); 2046 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2047 if(U_FAILURE(errorCode)) { 2048 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2049 name, u_errorName(errorCode)); 2050 } else if( 2051 !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) || 2052 (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2)) 2053 ) { 2054 log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name); 2055 } else if(nameRanges[i].notStart>=0) { 2056 /* simulate containsAny() with the C API */ 2057 uset_complement(set); 2058 if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) { 2059 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name); 2060 } 2061 } 2062 2063 ucnv_close(cnv); 2064 } 2065 2066 errorCode = U_ZERO_ERROR; 2067 ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode); 2068 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2069 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2070 } 2071 errorCode = U_PARSE_ERROR; 2072 /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */ 2073 ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode); 2074 if (errorCode != U_PARSE_ERROR) { 2075 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2076 } 2077 2078 uset_close(set); 2079 } 2080