1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /***************************************************************************** 9 * 10 * File ncnvtst.c 11 * 12 * Modification History: 13 * Name Description 14 * Madhu Katragadda 7/7/2000 Converter Tests for extended code coverage 15 ****************************************************************************** 16 */ 17 #include <stdio.h> 18 #include <stdlib.h> 19 #include <string.h> 20 #include "unicode/uloc.h" 21 #include "unicode/ucnv.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/uset.h" 25 #include "unicode/utf8.h" 26 #include "unicode/utf16.h" 27 #include "cintltst.h" 28 #include "cmemory.h" 29 30 #define MAX_LENGTH 999 31 32 #define UNICODE_LIMIT 0x10FFFF 33 #define SURROGATE_HIGH_START 0xD800 34 #define SURROGATE_LOW_END 0xDFFF 35 36 static int32_t gInBufferSize = 0; 37 static int32_t gOutBufferSize = 0; 38 static char gNuConvTestName[1024]; 39 40 #define nct_min(x,y) ((x<y) ? x : y) 41 42 static void printSeq(const unsigned char* a, int len); 43 static void printSeqErr(const unsigned char* a, int len); 44 static void printUSeq(const UChar* a, int len); 45 static void printUSeqErr(const UChar* a, int len); 46 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 47 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 48 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 49 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 50 51 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 52 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset); 53 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 54 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset); 55 56 static void setNuConvTestName(const char *codepage, const char *direction) 57 { 58 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 59 codepage, 60 direction, 61 (int)gInBufferSize, 62 (int)gOutBufferSize); 63 } 64 65 66 static void TestSurrogateBehaviour(void); 67 static void TestErrorBehaviour(void); 68 69 #if !UCONFIG_NO_LEGACY_CONVERSION 70 static void TestToUnicodeErrorBehaviour(void); 71 static void TestGetNextErrorBehaviour(void); 72 #endif 73 74 static void TestRegressionUTF8(void); 75 static void TestRegressionUTF32(void); 76 static void TestAvailableConverters(void); 77 static void TestFlushInternalBuffer(void); /*for improved code coverage in ucnv_cnv.c*/ 78 static void TestResetBehaviour(void); 79 static void TestTruncated(void); 80 static void TestUnicodeSet(void); 81 82 static void TestWithBufferSize(int32_t osize, int32_t isize); 83 84 85 static void printSeq(const unsigned char* a, int len) 86 { 87 int i=0; 88 log_verbose("\n{"); 89 while (i<len) 90 log_verbose("0x%02X ", a[i++]); 91 log_verbose("}\n"); 92 } 93 94 static void printUSeq(const UChar* a, int len) 95 { 96 int i=0; 97 log_verbose("\n{"); 98 while (i<len) 99 log_verbose("%0x04X ", a[i++]); 100 log_verbose("}\n"); 101 } 102 103 static void printSeqErr(const unsigned char* a, int len) 104 { 105 int i=0; 106 fprintf(stderr, "\n{"); 107 while (i<len) fprintf(stderr, "0x%02X ", a[i++]); 108 fprintf(stderr, "}\n"); 109 } 110 111 static void printUSeqErr(const UChar* a, int len) 112 { 113 int i=0; 114 fprintf(stderr, "\n{"); 115 while (i<len) 116 fprintf(stderr, "0x%04X ", a[i++]); 117 fprintf(stderr,"}\n"); 118 } 119 120 void addExtraTests(TestNode** root); 121 122 void addExtraTests(TestNode** root) 123 { 124 addTest(root, &TestSurrogateBehaviour, "tsconv/ncnvtst/TestSurrogateBehaviour"); 125 addTest(root, &TestErrorBehaviour, "tsconv/ncnvtst/TestErrorBehaviour"); 126 127 #if !UCONFIG_NO_LEGACY_CONVERSION 128 addTest(root, &TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour"); 129 addTest(root, &TestGetNextErrorBehaviour, "tsconv/ncnvtst/TestGetNextErrorBehaviour"); 130 #endif 131 132 addTest(root, &TestAvailableConverters, "tsconv/ncnvtst/TestAvailableConverters"); 133 addTest(root, &TestFlushInternalBuffer, "tsconv/ncnvtst/TestFlushInternalBuffer"); 134 addTest(root, &TestResetBehaviour, "tsconv/ncnvtst/TestResetBehaviour"); 135 addTest(root, &TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8"); 136 addTest(root, &TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32"); 137 addTest(root, &TestTruncated, "tsconv/ncnvtst/TestTruncated"); 138 addTest(root, &TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet"); 139 } 140 141 /*test surrogate behaviour*/ 142 static void TestSurrogateBehaviour(){ 143 log_verbose("Testing for SBCS and LATIN_1\n"); 144 { 145 UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032}; 146 const uint8_t expected[] = {0x31, 0x1a, 0x32}; 147 148 #if !UCONFIG_NO_LEGACY_CONVERSION 149 /*SBCS*/ 150 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 151 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR)) 152 log_err("u-> ibm-920 [UCNV_SBCS] not match.\n"); 153 #endif 154 155 /*LATIN_1*/ 156 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 157 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR )) 158 log_err("u-> LATIN_1 not match.\n"); 159 160 } 161 162 #if !UCONFIG_NO_LEGACY_CONVERSION 163 log_verbose("Testing for DBCS and MBCS\n"); 164 { 165 UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 166 const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 167 int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 168 169 /*DBCS*/ 170 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 171 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 172 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 173 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 174 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR)) 175 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 176 /*MBCS*/ 177 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 178 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 179 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 180 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 181 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR)) 182 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 183 } 184 185 log_verbose("Testing for ISO-2022-jp\n"); 186 { 187 UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 188 189 const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 190 0x31,0x1A, 0x32}; 191 192 193 int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 194 195 /*iso-2022-jp*/ 196 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 197 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR)) 198 log_err("u-> not match.\n"); 199 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 200 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR)) 201 log_err("u-> not match.\n"); 202 } 203 204 /* BEGIN android-removed */ 205 /* To save space, Android does not build full ISO-2022-CN tables. 206 We skip the tests for ISO-2022-CN. */ 207 /* 208 log_verbose("Testing for ISO-2022-cn\n"); 209 { 210 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 211 212 static const uint8_t expected[] = { 213 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 214 0x36, 0x21, 215 0x0F, 0x31, 216 0x1A, 217 0x32 218 }; 219 220 221 222 static const int32_t offsets[] = { 223 0, 0, 0, 0, 0, 0, 0, 224 1, 1, 225 2, 2, 226 3, 227 5, }; 228 229 // iso-2022-CN android-change 230 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 231 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR)) 232 log_err("u-> not match.\n"); 233 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 234 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR)) 235 log_err("u-> not match.\n"); 236 } 237 */ 238 /* END android-removed */ 239 240 log_verbose("Testing for ISO-2022-kr\n"); 241 { 242 static const UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 243 244 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 245 0x0E, 0x6C, 0x69, 246 0x0f, 0x1A, 247 0x0e, 0x6F, 0x4B, 248 0x0F, 0x31, 249 0x1A, 250 0x32 }; 251 252 static const int32_t offsets[] = {-1, -1, -1, -1, 253 0, 0, 0, 254 1, 1, 255 3, 3, 3, 256 4, 4, 257 5, 258 7, 259 }; 260 261 /*iso-2022-kr*/ 262 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 263 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR)) 264 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 265 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 266 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR)) 267 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 268 } 269 270 log_verbose("Testing for HZ\n"); 271 { 272 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 273 274 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 275 0x7E, 0x7D, 0x1A, 276 0x7E, 0x7B, 0x36, 0x21, 277 0x7E, 0x7D, 0x31, 278 0x1A, 279 0x32 }; 280 281 282 static const int32_t offsets[] = {0,0,0,0, 283 1,1,1, 284 3,3,3,3, 285 4,4,4, 286 5, 287 7,}; 288 289 /*hz*/ 290 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 291 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR)) 292 log_err("u-> HZ not match.\n"); 293 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 294 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR)) 295 log_err("u-> HZ not match.\n"); 296 } 297 #endif 298 299 /*UTF-8*/ 300 log_verbose("Testing for UTF8\n"); 301 { 302 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 303 static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 304 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 305 0x04, 0x06 }; 306 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 307 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 308 309 310 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 311 /*UTF-8*/ 312 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 313 expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR )) 314 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 315 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 316 expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR )) 317 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 318 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 319 expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR )) 320 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 321 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 322 expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR )) 323 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 324 325 if(!convertToU(expected, sizeof(expected), 326 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, TRUE, U_ZERO_ERROR )) 327 log_err("UTF8 -> u did not match.\n"); 328 if(!convertToU(expected, sizeof(expected), 329 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, FALSE, U_ZERO_ERROR )) 330 log_err("UTF8 -> u did not match.\n"); 331 if(!convertToU(expected, sizeof(expected), 332 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR )) 333 log_err("UTF8 ->u did not match.\n"); 334 if(!convertToU(expected, sizeof(expected), 335 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR )) 336 log_err("UTF8 -> u did not match.\n"); 337 338 } 339 } 340 341 /*test various error behaviours*/ 342 static void TestErrorBehaviour(){ 343 log_verbose("Testing for SBCS and LATIN_1\n"); 344 { 345 static const UChar sampleText[] = { 0x0031, 0xd801}; 346 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 347 static const uint8_t expected0[] = { 0x31}; 348 static const uint8_t expected[] = { 0x31, 0x1a}; 349 static const uint8_t expected2[] = { 0x31, 0x1a, 0x32}; 350 351 #if !UCONFIG_NO_LEGACY_CONVERSION 352 /*SBCS*/ 353 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 354 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 355 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 356 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 357 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR)) 358 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 359 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 360 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 361 log_err("u-> ibm-920 [UCNV_SBCS] did not match\n"); 362 #endif 363 364 /*LATIN_1*/ 365 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 366 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 367 log_err("u-> LATIN_1 is supposed to fail\n"); 368 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 369 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR)) 370 log_err("u-> LATIN_1 is supposed to fail\n"); 371 372 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 373 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 374 log_err("u-> LATIN_1 did not match\n"); 375 } 376 377 #if !UCONFIG_NO_LEGACY_CONVERSION 378 log_verbose("Testing for DBCS and MBCS\n"); 379 { 380 static const UChar sampleText[] = { 0x00a1, 0xd801}; 381 static const uint8_t expected[] = { 0xa2, 0xae}; 382 static const int32_t offsets[] = { 0x00, 0x00}; 383 static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0}; 384 static const int32_t offsetsSUB[] = { 0x00, 0x00, 0x01, 0x01}; 385 386 static const UChar sampleText2[] = { 0x00a1, 0xd801, 0x00a4}; 387 static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 388 static const int32_t offsets2[] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02}; 389 390 static const UChar sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01}; 391 static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0}; 392 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x01, 0x02, 0x02}; 393 394 static const UChar sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; 395 static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; 396 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 }; 397 398 /*DBCS*/ 399 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 400 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 401 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 402 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 403 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 404 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 405 406 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 407 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR)) 408 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 409 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 410 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 411 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 412 413 414 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 415 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 416 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 417 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 418 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR)) 419 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 420 421 /*MBCS*/ 422 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 423 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 424 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 425 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 426 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 427 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 428 429 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 430 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 431 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 432 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 433 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 434 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 435 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 436 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR)) 437 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 438 439 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 440 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR)) 441 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 442 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 443 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR)) 444 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 445 446 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 447 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR)) 448 log_err("u-> euc-jp [UCNV_MBCS] \n"); 449 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 450 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR)) 451 log_err("u-> euc-jp [UCNV_MBCS] \n"); 452 } 453 454 /*iso-2022-jp*/ 455 log_verbose("Testing for iso-2022-jp\n"); 456 { 457 static const UChar sampleText[] = { 0x0031, 0xd801}; 458 static const uint8_t expected[] = { 0x31}; 459 static const uint8_t expectedSUB[] = { 0x31, 0x1a}; 460 static const int32_t offsets[] = { 0x00, 1}; 461 462 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 463 static const uint8_t expected2[] = { 0x31,0x1A,0x32}; 464 static const int32_t offsets2[] = { 0x00,0x01,0x02}; 465 466 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 467 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a}; 468 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 }; 469 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 470 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR)) 471 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 472 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 473 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 474 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 475 476 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 477 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR)) 478 log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n"); 479 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 480 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 481 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 482 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 483 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 484 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 485 486 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 487 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 488 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 489 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 490 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 491 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 492 } 493 494 /* BEGIN android-removed */ 495 /* To save space, Android does not build full ISO-2022-CN tables. 496 We skip the tests for ISO-2022-CN. */ 497 /*iso-2022-cn*/ 498 /* 499 log_verbose("Testing for iso-2022-cn\n"); 500 { 501 static const UChar sampleText[] = { 0x0031, 0xd801}; 502 static const uint8_t expected[] = { 0x31}; 503 static const uint8_t expectedSUB[] = { 0x31, 0x1A}; 504 static const int32_t offsets[] = { 0x00, 1}; 505 506 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 507 static const uint8_t expected2[] = { 0x31, 0x1A,0x32}; 508 static const int32_t offsets2[] = { 0x00, 0x01,0x02}; 509 510 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 511 static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A}; 512 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x02 }; 513 514 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 515 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a }; 516 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 }; 517 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 518 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR)) 519 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 520 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 521 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR)) 522 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 523 524 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 525 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR)) 526 log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n"); 527 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 528 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 529 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 530 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 531 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 532 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 533 534 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 535 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR)) 536 log_err("u->iso-2022-cn [UCNV_MBCS] \n"); 537 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 538 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR)) 539 log_err("u-> iso-2022-cn[UCNV_MBCS] \n"); 540 541 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 542 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR)) 543 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 544 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 545 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR)) 546 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 547 } 548 */ 549 /* END android-removed */ 550 551 /*iso-2022-kr*/ 552 log_verbose("Testing for iso-2022-kr\n"); 553 { 554 static const UChar sampleText[] = { 0x0031, 0xd801}; 555 static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31}; 556 static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A}; 557 static const int32_t offsets[] = { -1, -1, -1, -1, 0x00, 1}; 558 559 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 560 static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32}; 561 static const int32_t offsets2[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02}; 562 563 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 564 static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A }; 565 static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 }; 566 567 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 568 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR)) 569 log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); 570 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 571 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR)) 572 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 573 574 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 575 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR)) 576 log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n"); 577 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 578 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 579 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 580 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 581 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 582 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 583 584 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 585 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR)) 586 log_err("u->iso-2022-kr [UCNV_MBCS] \n"); 587 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 588 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR)) 589 log_err("u-> iso-2022-kr[UCNV_MBCS] \n"); 590 } 591 592 /*HZ*/ 593 log_verbose("Testing for HZ\n"); 594 { 595 static const UChar sampleText[] = { 0x0031, 0xd801}; 596 static const uint8_t expected[] = { 0x7e, 0x7d, 0x31}; 597 static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A}; 598 static const int32_t offsets[] = { 0x00, 0x00, 0x00, 1}; 599 600 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 601 static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31, 0x1A, 0x32 }; 602 static const int32_t offsets2[] = { 0x00, 0x00, 0x00, 0x01, 0x02 }; 603 604 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 605 static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50, 0x1A }; 606 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x02}; 607 608 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 609 static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a }; 610 static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 }; 611 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 612 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR)) 613 log_err("u-> HZ [UCNV_MBCS] \n"); 614 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 615 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR)) 616 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 617 618 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 619 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR)) 620 log_err("u->HZ[UCNV_DBCS] did not match\n"); 621 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 622 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 623 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 624 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 625 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 626 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 627 628 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 629 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR)) 630 log_err("u->HZ [UCNV_MBCS] \n"); 631 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 632 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR)) 633 log_err("u-> HZ[UCNV_MBCS] \n"); 634 635 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 636 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR)) 637 log_err("u-> HZ [UCNV_MBCS] \n"); 638 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 639 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR)) 640 log_err("u-> HZ [UCNV_MBCS] \n"); 641 } 642 #endif 643 } 644 645 #if !UCONFIG_NO_LEGACY_CONVERSION 646 /*test different convertToUnicode error behaviours*/ 647 static void TestToUnicodeErrorBehaviour() 648 { 649 log_verbose("Testing error conditions for DBCS\n"); 650 { 651 uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04}; 652 const UChar expected[] = { 0x00a1 }; 653 654 if(!convertToU(sampleText, sizeof(sampleText), 655 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, TRUE, U_AMBIGUOUS_ALIAS_WARNING )) 656 log_err("DBCS (ibm-1363)->Unicode did not match.\n"); 657 if(!convertToU(sampleText, sizeof(sampleText), 658 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING )) 659 log_err("DBCS (ibm-1363)->Unicode with flush = false did not match.\n"); 660 } 661 log_verbose("Testing error conditions for SBCS\n"); 662 { 663 uint8_t sampleText[] = { 0xa2, 0xFF}; 664 const UChar expected[] = { 0x00c2 }; 665 666 /* uint8_t sampleText2[] = { 0xa2, 0x70 }; 667 const UChar expected2[] = { 0x0073 };*/ 668 669 if(!convertToU(sampleText, sizeof(sampleText), 670 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, TRUE, U_ZERO_ERROR )) 671 log_err("SBCS (ibm-1051)->Unicode did not match.\n"); 672 if(!convertToU(sampleText, sizeof(sampleText), 673 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, FALSE, U_ZERO_ERROR )) 674 log_err("SBCS (ibm-1051)->Unicode with flush = false did not match.\n"); 675 676 } 677 } 678 679 static void TestGetNextErrorBehaviour(){ 680 /*Test for unassigned character*/ 681 #define INPUT_SIZE 1 682 static const char input1[INPUT_SIZE]={ 0x70 }; 683 const char* source=(const char*)input1; 684 UErrorCode err=U_ZERO_ERROR; 685 UChar32 c=0; 686 UConverter *cnv=ucnv_open("ibm-424", &err); 687 if(U_FAILURE(err)) { 688 log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err)); 689 return; 690 } 691 c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err); 692 if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){ 693 log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c); 694 } 695 ucnv_close(cnv); 696 } 697 #endif 698 699 #define MAX_UTF16_LEN 2 700 #define MAX_UTF8_LEN 4 701 702 /*Regression test for utf8 converter*/ 703 static void TestRegressionUTF8(){ 704 UChar32 currCh = 0; 705 int32_t offset8; 706 int32_t offset16; 707 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 708 uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH); 709 710 while (currCh <= UNICODE_LIMIT) { 711 offset16 = 0; 712 offset8 = 0; 713 while(currCh <= UNICODE_LIMIT 714 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 715 && offset8 < (MAX_LENGTH - MAX_UTF8_LEN)) 716 { 717 if (currCh == SURROGATE_HIGH_START) { 718 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 719 } 720 U16_APPEND_UNSAFE(standardForm, offset16, currCh); 721 U8_APPEND_UNSAFE(utf8, offset8, currCh); 722 currCh++; 723 } 724 if(!convertFromU(standardForm, offset16, 725 utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 726 log_err("Unicode->UTF8 did not match.\n"); 727 } 728 if(!convertToU(utf8, offset8, 729 standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 730 log_err("UTF8->Unicode did not match.\n"); 731 } 732 } 733 734 free(standardForm); 735 free(utf8); 736 737 { 738 static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 }; 739 static const UChar expected[] = { 0x0301, 0x0300 }; 740 UConverter *conv8; 741 UErrorCode err = U_ZERO_ERROR; 742 UChar pivotBuffer[100]; 743 const UChar* const pivEnd = pivotBuffer + 100; 744 const char* srcBeg; 745 const char* srcEnd; 746 UChar* pivBeg; 747 748 conv8 = ucnv_open("UTF-8", &err); 749 750 srcBeg = src8; 751 pivBeg = pivotBuffer; 752 srcEnd = src8 + 3; 753 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 754 if (srcBeg != srcEnd) { 755 log_err("Did not consume whole buffer on first call.\n"); 756 } 757 758 srcEnd = src8 + 4; 759 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 760 if (srcBeg != srcEnd) { 761 log_err("Did not consume whole buffer on second call.\n"); 762 } 763 764 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 765 log_err("Did not get expected results for UTF-8.\n"); 766 } 767 ucnv_close(conv8); 768 } 769 } 770 771 #define MAX_UTF32_LEN 1 772 773 static void TestRegressionUTF32(){ 774 #if !UCONFIG_ONLY_HTML_CONVERSION 775 UChar32 currCh = 0; 776 int32_t offset32; 777 int32_t offset16; 778 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 779 UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32)); 780 781 while (currCh <= UNICODE_LIMIT) { 782 offset16 = 0; 783 offset32 = 0; 784 while(currCh <= UNICODE_LIMIT 785 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 786 && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN)) 787 { 788 if (currCh == SURROGATE_HIGH_START) { 789 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 790 } 791 U16_APPEND_UNSAFE(standardForm, offset16, currCh); 792 utf32[offset32++] = currCh; 793 currCh++; 794 } 795 if(!convertFromU(standardForm, offset16, 796 (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 797 log_err("Unicode->UTF32 did not match.\n"); 798 } 799 if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32), 800 standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 801 log_err("UTF32->Unicode did not match.\n"); 802 } 803 } 804 free(standardForm); 805 free(utf32); 806 807 { 808 /* Check for lone surrogate error handling. */ 809 static const UChar sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 }; 810 static const UChar sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 }; 811 static const uint8_t expectedUTF32BE[] = { 812 0x00, 0x00, 0x00, 0x31, 813 0x00, 0x00, 0xff, 0xfd, 814 0x00, 0x00, 0x00, 0x32 815 }; 816 static const uint8_t expectedUTF32LE[] = { 817 0x31, 0x00, 0x00, 0x00, 818 0xfd, 0xff, 0x00, 0x00, 819 0x32, 0x00, 0x00, 0x00 820 }; 821 static const int32_t offsetsUTF32[] = { 822 0x00, 0x00, 0x00, 0x00, 823 0x01, 0x01, 0x01, 0x01, 824 0x02, 0x02, 0x02, 0x02 825 }; 826 827 if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate), 828 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 829 log_err("u->UTF-32BE\n"); 830 if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate), 831 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 832 log_err("u->UTF-32BE\n"); 833 834 if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate), 835 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 836 log_err("u->UTF-32LE\n"); 837 if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate), 838 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 839 log_err("u->UTF-32LE\n"); 840 } 841 842 { 843 static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 }; 844 static const UChar expected[] = { 0x0031, 0x0030 }; 845 UConverter *convBE; 846 UErrorCode err = U_ZERO_ERROR; 847 UChar pivotBuffer[100]; 848 const UChar* const pivEnd = pivotBuffer + 100; 849 const char* srcBeg; 850 const char* srcEnd; 851 UChar* pivBeg; 852 853 convBE = ucnv_open("UTF-32BE", &err); 854 855 srcBeg = srcBE; 856 pivBeg = pivotBuffer; 857 srcEnd = srcBE + 5; 858 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 859 if (srcBeg != srcEnd) { 860 log_err("Did not consume whole buffer on first call.\n"); 861 } 862 863 srcEnd = srcBE + 8; 864 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 865 if (srcBeg != srcEnd) { 866 log_err("Did not consume whole buffer on second call.\n"); 867 } 868 869 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 870 log_err("Did not get expected results for UTF-32BE.\n"); 871 } 872 ucnv_close(convBE); 873 } 874 { 875 static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 }; 876 static const UChar expected[] = { 0x0031, 0x0030 }; 877 UConverter *convLE; 878 UErrorCode err = U_ZERO_ERROR; 879 UChar pivotBuffer[100]; 880 const UChar* const pivEnd = pivotBuffer + 100; 881 const char* srcBeg; 882 const char* srcEnd; 883 UChar* pivBeg; 884 885 convLE = ucnv_open("UTF-32LE", &err); 886 887 srcBeg = srcLE; 888 pivBeg = pivotBuffer; 889 srcEnd = srcLE + 5; 890 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 891 if (srcBeg != srcEnd) { 892 log_err("Did not consume whole buffer on first call.\n"); 893 } 894 895 srcEnd = srcLE + 8; 896 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 897 if (srcBeg != srcEnd) { 898 log_err("Did not consume whole buffer on second call.\n"); 899 } 900 901 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 902 log_err("Did not get expected results for UTF-32LE.\n"); 903 } 904 ucnv_close(convLE); 905 } 906 #endif 907 } 908 909 /*Walk through the available converters*/ 910 static void TestAvailableConverters(){ 911 UErrorCode status=U_ZERO_ERROR; 912 UConverter *conv=NULL; 913 int32_t i=0; 914 for(i=0; i < ucnv_countAvailable(); i++){ 915 status=U_ZERO_ERROR; 916 conv=ucnv_open(ucnv_getAvailableName(i), &status); 917 if(U_FAILURE(status)){ 918 log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n", 919 ucnv_getAvailableName(i), myErrorName(status)); 920 continue; 921 } 922 ucnv_close(conv); 923 } 924 925 } 926 927 static void TestFlushInternalBuffer(){ 928 TestWithBufferSize(MAX_LENGTH, 1); 929 TestWithBufferSize(1, 1); 930 TestWithBufferSize(1, MAX_LENGTH); 931 TestWithBufferSize(MAX_LENGTH, MAX_LENGTH); 932 } 933 934 static void TestWithBufferSize(int32_t insize, int32_t outsize){ 935 936 gInBufferSize =insize; 937 gOutBufferSize = outsize; 938 939 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 940 { 941 UChar sampleText[] = 942 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E }; 943 const uint8_t expectedUTF8[] = 944 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 945 int32_t toUTF8Offs[] = 946 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07}; 947 /* int32_t fmUTF8Offs[] = 948 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/ 949 950 /*UTF-8*/ 951 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 952 expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE)) 953 log_err("u-> UTF8 did not match.\n"); 954 } 955 956 #if !UCONFIG_NO_LEGACY_CONVERSION 957 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 958 { 959 UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 960 const uint8_t toIBM943[]= { 0x61, 961 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 962 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 963 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 964 0x61 }; 965 int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 966 967 if(!testConvertFromU(inputTest, UPRV_LENGTHOF(inputTest), 968 toIBM943, sizeof(toIBM943), "ibm-943", 969 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE)) 970 log_err("u-> ibm-943 with subst with value did not match.\n"); 971 } 972 #endif 973 974 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 975 { 976 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 977 0xe0, 0x80, 0x61}; 978 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061}; 979 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0006}; 980 981 if(!testConvertToU(sampleText1, sizeof(sampleText1), 982 expected1, UPRV_LENGTHOF(expected1),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE)) 983 log_err("utf8->u with substitute did not match.\n");; 984 } 985 986 #if !UCONFIG_NO_LEGACY_CONVERSION 987 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 988 /*to Unicode*/ 989 { 990 const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 991 0x81, 0xad, /*unassigned*/ 992 0x89, 0xd3 }; 993 UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 994 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 995 0x7B87}; 996 int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 997 998 if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU), 999 IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943", 1000 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE)) 1001 log_err("ibm-943->u with substitute with value did not match.\n"); 1002 1003 } 1004 #endif 1005 } 1006 1007 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1008 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1009 { 1010 1011 int32_t i=0; 1012 char *p=0; 1013 const UChar *src; 1014 char buffer[MAX_LENGTH]; 1015 int32_t offsetBuffer[MAX_LENGTH]; 1016 int32_t *offs=0; 1017 char *targ; 1018 char *targetLimit; 1019 UChar *sourceLimit=0; 1020 UErrorCode status = U_ZERO_ERROR; 1021 UConverter *conv = 0; 1022 conv = ucnv_open(codepage, &status); 1023 if(U_FAILURE(status)) 1024 { 1025 log_data_err("Couldn't open converter %s\n",codepage); 1026 return TRUE; 1027 } 1028 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1029 1030 for(i=0; i<MAX_LENGTH; i++){ 1031 buffer[i]=(char)0xF0; 1032 offsetBuffer[i]=0xFF; 1033 } 1034 1035 src=source; 1036 sourceLimit=(UChar*)src+(sourceLen); 1037 targ=buffer; 1038 targetLimit=targ+MAX_LENGTH; 1039 offs=offsetBuffer; 1040 ucnv_fromUnicode (conv, 1041 (char **)&targ, 1042 (const char *)targetLimit, 1043 &src, 1044 sourceLimit, 1045 expectOffsets ? offs : NULL, 1046 doFlush, 1047 &status); 1048 ucnv_close(conv); 1049 if(status != expectedStatus){ 1050 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1051 return FALSE; 1052 } 1053 1054 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1055 sourceLen, targ-buffer); 1056 1057 if(expectLen != targ-buffer) 1058 { 1059 log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1060 log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1061 printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer)); 1062 printSeqErr((const unsigned char*)expect, expectLen); 1063 return FALSE; 1064 } 1065 1066 if(memcmp(buffer, expect, expectLen)){ 1067 log_err("String does not match. FROM Unicode to codePage%s\n", codepage); 1068 log_info("\nGot:"); 1069 printSeqErr((const unsigned char *)buffer, expectLen); 1070 log_info("\nExpected:"); 1071 printSeqErr((const unsigned char *)expect, expectLen); 1072 return FALSE; 1073 } 1074 else { 1075 log_verbose("Matches!\n"); 1076 } 1077 1078 if (expectOffsets != 0){ 1079 log_verbose("comparing %d offsets..\n", targ-buffer); 1080 if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){ 1081 log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage); 1082 log_info("\nGot : "); 1083 printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer)); 1084 for(p=buffer;p<targ;p++) 1085 log_info("%d, ", offsetBuffer[p-buffer]); 1086 log_info("\nExpected: "); 1087 for(i=0; i< (targ-buffer); i++) 1088 log_info("%d,", expectOffsets[i]); 1089 } 1090 } 1091 1092 return TRUE; 1093 } 1094 1095 1096 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 1097 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1098 { 1099 UErrorCode status = U_ZERO_ERROR; 1100 UConverter *conv = 0; 1101 int32_t i=0; 1102 UChar *p=0; 1103 const char* src; 1104 UChar buffer[MAX_LENGTH]; 1105 int32_t offsetBuffer[MAX_LENGTH]; 1106 int32_t *offs=0; 1107 UChar *targ; 1108 UChar *targetLimit; 1109 uint8_t *sourceLimit=0; 1110 1111 1112 1113 conv = ucnv_open(codepage, &status); 1114 if(U_FAILURE(status)) 1115 { 1116 log_data_err("Couldn't open converter %s\n",codepage); 1117 return TRUE; 1118 } 1119 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1120 1121 1122 1123 for(i=0; i<MAX_LENGTH; i++){ 1124 buffer[i]=0xFFFE; 1125 offsetBuffer[i]=-1; 1126 } 1127 1128 src=(const char *)source; 1129 sourceLimit=(uint8_t*)(src+(sourceLen)); 1130 targ=buffer; 1131 targetLimit=targ+MAX_LENGTH; 1132 offs=offsetBuffer; 1133 1134 1135 1136 ucnv_toUnicode (conv, 1137 &targ, 1138 targetLimit, 1139 (const char **)&src, 1140 (const char *)sourceLimit, 1141 expectOffsets ? offs : NULL, 1142 doFlush, 1143 &status); 1144 1145 ucnv_close(conv); 1146 if(status != expectedStatus){ 1147 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1148 return FALSE; 1149 } 1150 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1151 sourceLen, targ-buffer); 1152 1153 1154 1155 1156 log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2); 1157 1158 if (expectOffsets != 0) { 1159 if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){ 1160 1161 log_err("did not get the expected offsets from %s To UNICODE\n", codepage); 1162 log_info("\nGot : "); 1163 for(p=buffer;p<targ;p++) 1164 log_info("%d, ", offsetBuffer[p-buffer]); 1165 log_info("\nExpected: "); 1166 for(i=0; i<(targ-buffer); i++) 1167 log_info("%d, ", expectOffsets[i]); 1168 log_info("\nGot result:"); 1169 for(i=0; i<(targ-buffer); i++) 1170 log_info("0x%04X,", buffer[i]); 1171 log_info("\nFrom Input:"); 1172 for(i=0; i<(src-(const char *)source); i++) 1173 log_info("0x%02X,", (unsigned char)source[i]); 1174 log_info("\n"); 1175 } 1176 } 1177 if(memcmp(buffer, expect, expectLen*2)){ 1178 log_err("String does not match. from codePage %s TO Unicode\n", codepage); 1179 log_info("\nGot:"); 1180 printUSeqErr(buffer, expectLen); 1181 log_info("\nExpected:"); 1182 printUSeqErr(expect, expectLen); 1183 return FALSE; 1184 } 1185 else { 1186 log_verbose("Matches!\n"); 1187 } 1188 1189 return TRUE; 1190 } 1191 1192 1193 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1194 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset) 1195 { 1196 UErrorCode status = U_ZERO_ERROR; 1197 UConverter *conv = 0; 1198 char junkout[MAX_LENGTH]; /* FIX */ 1199 int32_t junokout[MAX_LENGTH]; /* FIX */ 1200 char *p; 1201 const UChar *src; 1202 char *end; 1203 char *targ; 1204 int32_t *offs; 1205 int i; 1206 int32_t realBufferSize; 1207 char *realBufferEnd; 1208 const UChar *realSourceEnd; 1209 const UChar *sourceLimit; 1210 UBool checkOffsets = TRUE; 1211 UBool doFlush; 1212 1213 UConverterFromUCallback oldAction = NULL; 1214 const void* oldContext = NULL; 1215 1216 for(i=0;i<MAX_LENGTH;i++) 1217 junkout[i] = (char)0xF0; 1218 for(i=0;i<MAX_LENGTH;i++) 1219 junokout[i] = 0xFF; 1220 1221 setNuConvTestName(codepage, "FROM"); 1222 1223 log_verbose("\n========= %s\n", gNuConvTestName); 1224 1225 conv = ucnv_open(codepage, &status); 1226 if(U_FAILURE(status)) 1227 { 1228 log_data_err("Couldn't open converter %s\n",codepage); 1229 return TRUE; 1230 } 1231 1232 log_verbose("Converter opened..\n"); 1233 /*----setting the callback routine----*/ 1234 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1235 if (U_FAILURE(status)) { 1236 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1237 } 1238 /*------------------------*/ 1239 1240 src = source; 1241 targ = junkout; 1242 offs = junokout; 1243 1244 realBufferSize = UPRV_LENGTHOF(junkout); 1245 realBufferEnd = junkout + realBufferSize; 1246 realSourceEnd = source + sourceLen; 1247 1248 if ( gOutBufferSize != realBufferSize ) 1249 checkOffsets = FALSE; 1250 1251 if( gInBufferSize != MAX_LENGTH ) 1252 checkOffsets = FALSE; 1253 1254 do 1255 { 1256 end = nct_min(targ + gOutBufferSize, realBufferEnd); 1257 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 1258 1259 doFlush = (UBool)(sourceLimit == realSourceEnd); 1260 1261 if(targ == realBufferEnd) 1262 { 1263 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 1264 return FALSE; 1265 } 1266 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 1267 1268 1269 status = U_ZERO_ERROR; 1270 if(gInBufferSize ==999 && gOutBufferSize==999) 1271 doFlush = FALSE; 1272 ucnv_fromUnicode (conv, 1273 (char **)&targ, 1274 (const char *)end, 1275 &src, 1276 sourceLimit, 1277 offs, 1278 doFlush, /* flush if we're at the end of the input data */ 1279 &status); 1280 if(testReset) 1281 ucnv_resetToUnicode(conv); 1282 if(gInBufferSize ==999 && gOutBufferSize==999) 1283 ucnv_resetToUnicode(conv); 1284 1285 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 1286 1287 if(U_FAILURE(status)) { 1288 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1289 return FALSE; 1290 } 1291 1292 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1293 sourceLen, targ-junkout); 1294 if(getTestOption(VERBOSITY_OPTION)) 1295 { 1296 char junk[999]; 1297 char offset_str[999]; 1298 char *ptr; 1299 1300 junk[0] = 0; 1301 offset_str[0] = 0; 1302 for(ptr = junkout;ptr<targ;ptr++) 1303 { 1304 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr); 1305 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]); 1306 } 1307 1308 log_verbose(junk); 1309 printSeq((const unsigned char *)expect, expectLen); 1310 if ( checkOffsets ) 1311 { 1312 log_verbose("\nOffsets:"); 1313 log_verbose(offset_str); 1314 } 1315 log_verbose("\n"); 1316 } 1317 ucnv_close(conv); 1318 1319 1320 if(expectLen != targ-junkout) 1321 { 1322 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1323 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1324 log_info("\nGot:"); 1325 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1326 log_info("\nExpected:"); 1327 printSeqErr((const unsigned char*)expect, expectLen); 1328 return FALSE; 1329 } 1330 1331 if (checkOffsets && (expectOffsets != 0) ) 1332 { 1333 log_verbose("comparing %d offsets..\n", targ-junkout); 1334 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 1335 log_err("did not get the expected offsets. %s", gNuConvTestName); 1336 log_err("Got : "); 1337 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1338 for(p=junkout;p<targ;p++) 1339 log_err("%d, ", junokout[p-junkout]); 1340 log_err("\nExpected: "); 1341 for(i=0; i<(targ-junkout); i++) 1342 log_err("%d,", expectOffsets[i]); 1343 } 1344 } 1345 1346 log_verbose("comparing..\n"); 1347 if(!memcmp(junkout, expect, expectLen)) 1348 { 1349 log_verbose("Matches!\n"); 1350 return TRUE; 1351 } 1352 else 1353 { 1354 log_err("String does not match. %s\n", gNuConvTestName); 1355 printUSeqErr(source, sourceLen); 1356 log_info("\nGot:"); 1357 printSeqErr((const unsigned char *)junkout, expectLen); 1358 log_info("\nExpected:"); 1359 printSeqErr((const unsigned char *)expect, expectLen); 1360 1361 return FALSE; 1362 } 1363 } 1364 1365 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 1366 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset) 1367 { 1368 UErrorCode status = U_ZERO_ERROR; 1369 UConverter *conv = 0; 1370 UChar junkout[MAX_LENGTH]; /* FIX */ 1371 int32_t junokout[MAX_LENGTH]; /* FIX */ 1372 const char *src; 1373 const char *realSourceEnd; 1374 const char *srcLimit; 1375 UChar *p; 1376 UChar *targ; 1377 UChar *end; 1378 int32_t *offs; 1379 int i; 1380 UBool checkOffsets = TRUE; 1381 int32_t realBufferSize; 1382 UChar *realBufferEnd; 1383 UBool doFlush; 1384 1385 UConverterToUCallback oldAction = NULL; 1386 const void* oldContext = NULL; 1387 1388 1389 for(i=0;i<MAX_LENGTH;i++) 1390 junkout[i] = 0xFFFE; 1391 1392 for(i=0;i<MAX_LENGTH;i++) 1393 junokout[i] = -1; 1394 1395 setNuConvTestName(codepage, "TO"); 1396 1397 log_verbose("\n========= %s\n", gNuConvTestName); 1398 1399 conv = ucnv_open(codepage, &status); 1400 if(U_FAILURE(status)) 1401 { 1402 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 1403 return TRUE; 1404 } 1405 1406 log_verbose("Converter opened..\n"); 1407 /*----setting the callback routine----*/ 1408 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1409 if (U_FAILURE(status)) { 1410 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1411 } 1412 /*-------------------------------------*/ 1413 1414 src = (const char *)source; 1415 targ = junkout; 1416 offs = junokout; 1417 1418 realBufferSize = UPRV_LENGTHOF(junkout); 1419 realBufferEnd = junkout + realBufferSize; 1420 realSourceEnd = src + sourcelen; 1421 1422 if ( gOutBufferSize != realBufferSize ) 1423 checkOffsets = FALSE; 1424 1425 if( gInBufferSize != MAX_LENGTH ) 1426 checkOffsets = FALSE; 1427 1428 do 1429 { 1430 end = nct_min( targ + gOutBufferSize, realBufferEnd); 1431 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 1432 1433 if(targ == realBufferEnd) 1434 { 1435 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 1436 return FALSE; 1437 } 1438 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 1439 1440 /* oldTarg = targ; */ 1441 1442 status = U_ZERO_ERROR; 1443 doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE); 1444 1445 ucnv_toUnicode (conv, 1446 &targ, 1447 end, 1448 (const char **)&src, 1449 (const char *)srcLimit, 1450 offs, 1451 doFlush, /* flush if we're at the end of hte source data */ 1452 &status); 1453 if(testReset) 1454 ucnv_resetFromUnicode(conv); 1455 if(gInBufferSize ==999 && gOutBufferSize==999) 1456 ucnv_resetToUnicode(conv); 1457 /* offs += (targ-oldTarg); */ 1458 1459 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 1460 1461 if(U_FAILURE(status)) 1462 { 1463 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1464 return FALSE; 1465 } 1466 1467 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 1468 sourcelen, targ-junkout); 1469 if(getTestOption(VERBOSITY_OPTION)) 1470 { 1471 char junk[999]; 1472 char offset_str[999]; 1473 1474 UChar *ptr; 1475 1476 junk[0] = 0; 1477 offset_str[0] = 0; 1478 1479 for(ptr = junkout;ptr<targ;ptr++) 1480 { 1481 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 1482 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 1483 } 1484 1485 log_verbose(junk); 1486 1487 if ( checkOffsets ) 1488 { 1489 log_verbose("\nOffsets:"); 1490 log_verbose(offset_str); 1491 } 1492 log_verbose("\n"); 1493 } 1494 ucnv_close(conv); 1495 1496 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 1497 1498 if (checkOffsets && (expectOffsets != 0)) 1499 { 1500 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 1501 1502 log_err("did not get the expected offsets. %s",gNuConvTestName); 1503 for(p=junkout;p<targ;p++) 1504 log_err("%d, ", junokout[p-junkout]); 1505 log_err("\nExpected: "); 1506 for(i=0; i<(targ-junkout); i++) 1507 log_err("%d,", expectOffsets[i]); 1508 log_err(""); 1509 for(i=0; i<(targ-junkout); i++) 1510 log_err("%X,", junkout[i]); 1511 log_err(""); 1512 for(i=0; i<(src-(const char *)source); i++) 1513 log_err("%X,", (unsigned char)source[i]); 1514 } 1515 } 1516 1517 if(!memcmp(junkout, expect, expectlen*2)) 1518 { 1519 log_verbose("Matches!\n"); 1520 return TRUE; 1521 } 1522 else 1523 { 1524 log_err("String does not match. %s\n", gNuConvTestName); 1525 log_verbose("String does not match. %s\n", gNuConvTestName); 1526 log_info("\nGot:"); 1527 printUSeq(junkout, expectlen); 1528 log_info("\nExpected:"); 1529 printUSeq(expect, expectlen); 1530 return FALSE; 1531 } 1532 } 1533 1534 1535 static void TestResetBehaviour(void){ 1536 #if !UCONFIG_NO_LEGACY_CONVERSION 1537 log_verbose("Testing Reset for DBCS and MBCS\n"); 1538 { 1539 static const UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 1540 static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 1541 static const int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 1542 1543 1544 static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8}; 1545 static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7}; 1546 static const int32_t offsets1[] = { 0,2,4,6}; 1547 1548 /*DBCS*/ 1549 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1550 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1551 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1552 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1553 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1554 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1555 1556 if(!testConvertToU(expected1, sizeof(expected1), 1557 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1558 offsets1, TRUE)) 1559 log_err("ibm-1363 -> did not match.\n"); 1560 /*MBCS*/ 1561 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1562 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1563 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1564 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1565 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1566 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1567 1568 if(!testConvertToU(expected1, sizeof(expected1), 1569 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1570 offsets1, TRUE)) 1571 log_err("ibm-1363 -> did not match.\n"); 1572 1573 } 1574 1575 log_verbose("Testing Reset for ISO-2022-jp\n"); 1576 { 1577 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1578 1579 static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1580 0x31,0x1A, 0x32}; 1581 1582 1583 static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 1584 1585 1586 static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1587 static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1588 0x31,0x1A, 0x32}; 1589 static const int32_t offsets1[] = { 3,5,10,11,12}; 1590 1591 /*iso-2022-jp*/ 1592 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1593 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1594 log_err("u-> not match.\n"); 1595 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1596 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1597 log_err("u-> not match.\n"); 1598 1599 if(!testConvertToU(expected1, sizeof(expected1), 1600 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1601 offsets1, TRUE)) 1602 log_err("iso-2022-jp -> did not match.\n"); 1603 1604 } 1605 1606 /* BEGIN android-removed */ 1607 /* To save space, Android does not build full ISO-2022-CN tables. 1608 We skip the tests for ISO-2022-CN. */ 1609 /* 1610 log_verbose("Testing Reset for ISO-2022-cn\n"); 1611 { 1612 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1613 1614 static const uint8_t expected[] = { 1615 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1616 0x36, 0x21, 1617 0x0f, 0x31, 1618 0x1A, 1619 0x32 1620 }; 1621 1622 1623 static const int32_t offsets[] = { 1624 0, 0, 0, 0, 0, 0, 0, 1625 1, 1, 1626 2, 2, 1627 3, 1628 5, }; 1629 1630 UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1631 static const uint8_t expected1[] = { 1632 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1633 0x36, 0x21, 1634 0x1B, 0x24, 0x29, 0x47, 0x24, 0x22, 1635 0x0f, 0x1A, 1636 0x32 1637 }; 1638 static const int32_t offsets1[] = { 5,7,13,16,17}; 1639 1640 // iso-2022-CN android-change 1641 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1642 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1643 log_err("u-> not match.\n"); 1644 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1645 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1646 log_err("u-> not match.\n"); 1647 1648 if(!testConvertToU(expected1, sizeof(expected1), 1649 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1650 offsets1, TRUE)) 1651 log_err("iso-2022-cn -> did not match.\n"); 1652 } 1653 */ 1654 /* END android-removed */ 1655 1656 log_verbose("Testing Reset for ISO-2022-kr\n"); 1657 { 1658 UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1659 1660 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 1661 0x0E, 0x6C, 0x69, 1662 0x0f, 0x1A, 1663 0x0e, 0x6F, 0x4B, 1664 0x0F, 0x31, 1665 0x1A, 1666 0x32 }; 1667 1668 static const int32_t offsets[] = {-1, -1, -1, -1, 1669 0, 0, 0, 1670 1, 1, 1671 3, 3, 3, 1672 4, 4, 1673 5, 1674 7, 1675 }; 1676 static const UChar sampleText1[] = { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032}; 1677 1678 static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43, 1679 0x0E, 0x6C, 0x69, 1680 0x0f, 0x41, 1681 0x0e, 0x6F, 0x4B, 1682 0x0F, 0x31, 1683 0x42, 1684 0x32 }; 1685 1686 static const int32_t offsets1[] = { 1687 5, 8, 10, 1688 13, 14, 15 1689 1690 }; 1691 /*iso-2022-kr*/ 1692 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1693 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1694 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1695 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1696 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1697 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1698 if(!testConvertToU(expected1, sizeof(expected1), 1699 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1700 offsets1, TRUE)) 1701 log_err("iso-2022-kr -> did not match.\n"); 1702 } 1703 1704 log_verbose("Testing Reset for HZ\n"); 1705 { 1706 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1707 1708 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 1709 0x7E, 0x7D, 0x1A, 1710 0x7E, 0x7B, 0x36, 0x21, 1711 0x7E, 0x7D, 0x31, 1712 0x1A, 1713 0x32 }; 1714 1715 1716 static const int32_t offsets[] = {0,0,0,0, 1717 1,1,1, 1718 3,3,3,3, 1719 4,4,4, 1720 5, 1721 7,}; 1722 static const UChar sampleText1[] = { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032}; 1723 1724 static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B, 1725 0x7E, 0x7D, 0x35, 1726 0x7E, 0x7B, 0x36, 0x21, 1727 0x7E, 0x7D, 0x31, 1728 0x41, 1729 0x32 }; 1730 1731 1732 static const int32_t offsets1[] = {2,6,9,13,14,15 1733 }; 1734 1735 /*hz*/ 1736 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1737 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1738 log_err("u-> not match.\n"); 1739 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1740 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1741 log_err("u-> not match.\n"); 1742 if(!testConvertToU(expected1, sizeof(expected1), 1743 sampleText1, UPRV_LENGTHOF(sampleText1), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1744 offsets1, TRUE)) 1745 log_err("hz -> did not match.\n"); 1746 } 1747 #endif 1748 1749 /*UTF-8*/ 1750 log_verbose("Testing for UTF8\n"); 1751 { 1752 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 1753 int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 1754 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 1755 0x04, 0x06 }; 1756 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 1757 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 1758 1759 1760 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 1761 /*UTF-8*/ 1762 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1763 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1764 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1765 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1766 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1767 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1768 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1769 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1770 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1771 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1772 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1773 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1774 if(!testConvertToU(expected, sizeof(expected), 1775 sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1776 log_err("UTF8 -> did not match.\n"); 1777 if(!testConvertToU(expected, sizeof(expected), 1778 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1779 log_err("UTF8 -> did not match.\n"); 1780 if(!testConvertToU(expected, sizeof(expected), 1781 sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1782 log_err("UTF8 -> did not match.\n"); 1783 if(!testConvertToU(expected, sizeof(expected), 1784 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1785 log_err("UTF8 -> did not match.\n"); 1786 1787 } 1788 1789 } 1790 1791 /* Test that U_TRUNCATED_CHAR_FOUND is set. */ 1792 static void 1793 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) { 1794 UConverter *cnv; 1795 1796 UChar buffer[2]; 1797 UChar *target, *targetLimit; 1798 const char *source, *sourceLimit; 1799 1800 UErrorCode errorCode; 1801 1802 errorCode=U_ZERO_ERROR; 1803 cnv=ucnv_open(cnvName, &errorCode); 1804 if(U_FAILURE(errorCode)) { 1805 log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode)); 1806 return; 1807 } 1808 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 1809 if(U_FAILURE(errorCode)) { 1810 log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n", 1811 cnvName, u_errorName(errorCode)); 1812 ucnv_close(cnv); 1813 return; 1814 } 1815 1816 source=(const char *)bytes; 1817 sourceLimit=source+length; 1818 target=buffer; 1819 targetLimit=buffer+UPRV_LENGTHOF(buffer); 1820 1821 /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */ 1822 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode); 1823 if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) { 1824 log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n", 1825 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1826 } 1827 1828 errorCode=U_ZERO_ERROR; 1829 source=sourceLimit; 1830 target=buffer; 1831 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1832 if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) { 1833 log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n", 1834 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer)); 1835 } 1836 1837 /* 2. input bytes with flush=TRUE */ 1838 ucnv_resetToUnicode(cnv); 1839 1840 errorCode=U_ZERO_ERROR; 1841 source=(const char *)bytes; 1842 target=buffer; 1843 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1844 if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) { 1845 log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n", 1846 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1847 } 1848 1849 1850 ucnv_close(cnv); 1851 } 1852 1853 static void 1854 TestTruncated() { 1855 static const struct { 1856 const char *cnvName; 1857 uint8_t bytes[8]; /* partial input bytes resulting in no output */ 1858 int32_t length; 1859 } testCases[]={ 1860 { "IMAP-mailbox-name", { 0x26 }, 1 }, /* & */ 1861 { "IMAP-mailbox-name", { 0x26, 0x42 }, 2 }, /* &B */ 1862 { "IMAP-mailbox-name", { 0x26, 0x42, 0x42 }, 3 }, /* &BB */ 1863 { "IMAP-mailbox-name", { 0x26, 0x41, 0x41 }, 3 }, /* &AA */ 1864 1865 { "UTF-7", { 0x2b, 0x42 }, 2 }, /* +B */ 1866 { "UTF-8", { 0xd1 }, 1 }, 1867 1868 { "UTF-16BE", { 0x4e }, 1 }, 1869 { "UTF-16LE", { 0x4e }, 1 }, 1870 { "UTF-16", { 0x4e }, 1 }, 1871 { "UTF-16", { 0xff }, 1 }, 1872 { "UTF-16", { 0xfe, 0xff, 0x4e }, 3 }, 1873 1874 { "UTF-32BE", { 0, 0, 0x4e }, 3 }, 1875 { "UTF-32LE", { 0x4e }, 1 }, 1876 { "UTF-32", { 0, 0, 0x4e }, 3 }, 1877 { "UTF-32", { 0xff }, 1 }, 1878 { "UTF-32", { 0, 0, 0xfe, 0xff, 0 }, 5 }, 1879 { "SCSU", { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */ 1880 1881 #if !UCONFIG_NO_LEGACY_CONVERSION 1882 { "BOCU-1", { 0xd5 }, 1 }, 1883 1884 { "Shift-JIS", { 0xe0 }, 1 }, 1885 1886 { "ibm-939", { 0x0e, 0x41 }, 2 } /* SO 0x41 */ 1887 #else 1888 { "BOCU-1", { 0xd5 }, 1 ,} 1889 #endif 1890 }; 1891 int32_t i; 1892 1893 for(i=0; i<UPRV_LENGTHOF(testCases); ++i) { 1894 doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length); 1895 } 1896 } 1897 1898 typedef struct NameRange { 1899 const char *name; 1900 UChar32 start, end, start2, end2, notStart, notEnd; 1901 } NameRange; 1902 1903 static void 1904 TestUnicodeSet() { 1905 UErrorCode errorCode; 1906 UConverter *cnv; 1907 USet *set; 1908 const char *name; 1909 int32_t i, count; 1910 1911 static const char *const completeSetNames[]={ 1912 "UTF-7", 1913 "UTF-8", 1914 "UTF-16", 1915 "UTF-16BE", 1916 "UTF-16LE", 1917 "UTF-32", 1918 "UTF-32BE", 1919 "UTF-32LE", 1920 "SCSU", 1921 "BOCU-1", 1922 "CESU-8", 1923 #if !UCONFIG_NO_LEGACY_CONVERSION 1924 "gb18030", 1925 #endif 1926 "IMAP-mailbox-name" 1927 }; 1928 #if !UCONFIG_NO_LEGACY_CONVERSION 1929 static const char *const lmbcsNames[]={ 1930 "LMBCS-1", 1931 "LMBCS-2", 1932 "LMBCS-3", 1933 "LMBCS-4", 1934 "LMBCS-5", 1935 "LMBCS-6", 1936 "LMBCS-8", 1937 "LMBCS-11", 1938 "LMBCS-16", 1939 "LMBCS-17", 1940 "LMBCS-18", 1941 "LMBCS-19" 1942 }; 1943 #endif 1944 1945 static const NameRange nameRanges[]={ 1946 { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1947 #if !UCONFIG_NO_LEGACY_CONVERSION 1948 { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1949 #endif 1950 { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff }, 1951 #if !UCONFIG_NO_LEGACY_CONVERSION 1952 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }, 1953 { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff }, 1954 /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */ 1955 { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff } 1956 #else 1957 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff } 1958 #endif 1959 }; 1960 1961 /* open an empty set */ 1962 set=uset_open(1, 0); 1963 1964 count=ucnv_countAvailable(); 1965 for(i=0; i<count; ++i) { 1966 errorCode=U_ZERO_ERROR; 1967 name=ucnv_getAvailableName(i); 1968 cnv=ucnv_open(name, &errorCode); 1969 if(U_FAILURE(errorCode)) { 1970 log_data_err("error: unable to open converter %s - %s\n", 1971 name, u_errorName(errorCode)); 1972 continue; 1973 } 1974 1975 uset_clear(set); 1976 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1977 if(U_FAILURE(errorCode)) { 1978 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 1979 name, u_errorName(errorCode)); 1980 } else if(uset_size(set)==0) { 1981 log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name); 1982 } 1983 1984 ucnv_close(cnv); 1985 } 1986 1987 /* test converters that are known to convert all of Unicode (except maybe for surrogates) */ 1988 for(i=0; i<UPRV_LENGTHOF(completeSetNames); ++i) { 1989 errorCode=U_ZERO_ERROR; 1990 name=completeSetNames[i]; 1991 cnv=ucnv_open(name, &errorCode); 1992 if(U_FAILURE(errorCode)) { 1993 log_data_err("error: unable to open converter %s - %s\n", 1994 name, u_errorName(errorCode)); 1995 continue; 1996 } 1997 1998 uset_clear(set); 1999 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2000 if(U_FAILURE(errorCode)) { 2001 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2002 name, u_errorName(errorCode)); 2003 } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) { 2004 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name); 2005 } 2006 2007 ucnv_close(cnv); 2008 } 2009 2010 #if !UCONFIG_NO_LEGACY_CONVERSION 2011 /* test LMBCS variants which convert all of Unicode except for U+F6xx */ 2012 for(i=0; i<UPRV_LENGTHOF(lmbcsNames); ++i) { 2013 errorCode=U_ZERO_ERROR; 2014 name=lmbcsNames[i]; 2015 cnv=ucnv_open(name, &errorCode); 2016 if(U_FAILURE(errorCode)) { 2017 log_data_err("error: unable to open converter %s - %s\n", 2018 name, u_errorName(errorCode)); 2019 continue; 2020 } 2021 2022 uset_clear(set); 2023 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2024 if(U_FAILURE(errorCode)) { 2025 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2026 name, u_errorName(errorCode)); 2027 } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) { 2028 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name); 2029 } 2030 2031 ucnv_close(cnv); 2032 } 2033 #endif 2034 2035 /* test specific sets */ 2036 for(i=0; i<UPRV_LENGTHOF(nameRanges); ++i) { 2037 errorCode=U_ZERO_ERROR; 2038 name=nameRanges[i].name; 2039 cnv=ucnv_open(name, &errorCode); 2040 if(U_FAILURE(errorCode)) { 2041 log_data_err("error: unable to open converter %s - %s\n", 2042 name, u_errorName(errorCode)); 2043 continue; 2044 } 2045 2046 uset_clear(set); 2047 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2048 if(U_FAILURE(errorCode)) { 2049 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2050 name, u_errorName(errorCode)); 2051 } else if( 2052 !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) || 2053 (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2)) 2054 ) { 2055 log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name); 2056 } else if(nameRanges[i].notStart>=0) { 2057 /* simulate containsAny() with the C API */ 2058 uset_complement(set); 2059 if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) { 2060 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name); 2061 } 2062 } 2063 2064 ucnv_close(cnv); 2065 } 2066 2067 errorCode = U_ZERO_ERROR; 2068 ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode); 2069 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2070 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2071 } 2072 errorCode = U_PARSE_ERROR; 2073 /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */ 2074 ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode); 2075 if (errorCode != U_PARSE_ERROR) { 2076 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2077 } 2078 2079 uset_close(set); 2080 } 2081