1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /***************************************************************************** 9 * 10 * File ncnvtst.c 11 * 12 * Modification History: 13 * Name Description 14 * Madhu Katragadda 7/7/2000 Converter Tests for extended code coverage 15 ****************************************************************************** 16 */ 17 #include <stdio.h> 18 #include <stdlib.h> 19 #include <string.h> 20 #include "unicode/uloc.h" 21 #include "unicode/ucnv.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/uset.h" 25 #include "cintltst.h" 26 #include "cmemory.h" 27 28 #define MAX_LENGTH 999 29 30 #define UNICODE_LIMIT 0x10FFFF 31 #define SURROGATE_HIGH_START 0xD800 32 #define SURROGATE_LOW_END 0xDFFF 33 34 static int32_t gInBufferSize = 0; 35 static int32_t gOutBufferSize = 0; 36 static char gNuConvTestName[1024]; 37 38 #define nct_min(x,y) ((x<y) ? x : y) 39 40 static void printSeq(const unsigned char* a, int len); 41 static void printSeqErr(const unsigned char* a, int len); 42 static void printUSeq(const UChar* a, int len); 43 static void printUSeqErr(const UChar* a, int len); 44 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 45 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 46 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 47 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus); 48 49 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 50 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset); 51 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 52 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset); 53 54 static void setNuConvTestName(const char *codepage, const char *direction) 55 { 56 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 57 codepage, 58 direction, 59 (int)gInBufferSize, 60 (int)gOutBufferSize); 61 } 62 63 64 static void TestSurrogateBehaviour(void); 65 static void TestErrorBehaviour(void); 66 67 #if !UCONFIG_NO_LEGACY_CONVERSION 68 static void TestToUnicodeErrorBehaviour(void); 69 static void TestGetNextErrorBehaviour(void); 70 #endif 71 72 static void TestRegressionUTF8(void); 73 static void TestRegressionUTF32(void); 74 static void TestAvailableConverters(void); 75 static void TestFlushInternalBuffer(void); /*for improved code coverage in ucnv_cnv.c*/ 76 static void TestResetBehaviour(void); 77 static void TestTruncated(void); 78 static void TestUnicodeSet(void); 79 80 static void TestWithBufferSize(int32_t osize, int32_t isize); 81 82 83 static void printSeq(const unsigned char* a, int len) 84 { 85 int i=0; 86 log_verbose("\n{"); 87 while (i<len) 88 log_verbose("0x%02X ", a[i++]); 89 log_verbose("}\n"); 90 } 91 92 static void printUSeq(const UChar* a, int len) 93 { 94 int i=0; 95 log_verbose("\n{"); 96 while (i<len) 97 log_verbose("%0x04X ", a[i++]); 98 log_verbose("}\n"); 99 } 100 101 static void printSeqErr(const unsigned char* a, int len) 102 { 103 int i=0; 104 fprintf(stderr, "\n{"); 105 while (i<len) fprintf(stderr, "0x%02X ", a[i++]); 106 fprintf(stderr, "}\n"); 107 } 108 109 static void printUSeqErr(const UChar* a, int len) 110 { 111 int i=0; 112 fprintf(stderr, "\n{"); 113 while (i<len) 114 fprintf(stderr, "0x%04X ", a[i++]); 115 fprintf(stderr,"}\n"); 116 } 117 118 void addExtraTests(TestNode** root); 119 120 void addExtraTests(TestNode** root) 121 { 122 addTest(root, &TestSurrogateBehaviour, "tsconv/ncnvtst/TestSurrogateBehaviour"); 123 addTest(root, &TestErrorBehaviour, "tsconv/ncnvtst/TestErrorBehaviour"); 124 125 #if !UCONFIG_NO_LEGACY_CONVERSION 126 addTest(root, &TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour"); 127 addTest(root, &TestGetNextErrorBehaviour, "tsconv/ncnvtst/TestGetNextErrorBehaviour"); 128 #endif 129 130 addTest(root, &TestAvailableConverters, "tsconv/ncnvtst/TestAvailableConverters"); 131 addTest(root, &TestFlushInternalBuffer, "tsconv/ncnvtst/TestFlushInternalBuffer"); 132 addTest(root, &TestResetBehaviour, "tsconv/ncnvtst/TestResetBehaviour"); 133 addTest(root, &TestRegressionUTF8, "tsconv/ncnvtst/TestRegressionUTF8"); 134 addTest(root, &TestRegressionUTF32, "tsconv/ncnvtst/TestRegressionUTF32"); 135 addTest(root, &TestTruncated, "tsconv/ncnvtst/TestTruncated"); 136 addTest(root, &TestUnicodeSet, "tsconv/ncnvtst/TestUnicodeSet"); 137 } 138 139 /*test surrogate behaviour*/ 140 static void TestSurrogateBehaviour(){ 141 log_verbose("Testing for SBCS and LATIN_1\n"); 142 { 143 UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032}; 144 const uint8_t expected[] = {0x31, 0x1a, 0x32}; 145 146 #if !UCONFIG_NO_LEGACY_CONVERSION 147 /*SBCS*/ 148 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 149 expected, sizeof(expected), "ibm-920", 0 , TRUE, U_ZERO_ERROR)) 150 log_err("u-> ibm-920 [UCNV_SBCS] not match.\n"); 151 #endif 152 153 /*LATIN_1*/ 154 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 155 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR )) 156 log_err("u-> LATIN_1 not match.\n"); 157 158 } 159 160 #if !UCONFIG_NO_LEGACY_CONVERSION 161 log_verbose("Testing for DBCS and MBCS\n"); 162 { 163 UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 164 const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 165 int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 166 167 /*DBCS*/ 168 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 169 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 170 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 171 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 172 expected, sizeof(expected), "ibm-1363", offsets , TRUE, U_ZERO_ERROR)) 173 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 174 /*MBCS*/ 175 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 176 expected, sizeof(expected), "ibm-1363", 0 , TRUE, U_ZERO_ERROR)) 177 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 178 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 179 expected, sizeof(expected), "ibm-1363", offsets, TRUE, U_ZERO_ERROR)) 180 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 181 } 182 183 log_verbose("Testing for ISO-2022-jp\n"); 184 { 185 UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 186 187 const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 188 0x31,0x1A, 0x32}; 189 190 191 int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 192 193 /*iso-2022-jp*/ 194 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 195 expected, sizeof(expected), "iso-2022-jp", 0 , TRUE, U_ZERO_ERROR)) 196 log_err("u-> not match.\n"); 197 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 198 expected, sizeof(expected), "iso-2022-jp", offsets , TRUE, U_ZERO_ERROR)) 199 log_err("u-> not match.\n"); 200 } 201 202 /* BEGIN android-removed */ 203 /* To save space, Android does not build full ISO-2022-CN tables. 204 We skip the tests for ISO-2022-CN. */ 205 /* 206 log_verbose("Testing for ISO-2022-cn\n"); 207 { 208 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 209 210 static const uint8_t expected[] = { 211 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 212 0x36, 0x21, 213 0x0F, 0x31, 214 0x1A, 215 0x32 216 }; 217 218 219 220 static const int32_t offsets[] = { 221 0, 0, 0, 0, 0, 0, 0, 222 1, 1, 223 2, 2, 224 3, 225 5, }; 226 227 // iso-2022-CN android-change 228 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 229 expected, sizeof(expected), "iso-2022-cn", 0 , TRUE, U_ZERO_ERROR)) 230 log_err("u-> not match.\n"); 231 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 232 expected, sizeof(expected), "iso-2022-cn", offsets , TRUE, U_ZERO_ERROR)) 233 log_err("u-> not match.\n"); 234 } 235 */ 236 /* END android-removed */ 237 238 log_verbose("Testing for ISO-2022-kr\n"); 239 { 240 static const UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 241 242 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 243 0x0E, 0x6C, 0x69, 244 0x0f, 0x1A, 245 0x0e, 0x6F, 0x4B, 246 0x0F, 0x31, 247 0x1A, 248 0x32 }; 249 250 static const int32_t offsets[] = {-1, -1, -1, -1, 251 0, 0, 0, 252 1, 1, 253 3, 3, 3, 254 4, 4, 255 5, 256 7, 257 }; 258 259 /*iso-2022-kr*/ 260 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 261 expected, sizeof(expected), "iso-2022-kr", 0 , TRUE, U_ZERO_ERROR)) 262 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 263 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 264 expected, sizeof(expected), "iso-2022-kr", offsets , TRUE, U_ZERO_ERROR)) 265 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 266 } 267 268 log_verbose("Testing for HZ\n"); 269 { 270 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 271 272 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 273 0x7E, 0x7D, 0x1A, 274 0x7E, 0x7B, 0x36, 0x21, 275 0x7E, 0x7D, 0x31, 276 0x1A, 277 0x32 }; 278 279 280 static const int32_t offsets[] = {0,0,0,0, 281 1,1,1, 282 3,3,3,3, 283 4,4,4, 284 5, 285 7,}; 286 287 /*hz*/ 288 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 289 expected, sizeof(expected), "HZ", 0 , TRUE, U_ZERO_ERROR)) 290 log_err("u-> HZ not match.\n"); 291 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 292 expected, sizeof(expected), "HZ", offsets , TRUE, U_ZERO_ERROR)) 293 log_err("u-> HZ not match.\n"); 294 } 295 #endif 296 297 /*UTF-8*/ 298 log_verbose("Testing for UTF8\n"); 299 { 300 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 301 static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 302 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 303 0x04, 0x06 }; 304 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 305 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 306 307 308 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 309 /*UTF-8*/ 310 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 311 expected, sizeof(expected), "UTF8", offsets, TRUE, U_ZERO_ERROR )) 312 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 313 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 314 expected, sizeof(expected), "UTF8", 0, TRUE, U_ZERO_ERROR )) 315 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 316 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 317 expected, sizeof(expected), "UTF8", offsets, FALSE, U_ZERO_ERROR )) 318 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 319 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 320 expected, sizeof(expected), "UTF8", 0, FALSE, U_ZERO_ERROR )) 321 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 322 323 if(!convertToU(expected, sizeof(expected), 324 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, TRUE, U_ZERO_ERROR )) 325 log_err("UTF8 -> u did not match.\n"); 326 if(!convertToU(expected, sizeof(expected), 327 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, FALSE, U_ZERO_ERROR )) 328 log_err("UTF8 -> u did not match.\n"); 329 if(!convertToU(expected, sizeof(expected), 330 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, TRUE, U_ZERO_ERROR )) 331 log_err("UTF8 ->u did not match.\n"); 332 if(!convertToU(expected, sizeof(expected), 333 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, FALSE, U_ZERO_ERROR )) 334 log_err("UTF8 -> u did not match.\n"); 335 336 } 337 } 338 339 /*test various error behaviours*/ 340 static void TestErrorBehaviour(){ 341 log_verbose("Testing for SBCS and LATIN_1\n"); 342 { 343 static const UChar sampleText[] = { 0x0031, 0xd801}; 344 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 345 static const uint8_t expected0[] = { 0x31}; 346 static const uint8_t expected[] = { 0x31, 0x1a}; 347 static const uint8_t expected2[] = { 0x31, 0x1a, 0x32}; 348 349 #if !UCONFIG_NO_LEGACY_CONVERSION 350 /*SBCS*/ 351 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 352 expected, sizeof(expected), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 353 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 354 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 355 expected0, sizeof(expected0), "ibm-920", 0, FALSE, U_ZERO_ERROR)) 356 log_err("u-> ibm-920 [UCNV_SBCS] \n"); 357 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 358 expected2, sizeof(expected2), "ibm-920", 0, TRUE, U_ZERO_ERROR)) 359 log_err("u-> ibm-920 [UCNV_SBCS] did not match\n"); 360 #endif 361 362 /*LATIN_1*/ 363 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 364 expected, sizeof(expected), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 365 log_err("u-> LATIN_1 is supposed to fail\n"); 366 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 367 expected0, sizeof(expected0), "LATIN_1", 0, FALSE, U_ZERO_ERROR)) 368 log_err("u-> LATIN_1 is supposed to fail\n"); 369 370 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 371 expected2, sizeof(expected2), "LATIN_1", 0, TRUE, U_ZERO_ERROR)) 372 log_err("u-> LATIN_1 did not match\n"); 373 } 374 375 #if !UCONFIG_NO_LEGACY_CONVERSION 376 log_verbose("Testing for DBCS and MBCS\n"); 377 { 378 static const UChar sampleText[] = { 0x00a1, 0xd801}; 379 static const uint8_t expected[] = { 0xa2, 0xae}; 380 static const int32_t offsets[] = { 0x00, 0x00}; 381 static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0}; 382 static const int32_t offsetsSUB[] = { 0x00, 0x00, 0x01, 0x01}; 383 384 static const UChar sampleText2[] = { 0x00a1, 0xd801, 0x00a4}; 385 static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 386 static const int32_t offsets2[] = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02}; 387 388 static const UChar sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01}; 389 static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0}; 390 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x01, 0x02, 0x02}; 391 392 static const UChar sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01}; 393 static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe}; 394 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 }; 395 396 /*DBCS*/ 397 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 398 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 399 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 400 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 401 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 402 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 403 404 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 405 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, TRUE, U_ZERO_ERROR)) 406 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 407 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 408 expected, sizeof(expected), "ibm-1363", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 409 log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n"); 410 411 412 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 413 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 414 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 415 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 416 expected2, sizeof(expected2), "ibm-1363", offsets2, TRUE, U_ZERO_ERROR)) 417 log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n"); 418 419 /*MBCS*/ 420 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 421 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 422 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 423 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 424 expected, sizeof(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 425 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 426 427 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 428 expected2, sizeof(expected2), "ibm-1363", 0, TRUE, U_ZERO_ERROR)) 429 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 430 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 431 expected2, sizeof(expected2), "ibm-1363", 0, FALSE, U_ZERO_ERROR)) 432 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 433 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 434 expected2, sizeof(expected2), "ibm-1363", offsets2, FALSE, U_ZERO_ERROR)) 435 log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n"); 436 437 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 438 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, TRUE, U_ZERO_ERROR)) 439 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 440 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 441 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, FALSE, U_ZERO_ERROR)) 442 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 443 444 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 445 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, TRUE, U_ZERO_ERROR)) 446 log_err("u-> euc-jp [UCNV_MBCS] \n"); 447 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 448 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, FALSE, U_ZERO_ERROR)) 449 log_err("u-> euc-jp [UCNV_MBCS] \n"); 450 } 451 452 /*iso-2022-jp*/ 453 log_verbose("Testing for iso-2022-jp\n"); 454 { 455 static const UChar sampleText[] = { 0x0031, 0xd801}; 456 static const uint8_t expected[] = { 0x31}; 457 static const uint8_t expectedSUB[] = { 0x31, 0x1a}; 458 static const int32_t offsets[] = { 0x00, 1}; 459 460 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 461 static const uint8_t expected2[] = { 0x31,0x1A,0x32}; 462 static const int32_t offsets2[] = { 0x00,0x01,0x02}; 463 464 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 465 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a}; 466 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 }; 467 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 468 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, TRUE, U_ZERO_ERROR)) 469 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 470 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 471 expected, sizeof(expected), "iso-2022-jp", offsets, FALSE, U_AMBIGUOUS_ALIAS_WARNING)) 472 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 473 474 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 475 expected2, sizeof(expected2), "iso-2022-jp", offsets2, TRUE, U_ZERO_ERROR)) 476 log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n"); 477 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 478 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 479 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 480 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 481 expected2, sizeof(expected2), "iso-2022-jp", offsets2, FALSE, U_ZERO_ERROR)) 482 log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n"); 483 484 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 485 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, TRUE, U_ZERO_ERROR)) 486 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 487 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 488 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, FALSE, U_ZERO_ERROR)) 489 log_err("u-> iso-2022-jp [UCNV_MBCS] \n"); 490 } 491 492 /* BEGIN android-removed */ 493 /* To save space, Android does not build full ISO-2022-CN tables. 494 We skip the tests for ISO-2022-CN. */ 495 /*iso-2022-cn*/ 496 /* 497 log_verbose("Testing for iso-2022-cn\n"); 498 { 499 static const UChar sampleText[] = { 0x0031, 0xd801}; 500 static const uint8_t expected[] = { 0x31}; 501 static const uint8_t expectedSUB[] = { 0x31, 0x1A}; 502 static const int32_t offsets[] = { 0x00, 1}; 503 504 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 505 static const uint8_t expected2[] = { 0x31, 0x1A,0x32}; 506 static const int32_t offsets2[] = { 0x00, 0x01,0x02}; 507 508 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 509 static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A}; 510 static const int32_t offsets3MBCS[] = { 0x00, 0x01, 0x02 }; 511 512 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 513 static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a }; 514 static const int32_t offsets4MBCS[] = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 }; 515 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 516 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, TRUE, U_ZERO_ERROR)) 517 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 518 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 519 expected, sizeof(expected), "iso-2022-cn", offsets, FALSE, U_ZERO_ERROR)) 520 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 521 522 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 523 expected2, sizeof(expected2), "iso-2022-cn", offsets2, TRUE, U_ZERO_ERROR)) 524 log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n"); 525 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 526 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 527 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 528 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 529 expected2, sizeof(expected2), "iso-2022-cn", offsets2, FALSE, U_ZERO_ERROR)) 530 log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n"); 531 532 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 533 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, TRUE, U_ZERO_ERROR)) 534 log_err("u->iso-2022-cn [UCNV_MBCS] \n"); 535 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 536 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, FALSE, U_ZERO_ERROR)) 537 log_err("u-> iso-2022-cn[UCNV_MBCS] \n"); 538 539 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 540 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, TRUE, U_ZERO_ERROR)) 541 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 542 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 543 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, FALSE, U_ZERO_ERROR)) 544 log_err("u-> iso-2022-cn [UCNV_MBCS] \n"); 545 } 546 */ 547 /* END android-removed */ 548 549 /*iso-2022-kr*/ 550 log_verbose("Testing for iso-2022-kr\n"); 551 { 552 static const UChar sampleText[] = { 0x0031, 0xd801}; 553 static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31}; 554 static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A}; 555 static const int32_t offsets[] = { -1, -1, -1, -1, 0x00, 1}; 556 557 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 558 static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32}; 559 static const int32_t offsets2[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02}; 560 561 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 562 static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43, 0x51, 0x50, 0x1A }; 563 static const int32_t offsets3MBCS[] = { -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x02 }; 564 565 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 566 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, TRUE, U_ZERO_ERROR)) 567 log_err("u-> iso-2022-kr [UCNV_MBCS] \n"); 568 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 569 expected, sizeof(expected), "iso-2022-kr", offsets, FALSE, U_ZERO_ERROR)) 570 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 571 572 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 573 expected2, sizeof(expected2), "iso-2022-kr", offsets2, TRUE, U_ZERO_ERROR)) 574 log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n"); 575 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 576 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 577 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 578 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 579 expected2, sizeof(expected2), "iso-2022-kr", offsets2, FALSE, U_ZERO_ERROR)) 580 log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n"); 581 582 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 583 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, TRUE, U_ZERO_ERROR)) 584 log_err("u->iso-2022-kr [UCNV_MBCS] \n"); 585 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 586 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, FALSE, U_ZERO_ERROR)) 587 log_err("u-> iso-2022-kr[UCNV_MBCS] \n"); 588 } 589 590 /*HZ*/ 591 log_verbose("Testing for HZ\n"); 592 { 593 static const UChar sampleText[] = { 0x0031, 0xd801}; 594 static const uint8_t expected[] = { 0x7e, 0x7d, 0x31}; 595 static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A}; 596 static const int32_t offsets[] = { 0x00, 0x00, 0x00, 1}; 597 598 static const UChar sampleText2[] = { 0x0031, 0xd801, 0x0032}; 599 static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31, 0x1A, 0x32 }; 600 static const int32_t offsets2[] = { 0x00, 0x00, 0x00, 0x01, 0x02 }; 601 602 static const UChar sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01}; 603 static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50, 0x1A }; 604 static const int32_t offsets3MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x02}; 605 606 static const UChar sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01}; 607 static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a }; 608 static const int32_t offsets4MBCS[] = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 }; 609 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 610 expectedSUB, sizeof(expectedSUB), "HZ", offsets, TRUE, U_ZERO_ERROR)) 611 log_err("u-> HZ [UCNV_MBCS] \n"); 612 if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText), 613 expected, sizeof(expected), "HZ", offsets, FALSE, U_ZERO_ERROR)) 614 log_err("u-> ibm-1363 [UCNV_MBCS] \n"); 615 616 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 617 expected2, sizeof(expected2), "HZ", offsets2, TRUE, U_ZERO_ERROR)) 618 log_err("u->HZ[UCNV_DBCS] did not match\n"); 619 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 620 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 621 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 622 if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2), 623 expected2, sizeof(expected2), "HZ", offsets2, FALSE, U_ZERO_ERROR)) 624 log_err("u-> HZ [UCNV_DBCS] did not match\n"); 625 626 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 627 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, TRUE, U_ZERO_ERROR)) 628 log_err("u->HZ [UCNV_MBCS] \n"); 629 if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS), 630 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, FALSE, U_ZERO_ERROR)) 631 log_err("u-> HZ[UCNV_MBCS] \n"); 632 633 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 634 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, TRUE, U_ZERO_ERROR)) 635 log_err("u-> HZ [UCNV_MBCS] \n"); 636 if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS), 637 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, FALSE, U_ZERO_ERROR)) 638 log_err("u-> HZ [UCNV_MBCS] \n"); 639 } 640 #endif 641 } 642 643 #if !UCONFIG_NO_LEGACY_CONVERSION 644 /*test different convertToUnicode error behaviours*/ 645 static void TestToUnicodeErrorBehaviour() 646 { 647 log_verbose("Testing error conditions for DBCS\n"); 648 { 649 uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04}; 650 const UChar expected[] = { 0x00a1 }; 651 652 if(!convertToU(sampleText, sizeof(sampleText), 653 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, TRUE, U_AMBIGUOUS_ALIAS_WARNING )) 654 log_err("DBCS (ibm-1363)->Unicode did not match.\n"); 655 if(!convertToU(sampleText, sizeof(sampleText), 656 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, FALSE, U_AMBIGUOUS_ALIAS_WARNING )) 657 log_err("DBCS (ibm-1363)->Unicode with flush = false did not match.\n"); 658 } 659 log_verbose("Testing error conditions for SBCS\n"); 660 { 661 uint8_t sampleText[] = { 0xa2, 0xFF}; 662 const UChar expected[] = { 0x00c2 }; 663 664 /* uint8_t sampleText2[] = { 0xa2, 0x70 }; 665 const UChar expected2[] = { 0x0073 };*/ 666 667 if(!convertToU(sampleText, sizeof(sampleText), 668 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, TRUE, U_ZERO_ERROR )) 669 log_err("SBCS (ibm-1051)->Unicode did not match.\n"); 670 if(!convertToU(sampleText, sizeof(sampleText), 671 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, FALSE, U_ZERO_ERROR )) 672 log_err("SBCS (ibm-1051)->Unicode with flush = false did not match.\n"); 673 674 } 675 } 676 677 static void TestGetNextErrorBehaviour(){ 678 /*Test for unassigned character*/ 679 #define INPUT_SIZE 1 680 static const char input1[INPUT_SIZE]={ 0x70 }; 681 const char* source=(const char*)input1; 682 UErrorCode err=U_ZERO_ERROR; 683 UChar32 c=0; 684 UConverter *cnv=ucnv_open("ibm-424", &err); 685 if(U_FAILURE(err)) { 686 log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err)); 687 return; 688 } 689 c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err); 690 if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){ 691 log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n", myErrorName(err), c); 692 } 693 ucnv_close(cnv); 694 } 695 #endif 696 697 #define MAX_UTF16_LEN 2 698 #define MAX_UTF8_LEN 4 699 700 /*Regression test for utf8 converter*/ 701 static void TestRegressionUTF8(){ 702 UChar32 currCh = 0; 703 int32_t offset8; 704 int32_t offset16; 705 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 706 uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH); 707 708 while (currCh <= UNICODE_LIMIT) { 709 offset16 = 0; 710 offset8 = 0; 711 while(currCh <= UNICODE_LIMIT 712 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 713 && offset8 < (MAX_LENGTH - MAX_UTF8_LEN)) 714 { 715 if (currCh == SURROGATE_HIGH_START) { 716 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 717 } 718 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 719 UTF8_APPEND_CHAR_SAFE(utf8, offset8, MAX_LENGTH, currCh); 720 currCh++; 721 } 722 if(!convertFromU(standardForm, offset16, 723 utf8, offset8, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 724 log_err("Unicode->UTF8 did not match.\n"); 725 } 726 if(!convertToU(utf8, offset8, 727 standardForm, offset16, "UTF8", 0, TRUE, U_ZERO_ERROR )) { 728 log_err("UTF8->Unicode did not match.\n"); 729 } 730 } 731 732 free(standardForm); 733 free(utf8); 734 735 { 736 static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 }; 737 static const UChar expected[] = { 0x0301, 0x0300 }; 738 UConverter *conv8; 739 UErrorCode err = U_ZERO_ERROR; 740 UChar pivotBuffer[100]; 741 const UChar* const pivEnd = pivotBuffer + 100; 742 const char* srcBeg; 743 const char* srcEnd; 744 UChar* pivBeg; 745 746 conv8 = ucnv_open("UTF-8", &err); 747 748 srcBeg = src8; 749 pivBeg = pivotBuffer; 750 srcEnd = src8 + 3; 751 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 752 if (srcBeg != srcEnd) { 753 log_err("Did not consume whole buffer on first call.\n"); 754 } 755 756 srcEnd = src8 + 4; 757 ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 758 if (srcBeg != srcEnd) { 759 log_err("Did not consume whole buffer on second call.\n"); 760 } 761 762 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 763 log_err("Did not get expected results for UTF-8.\n"); 764 } 765 ucnv_close(conv8); 766 } 767 } 768 769 #define MAX_UTF32_LEN 1 770 771 static void TestRegressionUTF32(){ 772 #if !UCONFIG_ONLY_HTML_CONVERSION 773 UChar32 currCh = 0; 774 int32_t offset32; 775 int32_t offset16; 776 UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar)); 777 UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32)); 778 779 while (currCh <= UNICODE_LIMIT) { 780 offset16 = 0; 781 offset32 = 0; 782 while(currCh <= UNICODE_LIMIT 783 && offset16 < (MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN) 784 && offset32 < (MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN)) 785 { 786 if (currCh == SURROGATE_HIGH_START) { 787 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */ 788 } 789 UTF16_APPEND_CHAR_SAFE(standardForm, offset16, MAX_LENGTH, currCh); 790 UTF32_APPEND_CHAR_SAFE(utf32, offset32, MAX_LENGTH, currCh); 791 currCh++; 792 } 793 if(!convertFromU(standardForm, offset16, 794 (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 795 log_err("Unicode->UTF32 did not match.\n"); 796 } 797 if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32), 798 standardForm, offset16, "UTF32_PlatformEndian", 0, TRUE, U_ZERO_ERROR )) { 799 log_err("UTF32->Unicode did not match.\n"); 800 } 801 } 802 free(standardForm); 803 free(utf32); 804 805 { 806 /* Check for lone surrogate error handling. */ 807 static const UChar sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 }; 808 static const UChar sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 }; 809 static const uint8_t expectedUTF32BE[] = { 810 0x00, 0x00, 0x00, 0x31, 811 0x00, 0x00, 0xff, 0xfd, 812 0x00, 0x00, 0x00, 0x32 813 }; 814 static const uint8_t expectedUTF32LE[] = { 815 0x31, 0x00, 0x00, 0x00, 816 0xfd, 0xff, 0x00, 0x00, 817 0x32, 0x00, 0x00, 0x00 818 }; 819 static const int32_t offsetsUTF32[] = { 820 0x00, 0x00, 0x00, 0x00, 821 0x01, 0x01, 0x01, 0x01, 822 0x02, 0x02, 0x02, 0x02 823 }; 824 825 if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate), 826 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 827 log_err("u->UTF-32BE\n"); 828 if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate), 829 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 830 log_err("u->UTF-32BE\n"); 831 832 if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate), 833 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 834 log_err("u->UTF-32LE\n"); 835 if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate), 836 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, TRUE, U_ZERO_ERROR)) 837 log_err("u->UTF-32LE\n"); 838 } 839 840 { 841 static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 }; 842 static const UChar expected[] = { 0x0031, 0x0030 }; 843 UConverter *convBE; 844 UErrorCode err = U_ZERO_ERROR; 845 UChar pivotBuffer[100]; 846 const UChar* const pivEnd = pivotBuffer + 100; 847 const char* srcBeg; 848 const char* srcEnd; 849 UChar* pivBeg; 850 851 convBE = ucnv_open("UTF-32BE", &err); 852 853 srcBeg = srcBE; 854 pivBeg = pivotBuffer; 855 srcEnd = srcBE + 5; 856 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 857 if (srcBeg != srcEnd) { 858 log_err("Did not consume whole buffer on first call.\n"); 859 } 860 861 srcEnd = srcBE + 8; 862 ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 863 if (srcBeg != srcEnd) { 864 log_err("Did not consume whole buffer on second call.\n"); 865 } 866 867 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 868 log_err("Did not get expected results for UTF-32BE.\n"); 869 } 870 ucnv_close(convBE); 871 } 872 { 873 static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 }; 874 static const UChar expected[] = { 0x0031, 0x0030 }; 875 UConverter *convLE; 876 UErrorCode err = U_ZERO_ERROR; 877 UChar pivotBuffer[100]; 878 const UChar* const pivEnd = pivotBuffer + 100; 879 const char* srcBeg; 880 const char* srcEnd; 881 UChar* pivBeg; 882 883 convLE = ucnv_open("UTF-32LE", &err); 884 885 srcBeg = srcLE; 886 pivBeg = pivotBuffer; 887 srcEnd = srcLE + 5; 888 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, FALSE, &err); 889 if (srcBeg != srcEnd) { 890 log_err("Did not consume whole buffer on first call.\n"); 891 } 892 893 srcEnd = srcLE + 8; 894 ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, TRUE, &err); 895 if (srcBeg != srcEnd) { 896 log_err("Did not consume whole buffer on second call.\n"); 897 } 898 899 if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) { 900 log_err("Did not get expected results for UTF-32LE.\n"); 901 } 902 ucnv_close(convLE); 903 } 904 #endif 905 } 906 907 /*Walk through the available converters*/ 908 static void TestAvailableConverters(){ 909 UErrorCode status=U_ZERO_ERROR; 910 UConverter *conv=NULL; 911 int32_t i=0; 912 for(i=0; i < ucnv_countAvailable(); i++){ 913 status=U_ZERO_ERROR; 914 conv=ucnv_open(ucnv_getAvailableName(i), &status); 915 if(U_FAILURE(status)){ 916 log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n", 917 ucnv_getAvailableName(i), myErrorName(status)); 918 continue; 919 } 920 ucnv_close(conv); 921 } 922 923 } 924 925 static void TestFlushInternalBuffer(){ 926 TestWithBufferSize(MAX_LENGTH, 1); 927 TestWithBufferSize(1, 1); 928 TestWithBufferSize(1, MAX_LENGTH); 929 TestWithBufferSize(MAX_LENGTH, MAX_LENGTH); 930 } 931 932 static void TestWithBufferSize(int32_t insize, int32_t outsize){ 933 934 gInBufferSize =insize; 935 gOutBufferSize = outsize; 936 937 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 938 { 939 UChar sampleText[] = 940 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E }; 941 const uint8_t expectedUTF8[] = 942 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 943 int32_t toUTF8Offs[] = 944 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07}; 945 /* int32_t fmUTF8Offs[] = 946 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/ 947 948 /*UTF-8*/ 949 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 950 expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,FALSE)) 951 log_err("u-> UTF8 did not match.\n"); 952 } 953 954 #if !UCONFIG_NO_LEGACY_CONVERSION 955 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n"); 956 { 957 UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 }; 958 const uint8_t toIBM943[]= { 0x61, 959 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 960 0x25, 0x55, 0x44, 0x43, 0x30, 0x31, 961 0x25, 0x55, 0x44, 0x38, 0x30, 0x31, 962 0x61 }; 963 int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4}; 964 965 if(!testConvertFromU(inputTest, UPRV_LENGTHOF(inputTest), 966 toIBM943, sizeof(toIBM943), "ibm-943", 967 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,FALSE)) 968 log_err("u-> ibm-943 with subst with value did not match.\n"); 969 } 970 #endif 971 972 log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n"); 973 { 974 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c, 975 0xe0, 0x80, 0x61}; 976 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061}; 977 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006}; 978 979 if(!testConvertToU(sampleText1, sizeof(sampleText1), 980 expected1, UPRV_LENGTHOF(expected1),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,FALSE)) 981 log_err("utf8->u with substitute did not match.\n");; 982 } 983 984 #if !UCONFIG_NO_LEGACY_CONVERSION 985 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n"); 986 /*to Unicode*/ 987 { 988 const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf, 989 0x81, 0xad, /*unassigned*/ 990 0x89, 0xd3 }; 991 UChar IBM_943toUnicode[] = { 0x0000, 0x6D63, 992 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44, 993 0x7B87}; 994 int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5}; 995 996 if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU), 997 IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943", 998 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,FALSE)) 999 log_err("ibm-943->u with substitute with value did not match.\n"); 1000 1001 } 1002 #endif 1003 } 1004 1005 static UBool convertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1006 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1007 { 1008 1009 int32_t i=0; 1010 char *p=0; 1011 const UChar *src; 1012 char buffer[MAX_LENGTH]; 1013 int32_t offsetBuffer[MAX_LENGTH]; 1014 int32_t *offs=0; 1015 char *targ; 1016 char *targetLimit; 1017 UChar *sourceLimit=0; 1018 UErrorCode status = U_ZERO_ERROR; 1019 UConverter *conv = 0; 1020 conv = ucnv_open(codepage, &status); 1021 if(U_FAILURE(status)) 1022 { 1023 log_data_err("Couldn't open converter %s\n",codepage); 1024 return TRUE; 1025 } 1026 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1027 1028 for(i=0; i<MAX_LENGTH; i++){ 1029 buffer[i]=(char)0xF0; 1030 offsetBuffer[i]=0xFF; 1031 } 1032 1033 src=source; 1034 sourceLimit=(UChar*)src+(sourceLen); 1035 targ=buffer; 1036 targetLimit=targ+MAX_LENGTH; 1037 offs=offsetBuffer; 1038 ucnv_fromUnicode (conv, 1039 (char **)&targ, 1040 (const char *)targetLimit, 1041 &src, 1042 sourceLimit, 1043 expectOffsets ? offs : NULL, 1044 doFlush, 1045 &status); 1046 ucnv_close(conv); 1047 if(status != expectedStatus){ 1048 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1049 return FALSE; 1050 } 1051 1052 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1053 sourceLen, targ-buffer); 1054 1055 if(expectLen != targ-buffer) 1056 { 1057 log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1058 log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage); 1059 printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer)); 1060 printSeqErr((const unsigned char*)expect, expectLen); 1061 return FALSE; 1062 } 1063 1064 if(memcmp(buffer, expect, expectLen)){ 1065 log_err("String does not match. FROM Unicode to codePage%s\n", codepage); 1066 log_info("\nGot:"); 1067 printSeqErr((const unsigned char *)buffer, expectLen); 1068 log_info("\nExpected:"); 1069 printSeqErr((const unsigned char *)expect, expectLen); 1070 return FALSE; 1071 } 1072 else { 1073 log_verbose("Matches!\n"); 1074 } 1075 1076 if (expectOffsets != 0){ 1077 log_verbose("comparing %d offsets..\n", targ-buffer); 1078 if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){ 1079 log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage); 1080 log_info("\nGot : "); 1081 printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer)); 1082 for(p=buffer;p<targ;p++) 1083 log_info("%d, ", offsetBuffer[p-buffer]); 1084 log_info("\nExpected: "); 1085 for(i=0; i< (targ-buffer); i++) 1086 log_info("%d,", expectOffsets[i]); 1087 } 1088 } 1089 1090 return TRUE; 1091 } 1092 1093 1094 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen, 1095 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus) 1096 { 1097 UErrorCode status = U_ZERO_ERROR; 1098 UConverter *conv = 0; 1099 int32_t i=0; 1100 UChar *p=0; 1101 const char* src; 1102 UChar buffer[MAX_LENGTH]; 1103 int32_t offsetBuffer[MAX_LENGTH]; 1104 int32_t *offs=0; 1105 UChar *targ; 1106 UChar *targetLimit; 1107 uint8_t *sourceLimit=0; 1108 1109 1110 1111 conv = ucnv_open(codepage, &status); 1112 if(U_FAILURE(status)) 1113 { 1114 log_data_err("Couldn't open converter %s\n",codepage); 1115 return TRUE; 1116 } 1117 log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status)); 1118 1119 1120 1121 for(i=0; i<MAX_LENGTH; i++){ 1122 buffer[i]=0xFFFE; 1123 offsetBuffer[i]=-1; 1124 } 1125 1126 src=(const char *)source; 1127 sourceLimit=(uint8_t*)(src+(sourceLen)); 1128 targ=buffer; 1129 targetLimit=targ+MAX_LENGTH; 1130 offs=offsetBuffer; 1131 1132 1133 1134 ucnv_toUnicode (conv, 1135 &targ, 1136 targetLimit, 1137 (const char **)&src, 1138 (const char *)sourceLimit, 1139 expectOffsets ? offs : NULL, 1140 doFlush, 1141 &status); 1142 1143 ucnv_close(conv); 1144 if(status != expectedStatus){ 1145 log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus)); 1146 return FALSE; 1147 } 1148 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1149 sourceLen, targ-buffer); 1150 1151 1152 1153 1154 log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2); 1155 1156 if (expectOffsets != 0) { 1157 if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){ 1158 1159 log_err("did not get the expected offsets from %s To UNICODE\n", codepage); 1160 log_info("\nGot : "); 1161 for(p=buffer;p<targ;p++) 1162 log_info("%d, ", offsetBuffer[p-buffer]); 1163 log_info("\nExpected: "); 1164 for(i=0; i<(targ-buffer); i++) 1165 log_info("%d, ", expectOffsets[i]); 1166 log_info("\nGot result:"); 1167 for(i=0; i<(targ-buffer); i++) 1168 log_info("0x%04X,", buffer[i]); 1169 log_info("\nFrom Input:"); 1170 for(i=0; i<(src-(const char *)source); i++) 1171 log_info("0x%02X,", (unsigned char)source[i]); 1172 log_info("\n"); 1173 } 1174 } 1175 if(memcmp(buffer, expect, expectLen*2)){ 1176 log_err("String does not match. from codePage %s TO Unicode\n", codepage); 1177 log_info("\nGot:"); 1178 printUSeqErr(buffer, expectLen); 1179 log_info("\nExpected:"); 1180 printUSeqErr(expect, expectLen); 1181 return FALSE; 1182 } 1183 else { 1184 log_verbose("Matches!\n"); 1185 } 1186 1187 return TRUE; 1188 } 1189 1190 1191 static UBool testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 1192 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset) 1193 { 1194 UErrorCode status = U_ZERO_ERROR; 1195 UConverter *conv = 0; 1196 char junkout[MAX_LENGTH]; /* FIX */ 1197 int32_t junokout[MAX_LENGTH]; /* FIX */ 1198 char *p; 1199 const UChar *src; 1200 char *end; 1201 char *targ; 1202 int32_t *offs; 1203 int i; 1204 int32_t realBufferSize; 1205 char *realBufferEnd; 1206 const UChar *realSourceEnd; 1207 const UChar *sourceLimit; 1208 UBool checkOffsets = TRUE; 1209 UBool doFlush; 1210 1211 UConverterFromUCallback oldAction = NULL; 1212 const void* oldContext = NULL; 1213 1214 for(i=0;i<MAX_LENGTH;i++) 1215 junkout[i] = (char)0xF0; 1216 for(i=0;i<MAX_LENGTH;i++) 1217 junokout[i] = 0xFF; 1218 1219 setNuConvTestName(codepage, "FROM"); 1220 1221 log_verbose("\n========= %s\n", gNuConvTestName); 1222 1223 conv = ucnv_open(codepage, &status); 1224 if(U_FAILURE(status)) 1225 { 1226 log_data_err("Couldn't open converter %s\n",codepage); 1227 return TRUE; 1228 } 1229 1230 log_verbose("Converter opened..\n"); 1231 /*----setting the callback routine----*/ 1232 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1233 if (U_FAILURE(status)) { 1234 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1235 } 1236 /*------------------------*/ 1237 1238 src = source; 1239 targ = junkout; 1240 offs = junokout; 1241 1242 realBufferSize = UPRV_LENGTHOF(junkout); 1243 realBufferEnd = junkout + realBufferSize; 1244 realSourceEnd = source + sourceLen; 1245 1246 if ( gOutBufferSize != realBufferSize ) 1247 checkOffsets = FALSE; 1248 1249 if( gInBufferSize != MAX_LENGTH ) 1250 checkOffsets = FALSE; 1251 1252 do 1253 { 1254 end = nct_min(targ + gOutBufferSize, realBufferEnd); 1255 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 1256 1257 doFlush = (UBool)(sourceLimit == realSourceEnd); 1258 1259 if(targ == realBufferEnd) 1260 { 1261 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 1262 return FALSE; 1263 } 1264 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 1265 1266 1267 status = U_ZERO_ERROR; 1268 if(gInBufferSize ==999 && gOutBufferSize==999) 1269 doFlush = FALSE; 1270 ucnv_fromUnicode (conv, 1271 (char **)&targ, 1272 (const char *)end, 1273 &src, 1274 sourceLimit, 1275 offs, 1276 doFlush, /* flush if we're at the end of the input data */ 1277 &status); 1278 if(testReset) 1279 ucnv_resetToUnicode(conv); 1280 if(gInBufferSize ==999 && gOutBufferSize==999) 1281 ucnv_resetToUnicode(conv); 1282 1283 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 1284 1285 if(U_FAILURE(status)) { 1286 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1287 return FALSE; 1288 } 1289 1290 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 1291 sourceLen, targ-junkout); 1292 if(getTestOption(VERBOSITY_OPTION)) 1293 { 1294 char junk[999]; 1295 char offset_str[999]; 1296 char *ptr; 1297 1298 junk[0] = 0; 1299 offset_str[0] = 0; 1300 for(ptr = junkout;ptr<targ;ptr++) 1301 { 1302 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr); 1303 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]); 1304 } 1305 1306 log_verbose(junk); 1307 printSeq((const unsigned char *)expect, expectLen); 1308 if ( checkOffsets ) 1309 { 1310 log_verbose("\nOffsets:"); 1311 log_verbose(offset_str); 1312 } 1313 log_verbose("\n"); 1314 } 1315 ucnv_close(conv); 1316 1317 1318 if(expectLen != targ-junkout) 1319 { 1320 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1321 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 1322 log_info("\nGot:"); 1323 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1324 log_info("\nExpected:"); 1325 printSeqErr((const unsigned char*)expect, expectLen); 1326 return FALSE; 1327 } 1328 1329 if (checkOffsets && (expectOffsets != 0) ) 1330 { 1331 log_verbose("comparing %d offsets..\n", targ-junkout); 1332 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 1333 log_err("did not get the expected offsets. %s", gNuConvTestName); 1334 log_err("Got : "); 1335 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 1336 for(p=junkout;p<targ;p++) 1337 log_err("%d, ", junokout[p-junkout]); 1338 log_err("\nExpected: "); 1339 for(i=0; i<(targ-junkout); i++) 1340 log_err("%d,", expectOffsets[i]); 1341 } 1342 } 1343 1344 log_verbose("comparing..\n"); 1345 if(!memcmp(junkout, expect, expectLen)) 1346 { 1347 log_verbose("Matches!\n"); 1348 return TRUE; 1349 } 1350 else 1351 { 1352 log_err("String does not match. %s\n", gNuConvTestName); 1353 printUSeqErr(source, sourceLen); 1354 log_info("\nGot:"); 1355 printSeqErr((const unsigned char *)junkout, expectLen); 1356 log_info("\nExpected:"); 1357 printSeqErr((const unsigned char *)expect, expectLen); 1358 1359 return FALSE; 1360 } 1361 } 1362 1363 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 1364 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset) 1365 { 1366 UErrorCode status = U_ZERO_ERROR; 1367 UConverter *conv = 0; 1368 UChar junkout[MAX_LENGTH]; /* FIX */ 1369 int32_t junokout[MAX_LENGTH]; /* FIX */ 1370 const char *src; 1371 const char *realSourceEnd; 1372 const char *srcLimit; 1373 UChar *p; 1374 UChar *targ; 1375 UChar *end; 1376 int32_t *offs; 1377 int i; 1378 UBool checkOffsets = TRUE; 1379 int32_t realBufferSize; 1380 UChar *realBufferEnd; 1381 UBool doFlush; 1382 1383 UConverterToUCallback oldAction = NULL; 1384 const void* oldContext = NULL; 1385 1386 1387 for(i=0;i<MAX_LENGTH;i++) 1388 junkout[i] = 0xFFFE; 1389 1390 for(i=0;i<MAX_LENGTH;i++) 1391 junokout[i] = -1; 1392 1393 setNuConvTestName(codepage, "TO"); 1394 1395 log_verbose("\n========= %s\n", gNuConvTestName); 1396 1397 conv = ucnv_open(codepage, &status); 1398 if(U_FAILURE(status)) 1399 { 1400 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 1401 return TRUE; 1402 } 1403 1404 log_verbose("Converter opened..\n"); 1405 /*----setting the callback routine----*/ 1406 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status); 1407 if (U_FAILURE(status)) { 1408 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status)); 1409 } 1410 /*-------------------------------------*/ 1411 1412 src = (const char *)source; 1413 targ = junkout; 1414 offs = junokout; 1415 1416 realBufferSize = UPRV_LENGTHOF(junkout); 1417 realBufferEnd = junkout + realBufferSize; 1418 realSourceEnd = src + sourcelen; 1419 1420 if ( gOutBufferSize != realBufferSize ) 1421 checkOffsets = FALSE; 1422 1423 if( gInBufferSize != MAX_LENGTH ) 1424 checkOffsets = FALSE; 1425 1426 do 1427 { 1428 end = nct_min( targ + gOutBufferSize, realBufferEnd); 1429 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 1430 1431 if(targ == realBufferEnd) 1432 { 1433 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 1434 return FALSE; 1435 } 1436 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 1437 1438 /* oldTarg = targ; */ 1439 1440 status = U_ZERO_ERROR; 1441 doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : FALSE); 1442 1443 ucnv_toUnicode (conv, 1444 &targ, 1445 end, 1446 (const char **)&src, 1447 (const char *)srcLimit, 1448 offs, 1449 doFlush, /* flush if we're at the end of hte source data */ 1450 &status); 1451 if(testReset) 1452 ucnv_resetFromUnicode(conv); 1453 if(gInBufferSize ==999 && gOutBufferSize==999) 1454 ucnv_resetToUnicode(conv); 1455 /* offs += (targ-oldTarg); */ 1456 1457 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 1458 1459 if(U_FAILURE(status)) 1460 { 1461 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 1462 return FALSE; 1463 } 1464 1465 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 1466 sourcelen, targ-junkout); 1467 if(getTestOption(VERBOSITY_OPTION)) 1468 { 1469 char junk[999]; 1470 char offset_str[999]; 1471 1472 UChar *ptr; 1473 1474 junk[0] = 0; 1475 offset_str[0] = 0; 1476 1477 for(ptr = junkout;ptr<targ;ptr++) 1478 { 1479 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 1480 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 1481 } 1482 1483 log_verbose(junk); 1484 1485 if ( checkOffsets ) 1486 { 1487 log_verbose("\nOffsets:"); 1488 log_verbose(offset_str); 1489 } 1490 log_verbose("\n"); 1491 } 1492 ucnv_close(conv); 1493 1494 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 1495 1496 if (checkOffsets && (expectOffsets != 0)) 1497 { 1498 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 1499 1500 log_err("did not get the expected offsets. %s",gNuConvTestName); 1501 for(p=junkout;p<targ;p++) 1502 log_err("%d, ", junokout[p-junkout]); 1503 log_err("\nExpected: "); 1504 for(i=0; i<(targ-junkout); i++) 1505 log_err("%d,", expectOffsets[i]); 1506 log_err(""); 1507 for(i=0; i<(targ-junkout); i++) 1508 log_err("%X,", junkout[i]); 1509 log_err(""); 1510 for(i=0; i<(src-(const char *)source); i++) 1511 log_err("%X,", (unsigned char)source[i]); 1512 } 1513 } 1514 1515 if(!memcmp(junkout, expect, expectlen*2)) 1516 { 1517 log_verbose("Matches!\n"); 1518 return TRUE; 1519 } 1520 else 1521 { 1522 log_err("String does not match. %s\n", gNuConvTestName); 1523 log_verbose("String does not match. %s\n", gNuConvTestName); 1524 log_info("\nGot:"); 1525 printUSeq(junkout, expectlen); 1526 log_info("\nExpected:"); 1527 printUSeq(expect, expectlen); 1528 return FALSE; 1529 } 1530 } 1531 1532 1533 static void TestResetBehaviour(void){ 1534 #if !UCONFIG_NO_LEGACY_CONVERSION 1535 log_verbose("Testing Reset for DBCS and MBCS\n"); 1536 { 1537 static const UChar sampleText[] = {0x00a1, 0xd801, 0xdc01, 0x00a4}; 1538 static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4}; 1539 static const int32_t offsets[] = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 }; 1540 1541 1542 static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8}; 1543 static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7}; 1544 static const int32_t offsets1[] = { 0,2,4,6}; 1545 1546 /*DBCS*/ 1547 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1548 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1549 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1550 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1551 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1552 log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n"); 1553 1554 if(!testConvertToU(expected1, sizeof(expected1), 1555 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1556 offsets1, TRUE)) 1557 log_err("ibm-1363 -> did not match.\n"); 1558 /*MBCS*/ 1559 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1560 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1561 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1562 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1563 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1564 log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n"); 1565 1566 if(!testConvertToU(expected1, sizeof(expected1), 1567 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1568 offsets1, TRUE)) 1569 log_err("ibm-1363 -> did not match.\n"); 1570 1571 } 1572 1573 log_verbose("Testing Reset for ISO-2022-jp\n"); 1574 { 1575 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1576 1577 static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1578 0x31,0x1A, 0x32}; 1579 1580 1581 static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 }; 1582 1583 1584 static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1585 static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42, 1586 0x31,0x1A, 0x32}; 1587 static const int32_t offsets1[] = { 3,5,10,11,12}; 1588 1589 /*iso-2022-jp*/ 1590 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1591 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1592 log_err("u-> not match.\n"); 1593 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1594 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1595 log_err("u-> not match.\n"); 1596 1597 if(!testConvertToU(expected1, sizeof(expected1), 1598 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1599 offsets1, TRUE)) 1600 log_err("iso-2022-jp -> did not match.\n"); 1601 1602 } 1603 1604 /* BEGIN android-removed */ 1605 /* To save space, Android does not build full ISO-2022-CN tables. 1606 We skip the tests for ISO-2022-CN. */ 1607 /* 1608 log_verbose("Testing Reset for ISO-2022-cn\n"); 1609 { 1610 static const UChar sampleText[] = { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1611 1612 static const uint8_t expected[] = { 1613 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1614 0x36, 0x21, 1615 0x0f, 0x31, 1616 0x1A, 1617 0x32 1618 }; 1619 1620 1621 static const int32_t offsets[] = { 1622 0, 0, 0, 0, 0, 0, 0, 1623 1, 1, 1624 2, 2, 1625 3, 1626 5, }; 1627 1628 UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032}; 1629 static const uint8_t expected1[] = { 1630 0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B, 1631 0x36, 0x21, 1632 0x1B, 0x24, 0x29, 0x47, 0x24, 0x22, 1633 0x0f, 0x1A, 1634 0x32 1635 }; 1636 static const int32_t offsets1[] = { 5,7,13,16,17}; 1637 1638 // iso-2022-CN android-change 1639 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1640 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1641 log_err("u-> not match.\n"); 1642 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1643 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1644 log_err("u-> not match.\n"); 1645 1646 if(!testConvertToU(expected1, sizeof(expected1), 1647 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1648 offsets1, TRUE)) 1649 log_err("iso-2022-cn -> did not match.\n"); 1650 } 1651 */ 1652 /* END android-removed */ 1653 1654 log_verbose("Testing Reset for ISO-2022-kr\n"); 1655 { 1656 UChar sampleText[] = { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1657 1658 static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43, 1659 0x0E, 0x6C, 0x69, 1660 0x0f, 0x1A, 1661 0x0e, 0x6F, 0x4B, 1662 0x0F, 0x31, 1663 0x1A, 1664 0x32 }; 1665 1666 static const int32_t offsets[] = {-1, -1, -1, -1, 1667 0, 0, 0, 1668 1, 1, 1669 3, 3, 3, 1670 4, 4, 1671 5, 1672 7, 1673 }; 1674 static const UChar sampleText1[] = { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032}; 1675 1676 static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43, 1677 0x0E, 0x6C, 0x69, 1678 0x0f, 0x41, 1679 0x0e, 0x6F, 0x4B, 1680 0x0F, 0x31, 1681 0x42, 1682 0x32 }; 1683 1684 static const int32_t offsets1[] = { 1685 5, 8, 10, 1686 13, 14, 15 1687 1688 }; 1689 /*iso-2022-kr*/ 1690 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1691 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1692 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1693 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1694 expected, sizeof(expected), "iso-2022-kr", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1695 log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n"); 1696 if(!testConvertToU(expected1, sizeof(expected1), 1697 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1698 offsets1, TRUE)) 1699 log_err("iso-2022-kr -> did not match.\n"); 1700 } 1701 1702 log_verbose("Testing Reset for HZ\n"); 1703 { 1704 static const UChar sampleText[] = { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032}; 1705 1706 static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B, 1707 0x7E, 0x7D, 0x1A, 1708 0x7E, 0x7B, 0x36, 0x21, 1709 0x7E, 0x7D, 0x31, 1710 0x1A, 1711 0x32 }; 1712 1713 1714 static const int32_t offsets[] = {0,0,0,0, 1715 1,1,1, 1716 3,3,3,3, 1717 4,4,4, 1718 5, 1719 7,}; 1720 static const UChar sampleText1[] = { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032}; 1721 1722 static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B, 1723 0x7E, 0x7D, 0x35, 1724 0x7E, 0x7B, 0x36, 0x21, 1725 0x7E, 0x7D, 0x31, 1726 0x41, 1727 0x32 }; 1728 1729 1730 static const int32_t offsets1[] = {2,6,9,13,14,15 1731 }; 1732 1733 /*hz*/ 1734 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1735 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1736 log_err("u-> not match.\n"); 1737 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1738 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1739 log_err("u-> not match.\n"); 1740 if(!testConvertToU(expected1, sizeof(expected1), 1741 sampleText1, UPRV_LENGTHOF(sampleText1), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE , 1742 offsets1, TRUE)) 1743 log_err("hz -> did not match.\n"); 1744 } 1745 #endif 1746 1747 /*UTF-8*/ 1748 log_verbose("Testing for UTF8\n"); 1749 { 1750 static const UChar sampleText[] = { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032}; 1751 int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 1752 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 1753 0x04, 0x06 }; 1754 static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31, 1755 0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32}; 1756 1757 1758 static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D }; 1759 /*UTF-8*/ 1760 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1761 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1762 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1763 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1764 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1765 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1766 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1767 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , TRUE)) 1768 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1769 if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 1770 expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , TRUE)) 1771 log_err("u-> UTF8 with offsets and flush true did not match.\n"); 1772 if(!testConvertToU(expected, sizeof(expected), 1773 sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1774 log_err("UTF8 -> did not match.\n"); 1775 if(!testConvertToU(expected, sizeof(expected), 1776 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, TRUE)) 1777 log_err("UTF8 -> did not match.\n"); 1778 if(!testConvertToU(expected, sizeof(expected), 1779 sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1780 log_err("UTF8 -> did not match.\n"); 1781 if(!testConvertToU(expected, sizeof(expected), 1782 sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, TRUE)) 1783 log_err("UTF8 -> did not match.\n"); 1784 1785 } 1786 1787 } 1788 1789 /* Test that U_TRUNCATED_CHAR_FOUND is set. */ 1790 static void 1791 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) { 1792 UConverter *cnv; 1793 1794 UChar buffer[2]; 1795 UChar *target, *targetLimit; 1796 const char *source, *sourceLimit; 1797 1798 UErrorCode errorCode; 1799 1800 errorCode=U_ZERO_ERROR; 1801 cnv=ucnv_open(cnvName, &errorCode); 1802 if(U_FAILURE(errorCode)) { 1803 log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode)); 1804 return; 1805 } 1806 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 1807 if(U_FAILURE(errorCode)) { 1808 log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n", 1809 cnvName, u_errorName(errorCode)); 1810 ucnv_close(cnv); 1811 return; 1812 } 1813 1814 source=(const char *)bytes; 1815 sourceLimit=source+length; 1816 target=buffer; 1817 targetLimit=buffer+UPRV_LENGTHOF(buffer); 1818 1819 /* 1. input bytes with flush=FALSE, then input nothing with flush=TRUE */ 1820 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &errorCode); 1821 if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) { 1822 log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=FALSE: %s, input left %d, output %d\n", 1823 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1824 } 1825 1826 errorCode=U_ZERO_ERROR; 1827 source=sourceLimit; 1828 target=buffer; 1829 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1830 if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) { 1831 log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n", 1832 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer)); 1833 } 1834 1835 /* 2. input bytes with flush=TRUE */ 1836 ucnv_resetToUnicode(cnv); 1837 1838 errorCode=U_ZERO_ERROR; 1839 source=(const char *)bytes; 1840 target=buffer; 1841 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &errorCode); 1842 if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) { 1843 log_err("error TestTruncated(%s, 2): input bytes[%d], flush=TRUE: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n", 1844 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer)); 1845 } 1846 1847 1848 ucnv_close(cnv); 1849 } 1850 1851 static void 1852 TestTruncated() { 1853 static const struct { 1854 const char *cnvName; 1855 uint8_t bytes[8]; /* partial input bytes resulting in no output */ 1856 int32_t length; 1857 } testCases[]={ 1858 { "IMAP-mailbox-name", { 0x26 }, 1 }, /* & */ 1859 { "IMAP-mailbox-name", { 0x26, 0x42 }, 2 }, /* &B */ 1860 { "IMAP-mailbox-name", { 0x26, 0x42, 0x42 }, 3 }, /* &BB */ 1861 { "IMAP-mailbox-name", { 0x26, 0x41, 0x41 }, 3 }, /* &AA */ 1862 1863 { "UTF-7", { 0x2b, 0x42 }, 2 }, /* +B */ 1864 { "UTF-8", { 0xd1 }, 1 }, 1865 1866 { "UTF-16BE", { 0x4e }, 1 }, 1867 { "UTF-16LE", { 0x4e }, 1 }, 1868 { "UTF-16", { 0x4e }, 1 }, 1869 { "UTF-16", { 0xff }, 1 }, 1870 { "UTF-16", { 0xfe, 0xff, 0x4e }, 3 }, 1871 1872 { "UTF-32BE", { 0, 0, 0x4e }, 3 }, 1873 { "UTF-32LE", { 0x4e }, 1 }, 1874 { "UTF-32", { 0, 0, 0x4e }, 3 }, 1875 { "UTF-32", { 0xff }, 1 }, 1876 { "UTF-32", { 0, 0, 0xfe, 0xff, 0 }, 5 }, 1877 { "SCSU", { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */ 1878 1879 #if !UCONFIG_NO_LEGACY_CONVERSION 1880 { "BOCU-1", { 0xd5 }, 1 }, 1881 1882 { "Shift-JIS", { 0xe0 }, 1 }, 1883 1884 { "ibm-939", { 0x0e, 0x41 }, 2 } /* SO 0x41 */ 1885 #else 1886 { "BOCU-1", { 0xd5 }, 1 ,} 1887 #endif 1888 }; 1889 int32_t i; 1890 1891 for(i=0; i<UPRV_LENGTHOF(testCases); ++i) { 1892 doTestTruncated(testCases[i].cnvName, testCases[i].bytes, testCases[i].length); 1893 } 1894 } 1895 1896 typedef struct NameRange { 1897 const char *name; 1898 UChar32 start, end, start2, end2, notStart, notEnd; 1899 } NameRange; 1900 1901 static void 1902 TestUnicodeSet() { 1903 UErrorCode errorCode; 1904 UConverter *cnv; 1905 USet *set; 1906 const char *name; 1907 int32_t i, count; 1908 1909 static const char *const completeSetNames[]={ 1910 "UTF-7", 1911 "UTF-8", 1912 "UTF-16", 1913 "UTF-16BE", 1914 "UTF-16LE", 1915 "UTF-32", 1916 "UTF-32BE", 1917 "UTF-32LE", 1918 "SCSU", 1919 "BOCU-1", 1920 "CESU-8", 1921 #if !UCONFIG_NO_LEGACY_CONVERSION 1922 "gb18030", 1923 #endif 1924 "IMAP-mailbox-name" 1925 }; 1926 #if !UCONFIG_NO_LEGACY_CONVERSION 1927 static const char *const lmbcsNames[]={ 1928 "LMBCS-1", 1929 "LMBCS-2", 1930 "LMBCS-3", 1931 "LMBCS-4", 1932 "LMBCS-5", 1933 "LMBCS-6", 1934 "LMBCS-8", 1935 "LMBCS-11", 1936 "LMBCS-16", 1937 "LMBCS-17", 1938 "LMBCS-18", 1939 "LMBCS-19" 1940 }; 1941 #endif 1942 1943 static const NameRange nameRanges[]={ 1944 { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1945 #if !UCONFIG_NO_LEGACY_CONVERSION 1946 { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff }, 1947 #endif 1948 { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff }, 1949 #if !UCONFIG_NO_LEGACY_CONVERSION 1950 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }, 1951 { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff }, 1952 /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */ 1953 { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff } 1954 #else 1955 { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff } 1956 #endif 1957 }; 1958 1959 /* open an empty set */ 1960 set=uset_open(1, 0); 1961 1962 count=ucnv_countAvailable(); 1963 for(i=0; i<count; ++i) { 1964 errorCode=U_ZERO_ERROR; 1965 name=ucnv_getAvailableName(i); 1966 cnv=ucnv_open(name, &errorCode); 1967 if(U_FAILURE(errorCode)) { 1968 log_data_err("error: unable to open converter %s - %s\n", 1969 name, u_errorName(errorCode)); 1970 continue; 1971 } 1972 1973 uset_clear(set); 1974 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1975 if(U_FAILURE(errorCode)) { 1976 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 1977 name, u_errorName(errorCode)); 1978 } else if(uset_size(set)==0) { 1979 log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name); 1980 } 1981 1982 ucnv_close(cnv); 1983 } 1984 1985 /* test converters that are known to convert all of Unicode (except maybe for surrogates) */ 1986 for(i=0; i<UPRV_LENGTHOF(completeSetNames); ++i) { 1987 errorCode=U_ZERO_ERROR; 1988 name=completeSetNames[i]; 1989 cnv=ucnv_open(name, &errorCode); 1990 if(U_FAILURE(errorCode)) { 1991 log_data_err("error: unable to open converter %s - %s\n", 1992 name, u_errorName(errorCode)); 1993 continue; 1994 } 1995 1996 uset_clear(set); 1997 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 1998 if(U_FAILURE(errorCode)) { 1999 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2000 name, u_errorName(errorCode)); 2001 } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) { 2002 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name); 2003 } 2004 2005 ucnv_close(cnv); 2006 } 2007 2008 #if !UCONFIG_NO_LEGACY_CONVERSION 2009 /* test LMBCS variants which convert all of Unicode except for U+F6xx */ 2010 for(i=0; i<UPRV_LENGTHOF(lmbcsNames); ++i) { 2011 errorCode=U_ZERO_ERROR; 2012 name=lmbcsNames[i]; 2013 cnv=ucnv_open(name, &errorCode); 2014 if(U_FAILURE(errorCode)) { 2015 log_data_err("error: unable to open converter %s - %s\n", 2016 name, u_errorName(errorCode)); 2017 continue; 2018 } 2019 2020 uset_clear(set); 2021 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2022 if(U_FAILURE(errorCode)) { 2023 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2024 name, u_errorName(errorCode)); 2025 } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) { 2026 log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name); 2027 } 2028 2029 ucnv_close(cnv); 2030 } 2031 #endif 2032 2033 /* test specific sets */ 2034 for(i=0; i<UPRV_LENGTHOF(nameRanges); ++i) { 2035 errorCode=U_ZERO_ERROR; 2036 name=nameRanges[i].name; 2037 cnv=ucnv_open(name, &errorCode); 2038 if(U_FAILURE(errorCode)) { 2039 log_data_err("error: unable to open converter %s - %s\n", 2040 name, u_errorName(errorCode)); 2041 continue; 2042 } 2043 2044 uset_clear(set); 2045 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2046 if(U_FAILURE(errorCode)) { 2047 log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n", 2048 name, u_errorName(errorCode)); 2049 } else if( 2050 !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) || 2051 (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2)) 2052 ) { 2053 log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name); 2054 } else if(nameRanges[i].notStart>=0) { 2055 /* simulate containsAny() with the C API */ 2056 uset_complement(set); 2057 if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) { 2058 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name); 2059 } 2060 } 2061 2062 ucnv_close(cnv); 2063 } 2064 2065 errorCode = U_ZERO_ERROR; 2066 ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode); 2067 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 2068 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2069 } 2070 errorCode = U_PARSE_ERROR; 2071 /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */ 2072 ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode); 2073 if (errorCode != U_PARSE_ERROR) { 2074 log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode)); 2075 } 2076 2077 uset_close(set); 2078 } 2079