1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Steven R. Loomis 7/8/1999 Adding input buffer test 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "cstring.h" 17 #include "unicode/uloc.h" 18 #include "unicode/ucnv.h" 19 #include "unicode/ucnv_err.h" 20 #include "unicode/ucnv_cb.h" 21 #include "cintltst.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucol.h" 25 #include "unicode/utf16.h" 26 #include "cmemory.h" 27 #include "nucnvtst.h" 28 29 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 30 31 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 32 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 33 #if !UCONFIG_NO_COLLATION 34 static void TestJitterbug981(void); 35 #endif 36 #if !UCONFIG_NO_LEGACY_CONVERSION 37 static void TestJitterbug1293(void); 38 #endif 39 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 40 static void TestConverterTypesAndStarters(void); 41 static void TestAmbiguous(void); 42 static void TestSignatureDetection(void); 43 static void TestUTF7(void); 44 static void TestIMAP(void); 45 static void TestUTF8(void); 46 static void TestCESU8(void); 47 static void TestUTF16(void); 48 static void TestUTF16BE(void); 49 static void TestUTF16LE(void); 50 static void TestUTF32(void); 51 static void TestUTF32BE(void); 52 static void TestUTF32LE(void); 53 static void 
TestLATIN1(void); 54 55 #if !UCONFIG_NO_LEGACY_CONVERSION 56 static void TestSBCS(void); 57 static void TestDBCS(void); 58 static void TestMBCS(void); 59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 60 static void TestICCRunout(void); 61 #endif 62 63 #ifdef U_ENABLE_GENERIC_ISO_2022 64 static void TestISO_2022(void); 65 #endif 66 67 static void TestISO_2022_JP(void); 68 static void TestISO_2022_JP_1(void); 69 static void TestISO_2022_JP_2(void); 70 static void TestISO_2022_KR(void); 71 static void TestISO_2022_KR_1(void); 72 static void TestISO_2022_CN(void); 73 #if 0 74 /* 75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 76 */ 77 static void TestISO_2022_CN_EXT(void); 78 #endif 79 static void TestJIS(void); 80 static void TestHZ(void); 81 #endif 82 83 static void TestSCSU(void); 84 85 #if !UCONFIG_NO_LEGACY_CONVERSION 86 static void TestEBCDIC_STATEFUL(void); 87 static void TestGB18030(void); 88 static void TestLMBCS(void); 89 static void TestJitterbug255(void); 90 static void TestEBCDICUS4XML(void); 91 #if 0 92 /* 93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 94 */ 95 static void TestJitterbug915(void); 96 #endif 97 static void TestISCII(void); 98 99 static void TestCoverageMBCS(void); 100 static void TestJitterbug2346(void); 101 static void TestJitterbug2411(void); 102 static void TestJB5275(void); 103 static void TestJB5275_1(void); 104 static void TestJitterbug6175(void); 105 106 static void TestIsFixedWidth(void); 107 #endif 108 109 static void TestInBufSizes(void); 110 111 static void TestRoundTrippingAllUTF(void); 112 static void TestConv(const uint16_t in[], 113 int len, 114 const char* conv, 115 const char* lang, 116 char byteArr[], 117 int byteArrLen); 118 119 /* open a converter, using test data if it begins with '@' */ 120 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 121 122 123 #define NEW_MAX_BUFFER 999 124 125 static int32_t gInBufferSize = NEW_MAX_BUFFER; 126 static 
int32_t gOutBufferSize = NEW_MAX_BUFFER; 127 static char gNuConvTestName[1024]; 128 129 #define nct_min(x,y) ((x<y) ? x : y) 130 131 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 132 { 133 if(cnv && cnv[0] == '@') { 134 return ucnv_openPackage(loadTestData(err), cnv+1, err); 135 } else { 136 return ucnv_open(cnv, err); 137 } 138 } 139 140 static void printSeq(const unsigned char* a, int len) 141 { 142 int i=0; 143 log_verbose("{"); 144 while (i<len) 145 log_verbose("0x%02x ", a[i++]); 146 log_verbose("}\n"); 147 } 148 149 static void printUSeq(const UChar* a, int len) 150 { 151 int i=0; 152 log_verbose("{U+"); 153 while (i<len) log_verbose("0x%04x ", a[i++]); 154 log_verbose("}\n"); 155 } 156 157 static void printSeqErr(const unsigned char* a, int len) 158 { 159 int i=0; 160 fprintf(stderr, "{"); 161 while (i<len) 162 fprintf(stderr, "0x%02x ", a[i++]); 163 fprintf(stderr, "}\n"); 164 } 165 166 static void printUSeqErr(const UChar* a, int len) 167 { 168 int i=0; 169 fprintf(stderr, "{U+"); 170 while (i<len) 171 fprintf(stderr, "0x%04x ", a[i++]); 172 fprintf(stderr,"}\n"); 173 } 174 175 static void 176 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 177 { 178 const char* s0; 179 const char* s=(char*)source; 180 const int32_t *r=results; 181 UErrorCode errorCode=U_ZERO_ERROR; 182 UChar32 c; 183 184 while(s<limit) { 185 s0=s; 186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 187 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 188 break; /* no more significant input */ 189 } else if(U_FAILURE(errorCode)) { 190 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 191 break; 192 } else if( 193 /* test the expected number of input bytes only if >=0 */ 194 (*r>=0 && (int32_t)(s-s0)!=*r) || 195 c!=*(r+1) 196 ) { 197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 198 message, c, (s-s0), *(r+1), *r); 199 break; 
200 } 201 r+=2; 202 } 203 } 204 205 static void 206 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) 207 { 208 const char* s=(char*)source; 209 UErrorCode errorCode=U_ZERO_ERROR; 210 uint32_t c; 211 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 212 if(errorCode != expected){ 213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 214 } 215 if(c != 0xFFFD && c != 0xffff){ 216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 217 } 218 219 } 220 221 static void TestInBufSizes(void) 222 { 223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 224 #if 1 225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 230 TestNewConvertWithBufferSizes(1,1); 231 TestNewConvertWithBufferSizes(2,3); 232 TestNewConvertWithBufferSizes(3,2); 233 #endif 234 } 235 236 static void TestOutBufSizes(void) 237 { 238 #if 1 239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 245 246 #endif 247 } 248 249 250 void addTestNewConvert(TestNode** root) 251 { 252 #if !UCONFIG_NO_FILE_IO 253 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 254 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 255 #endif 256 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 257 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 258 addTest(root, &TestSignatureDetection, 
"tsconv/nucnvtst/TestSignatureDetection"); 259 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 260 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); 261 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 262 263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 264 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 265 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 266 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 267 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 268 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 269 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 270 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 271 272 #if !UCONFIG_NO_LEGACY_CONVERSION 273 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 274 #endif 275 276 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 277 278 #if !UCONFIG_NO_LEGACY_CONVERSION 279 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 280 #if !UCONFIG_NO_FILE_IO 281 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 282 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 283 #endif 284 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 285 286 #ifdef U_ENABLE_GENERIC_ISO_2022 287 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 288 #endif 289 /* BEGIN android-changed 290 To save space, Android does not build full ISO2022 CJK tables. 291 We turn off the tests here. 
292 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 293 END android-changed */ 294 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); 295 /* BEGIN android-changed 296 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 297 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 298 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 299 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 300 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 301 END android-changed */ 302 /* 303 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 304 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 305 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 306 */ 307 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 308 #endif 309 310 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 311 312 #if !UCONFIG_NO_LEGACY_CONVERSION 313 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 314 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 315 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 316 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 317 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 318 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 319 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 320 #if !UCONFIG_NO_COLLATION 321 /* BEGIN android-removed 322 To save space, Android does not include the collation tailoring rules. 323 Skip the related tests. 
324 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 325 END android-removed */ 326 #endif 327 328 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 329 #endif 330 331 332 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 333 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 334 #endif 335 336 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 337 338 #if !UCONFIG_NO_LEGACY_CONVERSION 339 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 340 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 341 /* BEGIN android-removed 342 To save space, Android does not build full ISO2022 CJK tables. 343 We turn off the tests here. 344 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 345 END android-removed */ 346 347 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); 348 #endif 349 } 350 351 352 /* Note that this test already makes use of statics, so it's not really 353 multithread safe. 354 This convenience function lets us make the error messages actually useful. 355 */ 356 357 static void setNuConvTestName(const char *codepage, const char *direction) 358 { 359 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 360 codepage, 361 direction, 362 (int)gInBufferSize, 363 (int)gOutBufferSize); 364 } 365 366 typedef enum 367 { 368 TC_OK = 0, /* test was OK */ 369 TC_MISMATCH = 1, /* Match failed - err was printed */ 370 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. 
*/ 371 } ETestConvertResult; 372 373 /* Note: This function uses global variables and it will not do offset 374 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 375 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 376 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 377 { 378 UErrorCode status = U_ZERO_ERROR; 379 UConverter *conv = 0; 380 char junkout[NEW_MAX_BUFFER]; /* FIX */ 381 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 382 char *p; 383 const UChar *src; 384 char *end; 385 char *targ; 386 int32_t *offs; 387 int i; 388 int32_t realBufferSize; 389 char *realBufferEnd; 390 const UChar *realSourceEnd; 391 const UChar *sourceLimit; 392 UBool checkOffsets = TRUE; 393 UBool doFlush; 394 395 for(i=0;i<NEW_MAX_BUFFER;i++) 396 junkout[i] = (char)0xF0; 397 for(i=0;i<NEW_MAX_BUFFER;i++) 398 junokout[i] = 0xFF; 399 400 setNuConvTestName(codepage, "FROM"); 401 402 log_verbose("\n========= %s\n", gNuConvTestName); 403 404 conv = my_ucnv_open(codepage, &status); 405 406 if(U_FAILURE(status)) 407 { 408 log_data_err("Couldn't open converter %s\n",codepage); 409 return TC_FAIL; 410 } 411 if(useFallback){ 412 ucnv_setFallback(conv,useFallback); 413 } 414 415 log_verbose("Converter opened..\n"); 416 417 src = source; 418 targ = junkout; 419 offs = junokout; 420 421 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 422 realBufferEnd = junkout + realBufferSize; 423 realSourceEnd = source + sourceLen; 424 425 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 426 checkOffsets = FALSE; 427 428 do 429 { 430 end = nct_min(targ + gOutBufferSize, realBufferEnd); 431 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 432 433 doFlush = (UBool)(sourceLimit == realSourceEnd); 434 435 if(targ == realBufferEnd) { 436 log_err("Error, overflowed the real buffer while about to call fromUnicode! 
targ=%08lx %s", targ, gNuConvTestName); 437 return TC_FAIL; 438 } 439 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 440 441 442 status = U_ZERO_ERROR; 443 444 ucnv_fromUnicode (conv, 445 &targ, 446 end, 447 &src, 448 sourceLimit, 449 checkOffsets ? offs : NULL, 450 doFlush, /* flush if we're at the end of the input data */ 451 &status); 452 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 453 454 if(U_FAILURE(status)) { 455 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 456 return TC_FAIL; 457 } 458 459 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 460 sourceLen, targ-junkout); 461 462 if(getTestOption(VERBOSITY_OPTION)) 463 { 464 char junk[9999]; 465 char offset_str[9999]; 466 char *ptr; 467 468 junk[0] = 0; 469 offset_str[0] = 0; 470 for(ptr = junkout;ptr<targ;ptr++) { 471 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 472 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 473 } 474 475 log_verbose(junk); 476 printSeq((const uint8_t *)expect, expectLen); 477 if ( checkOffsets ) { 478 log_verbose("\nOffsets:"); 479 log_verbose(offset_str); 480 } 481 log_verbose("\n"); 482 } 483 ucnv_close(conv); 484 485 if(expectLen != targ-junkout) { 486 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 487 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 488 fprintf(stderr, "Got:\n"); 489 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 490 fprintf(stderr, "Expected:\n"); 491 printSeqErr((const unsigned char*)expect, expectLen); 492 return TC_MISMATCH; 493 } 494 495 if (checkOffsets && (expectOffsets != 0) ) { 496 log_verbose("comparing %d offsets..\n", targ-junkout); 497 
if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 498 log_err("did not get the expected offsets. %s\n", gNuConvTestName); 499 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 500 log_err("\n"); 501 log_err("Got : "); 502 for(p=junkout;p<targ;p++) { 503 log_err("%d,", junokout[p-junkout]); 504 } 505 log_err("\n"); 506 log_err("Expected: "); 507 for(i=0; i<(targ-junkout); i++) { 508 log_err("%d,", expectOffsets[i]); 509 } 510 log_err("\n"); 511 } 512 } 513 514 log_verbose("comparing..\n"); 515 if(!memcmp(junkout, expect, expectLen)) { 516 log_verbose("Matches!\n"); 517 return TC_OK; 518 } else { 519 log_err("String does not match u->%s\n", gNuConvTestName); 520 printUSeqErr(source, sourceLen); 521 fprintf(stderr, "Got:\n"); 522 printSeqErr((const unsigned char *)junkout, expectLen); 523 fprintf(stderr, "Expected:\n"); 524 printSeqErr((const unsigned char *)expect, expectLen); 525 526 return TC_MISMATCH; 527 } 528 } 529 530 /* Note: This function uses global variables and it will not do offset 531 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 532 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 533 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 534 { 535 UErrorCode status = U_ZERO_ERROR; 536 UConverter *conv = 0; 537 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 538 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 539 const char *src; 540 const char *realSourceEnd; 541 const char *srcLimit; 542 UChar *p; 543 UChar *targ; 544 UChar *end; 545 int32_t *offs; 546 int i; 547 UBool checkOffsets = TRUE; 548 549 int32_t realBufferSize; 550 UChar *realBufferEnd; 551 552 553 for(i=0;i<NEW_MAX_BUFFER;i++) 554 junkout[i] = 0xFFFE; 555 556 for(i=0;i<NEW_MAX_BUFFER;i++) 557 junokout[i] = -1; 558 559 setNuConvTestName(codepage, "TO"); 560 561 log_verbose("\n========= %s\n", gNuConvTestName); 562 563 conv = my_ucnv_open(codepage, 
&status); 564 565 if(U_FAILURE(status)) 566 { 567 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 568 return TC_FAIL; 569 } 570 if(useFallback){ 571 ucnv_setFallback(conv,useFallback); 572 } 573 log_verbose("Converter opened..\n"); 574 575 src = (const char *)source; 576 targ = junkout; 577 offs = junokout; 578 579 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 580 realBufferEnd = junkout + realBufferSize; 581 realSourceEnd = src + sourcelen; 582 583 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 584 checkOffsets = FALSE; 585 586 do 587 { 588 end = nct_min( targ + gOutBufferSize, realBufferEnd); 589 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 590 591 if(targ == realBufferEnd) 592 { 593 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 594 return TC_FAIL; 595 } 596 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 597 598 /* oldTarg = targ; */ 599 600 status = U_ZERO_ERROR; 601 602 ucnv_toUnicode (conv, 603 &targ, 604 end, 605 &src, 606 srcLimit, 607 checkOffsets ? offs : NULL, 608 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 609 &status); 610 611 /* offs += (targ-oldTarg); */ 612 613 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 614 615 if(U_FAILURE(status)) 616 { 617 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 618 return TC_FAIL; 619 } 620 621 log_verbose("\nConversion done. 
%d bytes -> %d chars.\nResult :", 622 sourcelen, targ-junkout); 623 if(getTestOption(VERBOSITY_OPTION)) 624 { 625 char junk[9999]; 626 char offset_str[9999]; 627 UChar *ptr; 628 629 junk[0] = 0; 630 offset_str[0] = 0; 631 632 for(ptr = junkout;ptr<targ;ptr++) 633 { 634 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 635 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 636 } 637 638 log_verbose(junk); 639 printUSeq(expect, expectlen); 640 if ( checkOffsets ) 641 { 642 log_verbose("\nOffsets:"); 643 log_verbose(offset_str); 644 } 645 log_verbose("\n"); 646 } 647 ucnv_close(conv); 648 649 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 650 651 if (checkOffsets && (expectOffsets != 0)) 652 { 653 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 654 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 655 log_err("Got: "); 656 for(p=junkout;p<targ;p++) { 657 log_err("%d,", junokout[p-junkout]); 658 } 659 log_err("\n"); 660 log_err("Expected: "); 661 for(i=0; i<(targ-junkout); i++) { 662 log_err("%d,", expectOffsets[i]); 663 } 664 log_err("\n"); 665 log_err("output: "); 666 for(i=0; i<(targ-junkout); i++) { 667 log_err("%X,", junkout[i]); 668 } 669 log_err("\n"); 670 log_err("input: "); 671 for(i=0; i<(src-(const char *)source); i++) { 672 log_err("%X,", (unsigned char)source[i]); 673 } 674 log_err("\n"); 675 } 676 } 677 678 if(!memcmp(junkout, expect, expectlen*2)) 679 { 680 log_verbose("Matches!\n"); 681 return TC_OK; 682 } 683 else 684 { 685 log_err("String does not match. %s\n", gNuConvTestName); 686 log_verbose("String does not match. 
%s\n", gNuConvTestName); 687 printf("\nGot:"); 688 printUSeqErr(junkout, expectlen); 689 printf("\nExpected:"); 690 printUSeqErr(expect, expectlen); 691 return TC_MISMATCH; 692 } 693 } 694 695 696 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 697 { 698 /** test chars #1 */ 699 /* 1 2 3 1Han 2Han 3Han . */ 700 static const UChar sampleText[] = 701 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 702 static const UChar sampleTextRoundTripUnmappable[] = 703 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 704 705 706 static const uint8_t expectedUTF8[] = 707 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 708 static const int32_t toUTF8Offs[] = 709 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 710 static const int32_t fmUTF8Offs[] = 711 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 712 713 #ifdef U_ENABLE_GENERIC_ISO_2022 714 /* Same as UTF8, but with ^[%B preceeding */ 715 static const const uint8_t expectedISO2022[] = 716 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 717 static const int32_t toISO2022Offs[] = 718 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 719 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 720 static const int32_t fmISO2022Offs[] = 721 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 722 #endif 723 724 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . 
EBCDIC_STATEFUL */ 725 static const uint8_t expectedIBM930[] = 726 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 727 static const int32_t toIBM930Offs[] = 728 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 729 static const int32_t fmIBM930Offs[] = 730 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 731 732 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 733 static const uint8_t expectedIBM943[] = 734 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 735 static const int32_t toIBM943Offs [] = 736 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 737 static const int32_t fmIBM943Offs[] = 738 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 739 740 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 741 static const uint8_t expectedIBM9027[] = 742 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 743 static const int32_t toIBM9027Offs [] = 744 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 745 746 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 747 static const uint8_t expectedIBM920[] = 748 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 749 static const int32_t toIBM920Offs [] = 750 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 751 752 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 753 static const uint8_t expectedISO88593[] = 754 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 755 static const int32_t toISO88593Offs[] = 756 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 757 758 /* 1 2 3 0 <?> <?> <?> . 
<?> LATIN_1*/ 759 static const uint8_t expectedLATIN1[] = 760 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 761 static const int32_t toLATIN1Offs[] = 762 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 763 764 765 /* etc */ 766 static const uint8_t expectedUTF16BE[] = 767 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 768 static const int32_t toUTF16BEOffs[]= 769 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 770 static const int32_t fmUTF16BEOffs[] = 771 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 772 773 static const uint8_t expectedUTF16LE[] = 774 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 775 static const int32_t toUTF16LEOffs[]= 776 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 777 static const int32_t fmUTF16LEOffs[] = 778 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 779 780 static const uint8_t expectedUTF32BE[] = 781 { 0x00, 0x00, 0x00, 0x31, 782 0x00, 0x00, 0x00, 0x32, 783 0x00, 0x00, 0x00, 0x33, 784 0x00, 0x00, 0x00, 0x00, 785 0x00, 0x00, 0x4e, 0x00, 786 0x00, 0x00, 0x4e, 0x8c, 787 0x00, 0x00, 0x4e, 0x09, 788 0x00, 0x00, 0x00, 0x2e, 789 0x00, 0x02, 0x00, 0x21 }; 790 static const int32_t toUTF32BEOffs[]= 791 { 0x00, 0x00, 0x00, 0x00, 792 0x01, 0x01, 0x01, 0x01, 793 0x02, 0x02, 0x02, 0x02, 794 0x03, 0x03, 0x03, 0x03, 795 0x04, 0x04, 0x04, 0x04, 796 0x05, 0x05, 0x05, 0x05, 797 0x06, 0x06, 0x06, 0x06, 798 0x07, 0x07, 0x07, 0x07, 799 0x08, 0x08, 0x08, 0x08, 800 0x08, 0x08, 0x08, 0x08 }; 801 static const int32_t fmUTF32BEOffs[] = 802 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 803 804 static const uint8_t 
expectedUTF32LE[] = 805 { 0x31, 0x00, 0x00, 0x00, 806 0x32, 0x00, 0x00, 0x00, 807 0x33, 0x00, 0x00, 0x00, 808 0x00, 0x00, 0x00, 0x00, 809 0x00, 0x4e, 0x00, 0x00, 810 0x8c, 0x4e, 0x00, 0x00, 811 0x09, 0x4e, 0x00, 0x00, 812 0x2e, 0x00, 0x00, 0x00, 813 0x21, 0x00, 0x02, 0x00 }; 814 static const int32_t toUTF32LEOffs[]= 815 { 0x00, 0x00, 0x00, 0x00, 816 0x01, 0x01, 0x01, 0x01, 817 0x02, 0x02, 0x02, 0x02, 818 0x03, 0x03, 0x03, 0x03, 819 0x04, 0x04, 0x04, 0x04, 820 0x05, 0x05, 0x05, 0x05, 821 0x06, 0x06, 0x06, 0x06, 822 0x07, 0x07, 0x07, 0x07, 823 0x08, 0x08, 0x08, 0x08, 824 0x08, 0x08, 0x08, 0x08 }; 825 static const int32_t fmUTF32LEOffs[] = 826 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 827 828 829 830 831 /** Test chars #2 **/ 832 833 /* Sahha [health], slashed h's */ 834 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 835 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 836 837 /* LMBCS */ 838 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 839 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 840 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 841 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 842 /*********************************** START OF CODE finally *************/ 843 844 gInBufferSize = insize; 845 gOutBufferSize = outsize; 846 847 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 848 849 850 /*UTF-8*/ 851 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 852 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 853 854 log_verbose("Test surrogate behaviour for UTF8\n"); 855 { 856 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 857 static 
const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 858 0xf0, 0x90, 0x90, 0x81, 859 0xef, 0xbf, 0xbd 860 }; 861 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 862 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 863 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 864 865 866 } 867 868 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 869 /*ISO-2022*/ 870 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 871 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 872 #endif 873 874 /*UTF16 LE*/ 875 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 876 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 877 /*UTF16 BE*/ 878 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 879 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 880 /*UTF32 LE*/ 881 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 882 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 883 /*UTF32 BE*/ 884 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 885 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 886 887 /*LATIN_1*/ 888 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 889 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 890 891 #if !UCONFIG_NO_LEGACY_CONVERSION 892 /*EBCDIC_STATEFUL*/ 893 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 894 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 895 896 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 897 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 898 899 /*MBCS*/ 900 901 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 902 expectedIBM943, sizeof(expectedIBM943), 
"ibm-943", toIBM943Offs,FALSE ); 903 /*DBCS*/ 904 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 905 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 906 /*SBCS*/ 907 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 908 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 909 /*SBCS*/ 910 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 911 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 912 #endif 913 914 915 /****/ 916 917 /*UTF-8*/ 918 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 919 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 920 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 921 /*ISO-2022*/ 922 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 923 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 924 #endif 925 926 /*UTF16 LE*/ 927 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 928 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 929 /*UTF16 BE*/ 930 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 931 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 932 /*UTF32 LE*/ 933 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 934 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 935 /*UTF32 BE*/ 936 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 937 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 938 939 #if !UCONFIG_NO_LEGACY_CONVERSION 940 /*EBCDIC_STATEFUL*/ 941 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 942 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 943 /*MBCS*/ 944 testConvertToU(expectedIBM943, 
sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 945 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 946 #endif 947 948 /* Try it again to make sure it still works */ 949 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 950 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 951 952 #if !UCONFIG_NO_LEGACY_CONVERSION 953 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 954 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 955 956 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 957 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 958 959 /*LMBCS*/ 960 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 961 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 962 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 963 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 964 #endif 965 966 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 967 { 968 /* encode directly set D and set O */ 969 static const uint8_t utf7[] = { 970 /* 971 Hi Mom -+Jjo--! 972 A+ImIDkQ. 973 +- 974 +ZeVnLIqe- 975 */ 976 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 977 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 978 0x2b, 0x2d, 979 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 980 }; 981 static const UChar unicode[] = { 982 /* 983 Hi Mom -<WHITE SMILING FACE>-! 984 A<NOT IDENTICAL TO><ALPHA>. 
985 + 986 [Japanese word "nihongo"] 987 */ 988 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 989 0x41, 0x2262, 0x0391, 0x2e, 990 0x2b, 991 0x65e5, 0x672c, 0x8a9e 992 }; 993 static const int32_t toUnicodeOffsets[] = { 994 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 995 15, 17, 19, 23, 996 24, 997 27, 29, 32 998 }; 999 static const int32_t fromUnicodeOffsets[] = { 1000 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1001 11, 12, 12, 12, 13, 13, 13, 13, 14, 1002 15, 15, 1003 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1004 }; 1005 1006 /* same but escaping set O (the exclamation mark) */ 1007 static const uint8_t utf7Restricted[] = { 1008 /* 1009 Hi Mom -+Jjo--+ACE- 1010 A+ImIDkQ. 1011 +- 1012 +ZeVnLIqe- 1013 */ 1014 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 1015 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 1016 0x2b, 0x2d, 1017 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 1018 }; 1019 static const int32_t toUnicodeOffsetsR[] = { 1020 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1021 19, 21, 23, 27, 1022 28, 1023 31, 33, 36 1024 }; 1025 static const int32_t fromUnicodeOffsetsR[] = { 1026 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1027 11, 12, 12, 12, 13, 13, 13, 13, 14, 1028 15, 15, 1029 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1030 }; 1031 1032 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1033 1034 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 1035 1036 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1037 1038 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1039 } 1040 1041 /* 1042 * IMAP-mailbox-name examples are mostly from 
http://www.imc.org/rfc2152, 1043 * modified according to RFC 2060, 1044 * and supplemented with the one example in RFC 2060 itself. 1045 */ 1046 { 1047 static const uint8_t imap[] = { 1048 /* Hi Mom -&Jjo--! 1049 A&ImIDkQ-. 1050 &- 1051 &ZeVnLIqe- 1052 \ 1053 ~peter 1054 /mail 1055 /&ZeVnLIqe- 1056 /&U,BTFw- 1057 */ 1058 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1059 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1060 0x26, 0x2d, 1061 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1062 0x5c, 1063 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1064 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1065 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1066 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1067 }; 1068 static const UChar unicode[] = { 1069 /* Hi Mom -<WHITE SMILING FACE>-! 1070 A<NOT IDENTICAL TO><ALPHA>. 1071 & 1072 [Japanese word "nihongo"] 1073 \ 1074 ~peter 1075 /mail 1076 /<65e5, 672c, 8a9e> 1077 /<53f0, 5317> 1078 */ 1079 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1080 0x41, 0x2262, 0x0391, 0x2e, 1081 0x26, 1082 0x65e5, 0x672c, 0x8a9e, 1083 0x5c, 1084 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1085 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1086 0x2f, 0x65e5, 0x672c, 0x8a9e, 1087 0x2f, 0x53f0, 0x5317 1088 }; 1089 static const int32_t toUnicodeOffsets[] = { 1090 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1091 15, 17, 19, 24, 1092 25, 1093 28, 30, 33, 1094 37, 1095 38, 39, 40, 41, 42, 43, 1096 44, 45, 46, 47, 48, 1097 49, 51, 53, 56, 1098 60, 62, 64 1099 }; 1100 static const int32_t fromUnicodeOffsets[] = { 1101 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1102 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1103 15, 15, 1104 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1105 19, 1106 20, 21, 22, 23, 24, 25, 1107 26, 27, 28, 29, 30, 1108 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1109 35, 36, 36, 36, 37, 37, 37, 37, 37 1110 }; 1111 1112 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, 
sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1113 1114 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1115 } 1116 1117 /* Test UTF-8 bad data handling*/ 1118 { 1119 static const uint8_t utf8[]={ 1120 0x61, 1121 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1122 0x00, 1123 0x62, 1124 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1125 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1126 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1127 0xdf, 0xbf, /* 7ff */ 1128 0xbf, /* truncated tail */ 1129 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1130 0x02 1131 }; 1132 1133 static const uint16_t utf8Expected[]={ 1134 0x0061, 1135 0xfffd, 1136 0x0000, 1137 0x0062, 1138 0xfffd, 1139 0xfffd, 1140 0xdbff, 0xdfff, 1141 0x07ff, 1142 0xfffd, 1143 0xfffd, 1144 0x0002 1145 }; 1146 1147 static const int32_t utf8Offsets[]={ 1148 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1149 }; 1150 testConvertToU(utf8, sizeof(utf8), 1151 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1152 1153 } 1154 1155 /* Test UTF-32BE bad data handling*/ 1156 { 1157 static const uint8_t utf32[]={ 1158 0x00, 0x00, 0x00, 0x61, 1159 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1160 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1161 0x00, 0x00, 0x00, 0x62, 1162 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1163 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1164 0x00, 0x00, 0x01, 0x62, 1165 0x00, 0x00, 0x02, 0x62 1166 }; 1167 static const uint16_t utf32Expected[]={ 1168 0x0061, 1169 0xfffd, /* 0x110000 out of range */ 1170 0xDBFF, /* 0x10FFFF in range */ 1171 0xDFFF, 1172 0x0062, 1173 0xfffd, /* 0xffffffff out of range */ 1174 0xfffd, /* 0x7fffffff out of range */ 1175 0x0162, 1176 0x0262 1177 }; 1178 static const int32_t utf32Offsets[]={ 1179 0, 4, 8, 8, 12, 16, 20, 24, 28 1180 }; 1181 static const uint8_t utf32ExpectedBack[]={ 1182 0x00, 0x00, 0x00, 0x61, 1183 0x00, 0x00, 0xff, 0xfd, /* 
0x110000 out of range */ 1184 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1185 0x00, 0x00, 0x00, 0x62, 1186 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1187 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1188 0x00, 0x00, 0x01, 0x62, 1189 0x00, 0x00, 0x02, 0x62 1190 }; 1191 static const int32_t utf32OffsetsBack[]={ 1192 0,0,0,0, 1193 1,1,1,1, 1194 2,2,2,2, 1195 4,4,4,4, 1196 5,5,5,5, 1197 6,6,6,6, 1198 7,7,7,7, 1199 8,8,8,8 1200 }; 1201 1202 testConvertToU(utf32, sizeof(utf32), 1203 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1204 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1205 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1206 } 1207 1208 /* Test UTF-32LE bad data handling*/ 1209 { 1210 static const uint8_t utf32[]={ 1211 0x61, 0x00, 0x00, 0x00, 1212 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1213 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1214 0x62, 0x00, 0x00, 0x00, 1215 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1216 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1217 0x62, 0x01, 0x00, 0x00, 1218 0x62, 0x02, 0x00, 0x00, 1219 }; 1220 1221 static const uint16_t utf32Expected[]={ 1222 0x0061, 1223 0xfffd, /* 0x110000 out of range */ 1224 0xDBFF, /* 0x10FFFF in range */ 1225 0xDFFF, 1226 0x0062, 1227 0xfffd, /* 0xffffffff out of range */ 1228 0xfffd, /* 0x7fffffff out of range */ 1229 0x0162, 1230 0x0262 1231 }; 1232 static const int32_t utf32Offsets[]={ 1233 0, 4, 8, 8, 12, 16, 20, 24, 28 1234 }; 1235 static const uint8_t utf32ExpectedBack[]={ 1236 0x61, 0x00, 0x00, 0x00, 1237 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1238 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1239 0x62, 0x00, 0x00, 0x00, 1240 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1241 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1242 0x62, 0x01, 0x00, 0x00, 1243 0x62, 0x02, 0x00, 0x00 1244 }; 
1245 static const int32_t utf32OffsetsBack[]={ 1246 0,0,0,0, 1247 1,1,1,1, 1248 2,2,2,2, 1249 4,4,4,4, 1250 5,5,5,5, 1251 6,6,6,6, 1252 7,7,7,7, 1253 8,8,8,8 1254 }; 1255 testConvertToU(utf32, sizeof(utf32), 1256 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1257 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1258 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1259 } 1260 } 1261 1262 static void TestCoverageMBCS(){ 1263 #if 0 1264 UErrorCode status = U_ZERO_ERROR; 1265 const char *directory = loadTestData(&status); 1266 char* tdpath = NULL; 1267 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1268 int len = strlen(directory); 1269 char* index=NULL; 1270 1271 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1272 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1273 log_verbose("Retrieved data directory %s \n",saveDirectory); 1274 uprv_strcpy(tdpath,directory); 1275 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1276 1277 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1278 *(index+1)=0; 1279 } 1280 u_setDataDirectory(tdpath); 1281 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1282 #endif 1283 1284 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1285 which is test file for MBCS conversion with single-byte codepage data.*/ 1286 { 1287 1288 /* MBCS with single byte codepage data test1.ucm*/ 1289 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1290 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1291 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1292 1293 /*from Unicode*/ 1294 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1295 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1296 } 1297 1298 /*some more test to increase the code coverage in MBCS. 
Create an test converter from test3.ucm 1299 which is test file for MBCS conversion with three-byte codepage data.*/ 1300 { 1301 1302 /* MBCS with three byte codepage data test3.ucm*/ 1303 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1304 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1305 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1306 1307 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1308 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1309 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1310 1311 /*from Unicode*/ 1312 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1313 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1314 1315 /*to Unicode*/ 1316 testConvertToU(test3input, sizeof(test3input), 1317 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1318 1319 } 1320 1321 /*some more test to increase the code coverage in MBCS. 
Create an test converter from test4.ucm 1322 which is test file for MBCS conversion with four-byte codepage data.*/ 1323 { 1324 1325 /* MBCS with three byte codepage data test4.ucm*/ 1326 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1327 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1328 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1329 1330 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1331 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1332 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1333 1334 /*from Unicode*/ 1335 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1336 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1337 1338 /*to Unicode*/ 1339 testConvertToU(test4input, sizeof(test4input), 1340 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1341 1342 } 1343 #if 0 1344 free(tdpath); 1345 /* restore the original data directory */ 1346 log_verbose("Setting the data directory to %s \n", saveDirectory); 1347 u_setDataDirectory(saveDirectory); 1348 free(saveDirectory); 1349 #endif 1350 1351 } 1352 1353 static void TestConverterType(const char *convName, UConverterType convType) { 1354 UConverter* myConverter; 1355 UErrorCode err = U_ZERO_ERROR; 1356 1357 myConverter = my_ucnv_open(convName, &err); 1358 1359 if (U_FAILURE(err)) { 1360 log_data_err("Failed to create an %s converter\n", convName); 1361 return; 1362 } 1363 else 1364 { 1365 if (ucnv_getType(myConverter)!=convType) { 1366 log_err("ucnv_getType Failed for %s. 
Got enum value 0x%X\n", 1367 convName, convType); 1368 } 1369 else { 1370 log_verbose("ucnv_getType %s ok\n", convName); 1371 } 1372 } 1373 ucnv_close(myConverter); 1374 } 1375 1376 static void TestConverterTypesAndStarters() 1377 { 1378 #if !UCONFIG_NO_LEGACY_CONVERSION 1379 UConverter* myConverter; 1380 UErrorCode err = U_ZERO_ERROR; 1381 UBool mystarters[256]; 1382 1383 /* const UBool expectedKSCstarters[256] = { 1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1394 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1395 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1396 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1397 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1398 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1399 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1401 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1406 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1407 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1408 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1409 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1410 1411 1412 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1413 1414 myConverter = ucnv_open("ksc", &err); 1415 if (U_FAILURE(err)) { 1416 log_data_err("Failed to create an ibm-ksc converter\n"); 1417 return; 1418 } 1419 else 1420 { 1421 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1422 log_err("ucnv_getType Failed for ibm-949\n"); 1423 else 1424 log_verbose("ucnv_getType ibm-949 ok\n"); 1425 1426 if(myConverter!=NULL) 1427 ucnv_getStarters(myConverter, mystarters, &err); 1428 1429 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1430 log_err("Failed ucnv_getStarters for ksc\n"); 1431 else 1432 log_verbose("ucnv_getStarters ok\n");*/ 1433 1434 } 1435 ucnv_close(myConverter); 1436 1437 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1438 TestConverterType("ibm-878", UCNV_SBCS); 1439 #endif 1440 1441 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1442 1443 TestConverterType("ibm-1208", UCNV_UTF8); 1444 1445 TestConverterType("utf-8", UCNV_UTF8); 1446 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1447 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1448 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1449 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1450 1451 #if !UCONFIG_NO_LEGACY_CONVERSION 1452 1453 #if defined(U_ENABLE_GENERIC_ISO_2022) 1454 TestConverterType("iso-2022", UCNV_ISO_2022); 1455 #endif 1456 1457 TestConverterType("hz", UCNV_HZ); 1458 #endif 1459 1460 TestConverterType("scsu", UCNV_SCSU); 1461 1462 #if !UCONFIG_NO_LEGACY_CONVERSION 1463 TestConverterType("x-iscii-de", UCNV_ISCII); 1464 #endif 1465 1466 TestConverterType("ascii", UCNV_US_ASCII); 1467 TestConverterType("utf-7", UCNV_UTF7); 1468 
TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1469 TestConverterType("bocu-1", UCNV_BOCU1); 1470 } 1471 1472 static void 1473 TestAmbiguousConverter(UConverter *cnv) { 1474 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1475 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1476 1477 const char *s; 1478 UChar *u; 1479 UErrorCode errorCode; 1480 UBool isAmbiguous; 1481 1482 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1483 errorCode=U_ZERO_ERROR; 1484 s=inBytes; 1485 u=outUnicode; 1486 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1487 if(U_FAILURE(errorCode)) { 1488 /* we do not care about general failures in this test; the input may just not be mappable */ 1489 return; 1490 } 1491 1492 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1493 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1494 /* There are some encodings that are partially ASCII based, 1495 like the ISO-7 and GSM series of codepages, which we ignore. 
*/ 1496 return; 1497 } 1498 1499 isAmbiguous=ucnv_isAmbiguous(cnv); 1500 1501 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1502 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1503 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1504 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1505 return; 1506 } 1507 1508 if(outUnicode[2]!=0x5c) { 1509 /* needs fixup, fix it */ 1510 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1511 if(outUnicode[2]!=0x5c) { 1512 /* the fix failed */ 1513 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1514 return; 1515 } 1516 } 1517 } 1518 1519 static void TestAmbiguous() 1520 { 1521 UErrorCode status = U_ZERO_ERROR; 1522 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1523 static const char target[] = { 1524 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1525 0x5c, 0x75, 0x73, 0x72, 1526 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1527 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1528 0x5c, 0x64, 0x61, 0x74, 0x61, 1529 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1530 0 1531 }; 1532 UChar asciiResult[200], sjisResult[200]; 1533 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1534 const char *name; 1535 1536 /* enumerate all converters */ 1537 status=U_ZERO_ERROR; 1538 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1539 cnv=ucnv_open(name, &status); 1540 if(U_SUCCESS(status)) { 1541 /* BEGIN android-changed 1542 To save space, Android does not build full ISO2022 CJK tables. 1543 We skip the tests for ISO-2022. 
*/ 1544 const char* cnvName = ucnv_getName(cnv, &status); 1545 if (strlen(cnvName) < 8 || 1546 strncmp(cnvName, "ISO_2022", 8) != 0) { 1547 TestAmbiguousConverter(cnv); 1548 } 1549 /* END android-changed */ 1550 } else { 1551 log_err("error: unable to open available converter \"%s\"\n", name); 1552 status=U_ZERO_ERROR; 1553 } 1554 } 1555 1556 #if !UCONFIG_NO_LEGACY_CONVERSION 1557 sjis_cnv = ucnv_open("ibm-943", &status); 1558 if (U_FAILURE(status)) 1559 { 1560 log_data_err("Failed to create a SJIS converter\n"); 1561 return; 1562 } 1563 ascii_cnv = ucnv_open("LATIN-1", &status); 1564 if (U_FAILURE(status)) 1565 { 1566 log_data_err("Failed to create a LATIN-1 converter\n"); 1567 ucnv_close(sjis_cnv); 1568 return; 1569 } 1570 /* convert target from SJIS to Unicode */ 1571 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1572 if (U_FAILURE(status)) 1573 { 1574 log_err("Failed to convert the SJIS string.\n"); 1575 ucnv_close(sjis_cnv); 1576 ucnv_close(ascii_cnv); 1577 return; 1578 } 1579 /* convert target from Latin-1 to Unicode */ 1580 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1581 if (U_FAILURE(status)) 1582 { 1583 log_err("Failed to convert the Latin-1 string.\n"); 1584 ucnv_close(sjis_cnv); 1585 ucnv_close(ascii_cnv); 1586 return; 1587 } 1588 if (!ucnv_isAmbiguous(sjis_cnv)) 1589 { 1590 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1591 ucnv_close(sjis_cnv); 1592 ucnv_close(ascii_cnv); 1593 return; 1594 } 1595 if (u_strcmp(sjisResult, asciiResult) == 0) 1596 { 1597 log_err("File separators for SJIS don't need to be fixed.\n"); 1598 } 1599 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1600 if (u_strcmp(sjisResult, asciiResult) != 0) 1601 { 1602 log_err("Fixing file separator for SJIS failed.\n"); 1603 } 1604 ucnv_close(sjis_cnv); 1605 ucnv_close(ascii_cnv); 
1606 #endif 1607 } 1608 1609 static void 1610 TestSignatureDetection(){ 1611 /* with null terminated strings */ 1612 { 1613 static const char* data[] = { 1614 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1615 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1616 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1617 "\x0E\xFE\xFF\x00", /* SCSU */ 1618 1619 "\xFE\xFF", /* UTF-16BE */ 1620 "\xFF\xFE", /* UTF-16LE */ 1621 "\xEF\xBB\xBF", /* UTF-8 */ 1622 "\x0E\xFE\xFF", /* SCSU */ 1623 1624 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1625 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1626 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1627 "\x0E\xFE\xFF\x41", /* SCSU */ 1628 1629 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1630 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1631 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1632 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1633 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1634 1635 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1636 }; 1637 static const char* expected[] = { 1638 "UTF-16BE", 1639 "UTF-16LE", 1640 "UTF-8", 1641 "SCSU", 1642 1643 "UTF-16BE", 1644 "UTF-16LE", 1645 "UTF-8", 1646 "SCSU", 1647 1648 "UTF-16BE", 1649 "UTF-16LE", 1650 "UTF-8", 1651 "SCSU", 1652 1653 "UTF-7", 1654 "UTF-7", 1655 "UTF-7", 1656 "UTF-7", 1657 "UTF-7", 1658 "UTF-EBCDIC" 1659 }; 1660 static const int32_t expectedLength[] ={ 1661 2, 1662 2, 1663 3, 1664 3, 1665 1666 2, 1667 2, 1668 3, 1669 3, 1670 1671 2, 1672 2, 1673 3, 1674 3, 1675 1676 5, 1677 4, 1678 4, 1679 4, 1680 4, 1681 4 1682 }; 1683 int i=0; 1684 UErrorCode err; 1685 int32_t signatureLength = -1; 1686 const char* source = NULL; 1687 const char* enc = NULL; 1688 for( ; i<sizeof(data)/sizeof(char*); i++){ 1689 err = U_ZERO_ERROR; 1690 source = data[i]; 1691 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1692 if(U_FAILURE(err)){ 1693 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. 
Error: %s\n", source,i,u_errorName(err)); 1694 continue; 1695 } 1696 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1697 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1698 continue; 1699 } 1700 if(signatureLength != expectedLength[i]){ 1701 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1702 } 1703 } 1704 } 1705 { 1706 static const char* data[] = { 1707 "\xFE\xFF\x00", /* UTF-16BE */ 1708 "\xFF\xFE\x00", /* UTF-16LE */ 1709 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1710 "\x0E\xFE\xFF\x00", /* SCSU */ 1711 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1712 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1713 "\xFE\xFF", /* UTF-16BE */ 1714 "\xFF\xFE", /* UTF-16LE */ 1715 "\xEF\xBB\xBF", /* UTF-8 */ 1716 "\x0E\xFE\xFF", /* SCSU */ 1717 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1718 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1719 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1720 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1721 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1722 "\x0E\xFE\xFF\x41", /* SCSU */ 1723 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1724 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1725 "\xFB\xEE\x28", /* BOCU-1 */ 1726 "\xFF\x41\x42" /* NULL */ 1727 }; 1728 static const int len[] = { 1729 3, 1730 3, 1731 4, 1732 4, 1733 4, 1734 4, 1735 2, 1736 2, 1737 3, 1738 3, 1739 4, 1740 4, 1741 4, 1742 4, 1743 4, 1744 4, 1745 5, 1746 5, 1747 3, 1748 3 1749 }; 1750 1751 static const char* expected[] = { 1752 "UTF-16BE", 1753 "UTF-16LE", 1754 "UTF-8", 1755 "SCSU", 1756 "UTF-32BE", 1757 "UTF-32LE", 1758 "UTF-16BE", 1759 "UTF-16LE", 1760 "UTF-8", 1761 "SCSU", 1762 "UTF-32BE", 1763 "UTF-32LE", 1764 "UTF-16BE", 1765 "UTF-16LE", 1766 "UTF-8", 1767 "SCSU", 1768 "UTF-32BE", 1769 "UTF-32LE", 1770 "BOCU-1", 1771 NULL 1772 }; 1773 static const int32_t expectedLength[] ={ 1774 2, 1775 2, 1776 3, 1777 3, 1778 4, 1779 4, 1780 2, 1781 2, 1782 3, 1783 3, 1784 4, 1785 4, 1786 
2, 1787 2, 1788 3, 1789 3, 1790 4, 1791 4, 1792 3, 1793 0 1794 }; 1795 int i=0; 1796 UErrorCode err; 1797 int32_t signatureLength = -1; 1798 int32_t sourceLength=-1; 1799 const char* source = NULL; 1800 const char* enc = NULL; 1801 for( ; i<sizeof(data)/sizeof(char*); i++){ 1802 err = U_ZERO_ERROR; 1803 source = data[i]; 1804 sourceLength = len[i]; 1805 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); 1806 if(U_FAILURE(err)){ 1807 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1808 continue; 1809 } 1810 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1811 if(expected[i] !=NULL){ 1812 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1813 continue; 1814 } 1815 } 1816 if(signatureLength != expectedLength[i]){ 1817 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1818 } 1819 } 1820 } 1821 } 1822 1823 static void TestUTF7() { 1824 /* test input */ 1825 static const uint8_t in[]={ 1826 /* H - +Jjo- - ! 
+- +2AHcAQ */ 1827 0x48, 1828 0x2d, 1829 0x2b, 0x4a, 0x6a, 0x6f, 1830 0x2d, 0x2d, 1831 0x21, 1832 0x2b, 0x2d, 1833 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1834 }; 1835 1836 /* expected test results */ 1837 static const int32_t results[]={ 1838 /* number of bytes read, code point */ 1839 1, 0x48, 1840 1, 0x2d, 1841 4, 0x263a, /* <WHITE SMILING FACE> */ 1842 2, 0x2d, 1843 1, 0x21, 1844 2, 0x2b, 1845 7, 0x10401 1846 }; 1847 1848 const char *cnvName; 1849 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1850 UErrorCode errorCode=U_ZERO_ERROR; 1851 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1852 if(U_FAILURE(errorCode)) { 1853 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1854 return; 1855 } 1856 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1857 /* Test the condition when source >= sourceLimit */ 1858 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1859 cnvName = ucnv_getName(cnv, &errorCode); 1860 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1861 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1862 } 1863 ucnv_close(cnv); 1864 } 1865 1866 static void TestIMAP() { 1867 /* test input */ 1868 static const uint8_t in[]={ 1869 /* H - &Jjo- - ! 
&- &2AHcAQ- \ */ 1870 0x48, 1871 0x2d, 1872 0x26, 0x4a, 0x6a, 0x6f, 1873 0x2d, 0x2d, 1874 0x21, 1875 0x26, 0x2d, 1876 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1877 }; 1878 1879 /* expected test results */ 1880 static const int32_t results[]={ 1881 /* number of bytes read, code point */ 1882 1, 0x48, 1883 1, 0x2d, 1884 4, 0x263a, /* <WHITE SMILING FACE> */ 1885 2, 0x2d, 1886 1, 0x21, 1887 2, 0x26, 1888 7, 0x10401 1889 }; 1890 1891 const char *cnvName; 1892 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1893 UErrorCode errorCode=U_ZERO_ERROR; 1894 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1895 if(U_FAILURE(errorCode)) { 1896 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1897 return; 1898 } 1899 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1900 /* Test the condition when source >= sourceLimit */ 1901 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1902 cnvName = ucnv_getName(cnv, &errorCode); 1903 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1904 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1905 } 1906 ucnv_close(cnv); 1907 } 1908 1909 static void TestUTF8() { 1910 /* test input */ 1911 static const uint8_t in[]={ 1912 0x61, 1913 0xc2, 0x80, 1914 0xe0, 0xa0, 0x80, 1915 0xf0, 0x90, 0x80, 0x80, 1916 0xf4, 0x84, 0x8c, 0xa1, 1917 0xf0, 0x90, 0x90, 0x81 1918 }; 1919 1920 /* expected test results */ 1921 static const int32_t results[]={ 1922 /* number of bytes read, code point */ 1923 1, 0x61, 1924 2, 0x80, 1925 3, 0x800, 1926 4, 0x10000, 1927 4, 0x104321, 1928 4, 0x10401 1929 }; 1930 1931 /* error test input */ 1932 static const uint8_t in2[]={ 1933 0x61, 1934 0xc0, 0x80, /* illegal non-shortest form */ 1935 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1936 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest 
form */ 1937 0xc0, 0xc0, /* illegal trail byte */ 1938 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1939 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1940 0xfe, /* illegal byte altogether */ 1941 0x62 1942 }; 1943 1944 /* expected error test results */ 1945 static const int32_t results2[]={ 1946 /* number of bytes read, code point */ 1947 1, 0x61, 1948 22, 0x62 1949 }; 1950 1951 UConverterToUCallback cb; 1952 const void *p; 1953 1954 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1955 UErrorCode errorCode=U_ZERO_ERROR; 1956 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1957 if(U_FAILURE(errorCode)) { 1958 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1959 return; 1960 } 1961 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1962 /* Test the condition when source >= sourceLimit */ 1963 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1964 1965 /* test error behavior with a skip callback */ 1966 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1967 source=(const char *)in2; 1968 limit=(const char *)(in2+sizeof(in2)); 1969 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1970 1971 ucnv_close(cnv); 1972 } 1973 1974 static void TestCESU8() { 1975 /* test input */ 1976 static const uint8_t in[]={ 1977 0x61, 1978 0xc2, 0x80, 1979 0xe0, 0xa0, 0x80, 1980 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1981 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1982 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1983 0xef, 0xbf, 0xbc 1984 }; 1985 1986 /* expected test results */ 1987 static const int32_t results[]={ 1988 /* number of bytes read, code point */ 1989 1, 0x61, 1990 2, 0x80, 1991 3, 0x800, 1992 6, 0x10000, 1993 3, 0xdc01, 1994 -1,0xd802, /* may read 3 or 6 bytes */ 1995 -1,0x10ffff,/* may read 0 or 3 bytes */ 1996 3, 0xfffc 1997 }; 1998 1999 /* error test input */ 2000 static const uint8_t in2[]={ 2001 0x61, 2002 0xc0, 0x80, /* illegal non-shortest form */ 2003 
0xe0, 0x80, 0x80, /* illegal non-shortest form */ 2004 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 2005 0xc0, 0xc0, /* illegal trail byte */ 2006 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 2007 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 2008 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 2009 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 2010 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 2011 0xfe, /* illegal byte altogether */ 2012 0x62 2013 }; 2014 2015 /* expected error test results */ 2016 static const int32_t results2[]={ 2017 /* number of bytes read, code point */ 2018 1, 0x61, 2019 34, 0x62 2020 }; 2021 2022 UConverterToUCallback cb; 2023 const void *p; 2024 2025 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 2026 UErrorCode errorCode=U_ZERO_ERROR; 2027 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2028 if(U_FAILURE(errorCode)) { 2029 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2030 return; 2031 } 2032 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2033 /* Test the condition when source >= sourceLimit */ 2034 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2035 2036 /* test error behavior with a skip callback */ 2037 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2038 source=(const char *)in2; 2039 limit=(const char *)(in2+sizeof(in2)); 2040 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2041 2042 ucnv_close(cnv); 2043 } 2044 2045 static void TestUTF16() { 2046 /* test input */ 2047 static const uint8_t in1[]={ 2048 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2049 }; 2050 static const uint8_t in2[]={ 2051 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2052 }; 2053 static const uint8_t in3[]={ 2054 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2055 }; 2056 2057 /* expected test results */ 2058 static const int32_t results1[]={ 
2059 /* number of bytes read, code point */ 2060 4, 0x4e00, 2061 2, 0xfeff 2062 }; 2063 static const int32_t results2[]={ 2064 /* number of bytes read, code point */ 2065 4, 0x004e, 2066 2, 0xfffe 2067 }; 2068 static const int32_t results3[]={ 2069 /* number of bytes read, code point */ 2070 2, 0xfefe, 2071 2, 0x4e00, 2072 2, 0xfeff, 2073 4, 0x20001 2074 }; 2075 2076 const char *source, *limit; 2077 2078 UErrorCode errorCode=U_ZERO_ERROR; 2079 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2080 if(U_FAILURE(errorCode)) { 2081 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2082 return; 2083 } 2084 2085 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2086 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2087 2088 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2089 ucnv_resetToUnicode(cnv); 2090 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2091 2092 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2093 ucnv_resetToUnicode(cnv); 2094 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2095 2096 /* Test the condition when source >= sourceLimit */ 2097 ucnv_resetToUnicode(cnv); 2098 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2099 2100 ucnv_close(cnv); 2101 } 2102 2103 static void TestUTF16BE() { 2104 /* test input */ 2105 static const uint8_t in[]={ 2106 0x00, 0x61, 2107 0x00, 0xc0, 2108 0x00, 0x31, 2109 0x00, 0xf4, 2110 0xce, 0xfe, 2111 0xd8, 0x01, 0xdc, 0x01 2112 }; 2113 2114 /* expected test results */ 2115 static const int32_t results[]={ 2116 /* number of bytes read, code point */ 2117 2, 0x61, 2118 2, 0xc0, 2119 2, 0x31, 2120 2, 0xf4, 2121 2, 0xcefe, 2122 4, 0x10401 2123 }; 2124 2125 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2126 UErrorCode errorCode=U_ZERO_ERROR; 2127 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2128 if(U_FAILURE(errorCode)) { 2129 log_err("Unable to open a UTF16-BE 
converter: %s\n", u_errorName(errorCode)); 2130 return; 2131 } 2132 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2133 /* Test the condition when source >= sourceLimit */ 2134 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2135 /*Test for the condition where there is an invalid character*/ 2136 { 2137 static const uint8_t source2[]={0x61}; 2138 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2139 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2140 } 2141 #if 0 2142 /* 2143 * Test disabled because currently the UTF-16BE/LE converters are supposed 2144 * to not set errors for unpaired surrogates. 2145 * This may change with 2146 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2147 */ 2148 2149 /*Test for the condition where there is a surrogate pair*/ 2150 { 2151 const uint8_t source2[]={0xd8, 0x01}; 2152 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2153 } 2154 #endif 2155 ucnv_close(cnv); 2156 } 2157 2158 static void 2159 TestUTF16LE() { 2160 /* test input */ 2161 static const uint8_t in[]={ 2162 0x61, 0x00, 2163 0x31, 0x00, 2164 0x4e, 0x2e, 2165 0x4e, 0x00, 2166 0x01, 0xd8, 0x01, 0xdc 2167 }; 2168 2169 /* expected test results */ 2170 static const int32_t results[]={ 2171 /* number of bytes read, code point */ 2172 2, 0x61, 2173 2, 0x31, 2174 2, 0x2e4e, 2175 2, 0x4e, 2176 4, 0x10401 2177 }; 2178 2179 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2180 UErrorCode errorCode=U_ZERO_ERROR; 2181 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2182 if(U_FAILURE(errorCode)) { 2183 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2184 return; 2185 } 2186 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2187 /* Test 
the condition when source >= sourceLimit */ 2188 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2189 /*Test for the condition where there is an invalid character*/ 2190 { 2191 static const uint8_t source2[]={0x61}; 2192 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2193 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2194 } 2195 #if 0 2196 /* 2197 * Test disabled because currently the UTF-16BE/LE converters are supposed 2198 * to not set errors for unpaired surrogates. 2199 * This may change with 2200 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2201 */ 2202 2203 /*Test for the condition where there is a surrogate character*/ 2204 { 2205 static const uint8_t source2[]={0x01, 0xd8}; 2206 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2207 } 2208 #endif 2209 2210 ucnv_close(cnv); 2211 } 2212 2213 static void TestUTF32() { 2214 /* test input */ 2215 static const uint8_t in1[]={ 2216 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2217 }; 2218 static const uint8_t in2[]={ 2219 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2220 }; 2221 static const uint8_t in3[]={ 2222 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2223 }; 2224 2225 /* expected test results */ 2226 static const int32_t results1[]={ 2227 /* number of bytes read, code point */ 2228 8, 0x100f00, 2229 4, 0xfeff 2230 }; 2231 static const int32_t results2[]={ 2232 /* number of bytes read, code point */ 2233 8, 0x0f1000, 2234 4, 0xfffe 2235 }; 2236 static const int32_t results3[]={ 2237 /* number of bytes read, code point */ 2238 4, 0xfefe, 2239 4, 0x100f00, 2240 4, 0xfffd, /* unmatched surrogate */ 2241 4, 0xfffd /* unmatched 
surrogate */ 2242 }; 2243 2244 const char *source, *limit; 2245 2246 UErrorCode errorCode=U_ZERO_ERROR; 2247 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2248 if(U_FAILURE(errorCode)) { 2249 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2250 return; 2251 } 2252 2253 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2254 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2255 2256 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2257 ucnv_resetToUnicode(cnv); 2258 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2259 2260 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2261 ucnv_resetToUnicode(cnv); 2262 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2263 2264 /* Test the condition when source >= sourceLimit */ 2265 ucnv_resetToUnicode(cnv); 2266 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2267 2268 ucnv_close(cnv); 2269 } 2270 2271 static void 2272 TestUTF32BE() { 2273 /* test input */ 2274 static const uint8_t in[]={ 2275 0x00, 0x00, 0x00, 0x61, 2276 0x00, 0x00, 0x30, 0x61, 2277 0x00, 0x00, 0xdc, 0x00, 2278 0x00, 0x00, 0xd8, 0x00, 2279 0x00, 0x00, 0xdf, 0xff, 2280 0x00, 0x00, 0xff, 0xfe, 2281 0x00, 0x10, 0xab, 0xcd, 2282 0x00, 0x10, 0xff, 0xff 2283 }; 2284 2285 /* expected test results */ 2286 static const int32_t results[]={ 2287 /* number of bytes read, code point */ 2288 4, 0x61, 2289 4, 0x3061, 2290 4, 0xfffd, 2291 4, 0xfffd, 2292 4, 0xfffd, 2293 4, 0xfffe, 2294 4, 0x10abcd, 2295 4, 0x10ffff 2296 }; 2297 2298 /* error test input */ 2299 static const uint8_t in2[]={ 2300 0x00, 0x00, 0x00, 0x61, 2301 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2302 0x00, 0x00, 0x00, 0x62, 2303 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2304 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2305 0x00, 0x00, 0x01, 0x62, 2306 0x00, 0x00, 0x02, 0x62 2307 }; 2308 2309 /* expected error test results */ 2310 static const 
int32_t results2[]={ 2311 /* number of bytes read, code point */ 2312 4, 0x61, 2313 8, 0x62, 2314 12, 0x162, 2315 4, 0x262 2316 }; 2317 2318 UConverterToUCallback cb; 2319 const void *p; 2320 2321 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2322 UErrorCode errorCode=U_ZERO_ERROR; 2323 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2324 if(U_FAILURE(errorCode)) { 2325 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2326 return; 2327 } 2328 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2329 2330 /* Test the condition when source >= sourceLimit */ 2331 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2332 2333 /* test error behavior with a skip callback */ 2334 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2335 source=(const char *)in2; 2336 limit=(const char *)(in2+sizeof(in2)); 2337 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2338 2339 ucnv_close(cnv); 2340 } 2341 2342 static void 2343 TestUTF32LE() { 2344 /* test input */ 2345 static const uint8_t in[]={ 2346 0x61, 0x00, 0x00, 0x00, 2347 0x61, 0x30, 0x00, 0x00, 2348 0x00, 0xdc, 0x00, 0x00, 2349 0x00, 0xd8, 0x00, 0x00, 2350 0xff, 0xdf, 0x00, 0x00, 2351 0xfe, 0xff, 0x00, 0x00, 2352 0xcd, 0xab, 0x10, 0x00, 2353 0xff, 0xff, 0x10, 0x00 2354 }; 2355 2356 /* expected test results */ 2357 static const int32_t results[]={ 2358 /* number of bytes read, code point */ 2359 4, 0x61, 2360 4, 0x3061, 2361 4, 0xfffd, 2362 4, 0xfffd, 2363 4, 0xfffd, 2364 4, 0xfffe, 2365 4, 0x10abcd, 2366 4, 0x10ffff 2367 }; 2368 2369 /* error test input */ 2370 static const uint8_t in2[]={ 2371 0x61, 0x00, 0x00, 0x00, 2372 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2373 0x62, 0x00, 0x00, 0x00, 2374 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2375 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2376 0x62, 0x01, 0x00, 0x00, 2377 0x62, 0x02, 0x00, 0x00, 2378 }; 2379 
2380 /* expected error test results */ 2381 static const int32_t results2[]={ 2382 /* number of bytes read, code point */ 2383 4, 0x61, 2384 8, 0x62, 2385 12, 0x162, 2386 4, 0x262, 2387 }; 2388 2389 UConverterToUCallback cb; 2390 const void *p; 2391 2392 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2393 UErrorCode errorCode=U_ZERO_ERROR; 2394 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2395 if(U_FAILURE(errorCode)) { 2396 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2397 return; 2398 } 2399 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2400 2401 /* Test the condition when source >= sourceLimit */ 2402 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2403 2404 /* test error behavior with a skip callback */ 2405 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2406 source=(const char *)in2; 2407 limit=(const char *)(in2+sizeof(in2)); 2408 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2409 2410 ucnv_close(cnv); 2411 } 2412 2413 static void 2414 TestLATIN1() { 2415 /* test input */ 2416 static const uint8_t in[]={ 2417 0x61, 2418 0x31, 2419 0x32, 2420 0xc0, 2421 0xf0, 2422 0xf4, 2423 }; 2424 2425 /* expected test results */ 2426 static const int32_t results[]={ 2427 /* number of bytes read, code point */ 2428 1, 0x61, 2429 1, 0x31, 2430 1, 0x32, 2431 1, 0xc0, 2432 1, 0xf0, 2433 1, 0xf4, 2434 }; 2435 static const uint16_t in1[] = { 2436 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2437 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2438 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2439 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2440 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 
0x08, 0x02, 0x0e, 2441 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2442 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2443 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2444 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2445 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2446 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2447 0xcb, 0x82 2448 }; 2449 static const uint8_t out1[] = { 2450 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2451 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2452 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2453 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2454 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2455 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2456 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2457 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2458 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2459 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2460 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2461 0xcb, 0x82 2462 }; 2463 static const uint16_t in2[]={ 2464 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2465 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2466 0x28, 
0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2467 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2468 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2469 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2470 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2471 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2472 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2473 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2474 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2475 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2476 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2477 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2478 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2479 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2480 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2481 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2482 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2483 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2484 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2485 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2486 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2487 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2488 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2489 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2490 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2491 0x37, 0x20, 0x2A, 0x2F, 2492 }; 2493 static const unsigned char out2[]={ 2494 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2495 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2496 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2497 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2498 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 
0x22, 0x21, 2499 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2500 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2501 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2502 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2503 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2504 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2505 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2506 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2507 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2508 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2509 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2510 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2511 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2512 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2513 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2514 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2515 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2516 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2517 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2518 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2519 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2520 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2521 0x37, 0x20, 0x2A, 0x2F, 2522 }; 2523 const char *source=(const char *)in; 2524 const char *limit=(const char *)in+sizeof(in); 2525 2526 UErrorCode errorCode=U_ZERO_ERROR; 2527 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2528 if(U_FAILURE(errorCode)) { 2529 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2530 return; 2531 } 2532 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2533 /* Test the condition when source >= sourceLimit */ 2534 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, 
"sourceLimit <= source"); 2535 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2536 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2537 2538 ucnv_close(cnv); 2539 } 2540 2541 static void 2542 TestSBCS() { 2543 /* test input */ 2544 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2545 /* expected test results */ 2546 static const int32_t results[]={ 2547 /* number of bytes read, code point */ 2548 1, 0x61, 2549 1, 0xbf, 2550 1, 0xc4, 2551 1, 0x2021, 2552 1, 0xf8ff, 2553 1, 0x00d9 2554 }; 2555 2556 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2557 UErrorCode errorCode=U_ZERO_ERROR; 2558 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2559 if(U_FAILURE(errorCode)) { 2560 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2561 return; 2562 } 2563 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2564 /* Test the condition when source >= sourceLimit */ 2565 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2566 /*Test for Illegal character */ /* 2567 { 2568 static const uint8_t input1[]={ 0xA1 }; 2569 const char* illegalsource=(const char*)input1; 2570 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2571 } 2572 */ 2573 ucnv_close(cnv); 2574 } 2575 2576 static void 2577 TestDBCS() { 2578 /* test input */ 2579 static const uint8_t in[]={ 2580 0x44, 0x6a, 2581 0xc4, 0x9c, 2582 0x7a, 0x74, 2583 0x46, 0xab, 2584 0x42, 0x5b, 2585 2586 }; 2587 2588 /* expected test results */ 2589 static const int32_t results[]={ 2590 /* number of bytes read, code point */ 2591 2, 0x00a7, 2592 2, 0xe1d2, 2593 2, 0x6962, 2594 2, 0xf842, 2595 2, 0xffe5, 2596 }; 2597 2598 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2599 UErrorCode errorCode=U_ZERO_ERROR; 2600 
2601 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2602 if(U_FAILURE(errorCode)) { 2603 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2604 return; 2605 } 2606 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2607 /* Test the condition when source >= sourceLimit */ 2608 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2609 /*Test for the condition where there is an invalid character*/ 2610 { 2611 static const uint8_t source2[]={0x1a, 0x1b}; 2612 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2613 } 2614 /*Test for the condition where we have a truncated char*/ 2615 { 2616 static const uint8_t source1[]={0xc4}; 2617 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2618 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2619 } 2620 ucnv_close(cnv); 2621 } 2622 2623 static void 2624 TestMBCS() { 2625 /* test input */ 2626 static const uint8_t in[]={ 2627 0x01, 2628 0xa6, 0xa3, 2629 0x00, 2630 0xa6, 0xa1, 2631 0x08, 2632 0xc2, 0x76, 2633 0xc2, 0x78, 2634 2635 }; 2636 2637 /* expected test results */ 2638 static const int32_t results[]={ 2639 /* number of bytes read, code point */ 2640 1, 0x0001, 2641 2, 0x250c, 2642 1, 0x0000, 2643 2, 0x2500, 2644 1, 0x0008, 2645 2, 0xd60c, 2646 2, 0xd60e, 2647 }; 2648 2649 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2650 UErrorCode errorCode=U_ZERO_ERROR; 2651 2652 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2653 if(U_FAILURE(errorCode)) { 2654 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2655 return; 2656 } 2657 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2658 /* Test the condition when source >= sourceLimit */ 2659 TestNextUCharError(cnv, 
source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2660 /*Test for the condition where there is an invalid character*/ 2661 { 2662 static const uint8_t source2[]={0xa1, 0x80}; 2663 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2664 } 2665 /*Test for the condition where we have a truncated char*/ 2666 { 2667 static const uint8_t source1[]={0xc4}; 2668 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2669 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2670 } 2671 ucnv_close(cnv); 2672 2673 } 2674 2675 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2676 static void 2677 TestICCRunout() { 2678 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2679 2680 const char *cnvName = "ibm-1363"; 2681 UErrorCode status = U_ZERO_ERROR; 2682 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; 2683 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ 2684 const char *source = sourceData; 2685 const char *sourceLim = sourceData+sizeof(sourceData); 2686 UChar c1, c2, c3; 2687 UConverter *cnv=ucnv_open(cnvName, &status); 2688 if(U_FAILURE(status)) { 2689 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2690 return; 2691 } 2692 2693 #if 0 2694 { 2695 UChar targetBuf[256]; 2696 UChar *target = targetBuf; 2697 UChar *targetLim = target+256; 2698 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2699 2700 log_info("After convert: target@%d, source@%d, status%s\n", 2701 target-targetBuf, source-sourceData, u_errorName(status)); 2702 2703 if(U_FAILURE(status)) { 2704 log_err("Failed to convert: %s\n", u_errorName(status)); 2705 } else { 2706 2707 } 2708 } 2709 #endif 2710 2711 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2712 
log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); 2713 2714 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2715 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); 2716 2717 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2718 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); 2719 2720 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { 2721 log_verbose("OK\n"); 2722 } else { 2723 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); 2724 } 2725 2726 ucnv_close(cnv); 2727 2728 } 2729 #endif 2730 2731 #ifdef U_ENABLE_GENERIC_ISO_2022 2732 2733 static void 2734 TestISO_2022() { 2735 /* test input */ 2736 static const uint8_t in[]={ 2737 0x1b, 0x25, 0x42, 2738 0x31, 2739 0x32, 2740 0x61, 2741 0xc2, 0x80, 2742 0xe0, 0xa0, 0x80, 2743 0xf0, 0x90, 0x80, 0x80 2744 }; 2745 2746 2747 2748 /* expected test results */ 2749 static const int32_t results[]={ 2750 /* number of bytes read, code point */ 2751 4, 0x0031, /* 4 bytes including the escape sequence */ 2752 1, 0x0032, 2753 1, 0x61, 2754 2, 0x80, 2755 3, 0x800, 2756 4, 0x10000 2757 }; 2758 2759 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2760 UErrorCode errorCode=U_ZERO_ERROR; 2761 UConverter *cnv; 2762 2763 cnv=ucnv_open("ISO_2022", &errorCode); 2764 if(U_FAILURE(errorCode)) { 2765 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2766 return; 2767 } 2768 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2769 2770 /* Test the condition when source >= sourceLimit */ 2771 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2772 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2773 /*Test for the condition where we have a truncated char*/ 2774 { 2775 static const uint8_t source1[]={0xc4}; 2776 
ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2777 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2778 } 2779 /*Test for the condition where there is an invalid character*/ 2780 { 2781 static const uint8_t source2[]={0xa1, 0x01}; 2782 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2783 } 2784 ucnv_close(cnv); 2785 } 2786 2787 #endif 2788 2789 static void 2790 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2791 const UChar* uSource; 2792 const UChar* uSourceLimit; 2793 const char* cSource; 2794 const char* cSourceLimit; 2795 UChar *uTargetLimit =NULL; 2796 UChar *uTarget; 2797 char *cTarget; 2798 const char *cTargetLimit; 2799 char *cBuf; 2800 UChar *uBuf; /*,*test;*/ 2801 int32_t uBufSize = 120; 2802 int len=0; 2803 int i=2; 2804 UErrorCode errorCode=U_ZERO_ERROR; 2805 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2806 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2807 ucnv_reset(cnv); 2808 for(;--i>0; ){ 2809 uSource = (UChar*) source; 2810 uSourceLimit=(const UChar*)sourceLimit; 2811 cTarget = cBuf; 2812 uTarget = uBuf; 2813 cSource = cBuf; 2814 cTargetLimit = cBuf; 2815 uTargetLimit = uBuf; 2816 2817 do{ 2818 2819 cTargetLimit = cTargetLimit+ i; 2820 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2821 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2822 errorCode=U_ZERO_ERROR; 2823 continue; 2824 } 2825 2826 if(U_FAILURE(errorCode)){ 2827 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2828 return; 2829 } 2830 2831 }while (uSource<uSourceLimit); 2832 2833 cSourceLimit =cTarget; 2834 do{ 2835 uTargetLimit=uTargetLimit+i; 2836 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2837 
if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2838 errorCode=U_ZERO_ERROR; 2839 continue; 2840 } 2841 if(U_FAILURE(errorCode)){ 2842 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2843 return; 2844 } 2845 }while(cSource<cSourceLimit); 2846 2847 uSource = source; 2848 /*test =uBuf;*/ 2849 for(len=0;len<(int)(source - sourceLimit);len++){ 2850 if(uBuf[len]!=uSource[len]){ 2851 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2852 } 2853 } 2854 } 2855 free(uBuf); 2856 free(cBuf); 2857 } 2858 /* Test for Jitterbug 778 */ 2859 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2860 const UChar* uSource; 2861 const UChar* uSourceLimit; 2862 const char* cSource; 2863 UChar *uTargetLimit =NULL; 2864 UChar *uTarget; 2865 char *cTarget; 2866 const char *cTargetLimit; 2867 char *cBuf; 2868 UChar *uBuf,*test; 2869 int32_t uBufSize = 120; 2870 int numCharsInTarget=0; 2871 UErrorCode errorCode=U_ZERO_ERROR; 2872 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2873 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2874 uSource = source; 2875 uSourceLimit=sourceLimit; 2876 cTarget = cBuf; 2877 cTargetLimit = cBuf +uBufSize*5; 2878 uTarget = uBuf; 2879 uTargetLimit = uBuf+ uBufSize*5; 2880 ucnv_reset(cnv); 2881 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2882 if(U_FAILURE(errorCode)){ 2883 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2884 return; 2885 } 2886 cSource = cBuf; 2887 test =uBuf; 2888 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2889 if(U_FAILURE(errorCode)){ 2890 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2891 return; 2892 } 2893 uSource = source; 2894 while(uSource<uSourceLimit){ 2895 if(*test!=*uSource){ 2896 2897 log_err("Expected : \\u%04X \t 
Got: \\u%04X\n",*uSource,(int)*test) ; 2898 } 2899 uSource++; 2900 test++; 2901 } 2902 free(uBuf); 2903 free(cBuf); 2904 } 2905 2906 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2907 const UChar* uSource; 2908 const UChar* uSourceLimit; 2909 const char* cSource; 2910 const char* cSourceLimit; 2911 UChar *uTargetLimit =NULL; 2912 UChar *uTarget; 2913 char *cTarget; 2914 const char *cTargetLimit; 2915 char *cBuf; 2916 UChar *uBuf; /*,*test;*/ 2917 int32_t uBufSize = 120; 2918 int len=0; 2919 int i=2; 2920 const UChar *temp = sourceLimit; 2921 UErrorCode errorCode=U_ZERO_ERROR; 2922 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2923 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2924 2925 ucnv_reset(cnv); 2926 for(;--i>0;){ 2927 uSource = (UChar*) source; 2928 cTarget = cBuf; 2929 uTarget = uBuf; 2930 cSource = cBuf; 2931 cTargetLimit = cBuf; 2932 uTargetLimit = uBuf+uBufSize*5; 2933 cTargetLimit = cTargetLimit+uBufSize*10; 2934 uSourceLimit=uSource; 2935 do{ 2936 2937 if (uSourceLimit < sourceLimit) { 2938 uSourceLimit = uSourceLimit+1; 2939 } 2940 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2941 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2942 errorCode=U_ZERO_ERROR; 2943 continue; 2944 } 2945 2946 if(U_FAILURE(errorCode)){ 2947 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2948 return; 2949 } 2950 2951 }while (uSource<temp); 2952 2953 cSourceLimit =cBuf; 2954 do{ 2955 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2956 cSourceLimit = cSourceLimit+1; 2957 } 2958 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2959 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2960 errorCode=U_ZERO_ERROR; 2961 continue; 2962 } 2963 if(U_FAILURE(errorCode)){ 2964 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2965 return; 2966 } 2967 }while(cSource<cTarget); 2968 2969 uSource 
= source; 2970 /*test =uBuf;*/ 2971 for(;len<(int)(source - sourceLimit);len++){ 2972 if(uBuf[len]!=uSource[len]){ 2973 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2974 } 2975 } 2976 } 2977 free(uBuf); 2978 free(cBuf); 2979 } 2980 static void 2981 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2982 const uint16_t results[], const char* message){ 2983 /* const char* s0; */ 2984 const char* s=(char*)source; 2985 const uint16_t *r=results; 2986 UErrorCode errorCode=U_ZERO_ERROR; 2987 uint32_t c,exC; 2988 ucnv_reset(cnv); 2989 while(s<limit) { 2990 /* s0=s; */ 2991 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2992 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2993 break; /* no more significant input */ 2994 } else if(U_FAILURE(errorCode)) { 2995 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2996 break; 2997 } else { 2998 if(U16_IS_LEAD(*r)){ 2999 int i =0, len = 2; 3000 U16_NEXT(r, i, len, exC); 3001 r++; 3002 }else{ 3003 exC = *r; 3004 } 3005 if(c!=(uint32_t)(exC)) 3006 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 3007 } 3008 r++; 3009 } 3010 } 3011 3012 static int TestJitterbug930(const char* enc){ 3013 UErrorCode err = U_ZERO_ERROR; 3014 UConverter*converter; 3015 char out[80]; 3016 char*target = out; 3017 UChar in[4]; 3018 const UChar*source = in; 3019 int32_t off[80]; 3020 int32_t* offsets = off; 3021 int numOffWritten=0; 3022 UBool flush = 0; 3023 converter = my_ucnv_open(enc, &err); 3024 3025 in[0] = 0x41; /* 0x4E00;*/ 3026 in[1] = 0x4E01; 3027 in[2] = 0x4E02; 3028 in[3] = 0x4E03; 3029 3030 memset(off, '*', sizeof(off)); 3031 3032 ucnv_fromUnicode (converter, 3033 &target, 3034 target+2, 3035 &source, 3036 source+3, 3037 offsets, 3038 flush, 3039 &err); 3040 3041 /* writes three bytes into the output buffer: 41 1B 24 3042 * but offsets contains 0 1 1 3043 */ 3044 while(*offsets< off[10]){ 3045 numOffWritten++; 3046 
offsets++; 3047 } 3048 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3049 if(numOffWritten!= (int)(target-out)){ 3050 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3051 } 3052 3053 err = U_ZERO_ERROR; 3054 3055 memset(off,'*' , sizeof(off)); 3056 3057 flush = 1; 3058 offsets=off; 3059 ucnv_fromUnicode (converter, 3060 &target, 3061 target+4, 3062 &source, 3063 source, 3064 offsets, 3065 flush, 3066 &err); 3067 numOffWritten=0; 3068 while(*offsets< off[10]){ 3069 numOffWritten++; 3070 if(*offsets!= -1){ 3071 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3072 } 3073 offsets++; 3074 } 3075 3076 /* writes 42 43 7A into output buffer, 3077 * offsets contains -1 -1 -1 3078 */ 3079 ucnv_close(converter); 3080 return 0; 3081 } 3082 3083 static void 3084 TestHZ() { 3085 /* test input */ 3086 static const uint16_t in[]={ 3087 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3088 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3089 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3090 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3091 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3092 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3093 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3094 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3095 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3096 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 3097 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3098 0x005A, 0x005B, 0x005C, 0x000A 3099 }; 3100 const UChar* uSource; 3101 const UChar* uSourceLimit; 3102 const char* 
cSource; 3103 const char* cSourceLimit; 3104 UChar *uTargetLimit =NULL; 3105 UChar *uTarget; 3106 char *cTarget; 3107 const char *cTargetLimit; 3108 char *cBuf; 3109 UChar *uBuf,*test; 3110 int32_t uBufSize = 120; 3111 UErrorCode errorCode=U_ZERO_ERROR; 3112 UConverter *cnv; 3113 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3114 int32_t* myOff= offsets; 3115 cnv=ucnv_open("HZ", &errorCode); 3116 if(U_FAILURE(errorCode)) { 3117 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3118 return; 3119 } 3120 3121 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3122 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3123 uSource = (const UChar*)in; 3124 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3125 cTarget = cBuf; 3126 cTargetLimit = cBuf +uBufSize*5; 3127 uTarget = uBuf; 3128 uTargetLimit = uBuf+ uBufSize*5; 3129 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3130 if(U_FAILURE(errorCode)){ 3131 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3132 return; 3133 } 3134 cSource = cBuf; 3135 cSourceLimit =cTarget; 3136 test =uBuf; 3137 myOff=offsets; 3138 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3139 if(U_FAILURE(errorCode)){ 3140 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3141 return; 3142 } 3143 uSource = (const UChar*)in; 3144 while(uSource<uSourceLimit){ 3145 if(*test!=*uSource){ 3146 3147 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3148 } 3149 uSource++; 3150 test++; 3151 } 3152 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3153 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3154 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3155 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3156 TestJitterbug930("csISO2022JP"); 
3157 ucnv_close(cnv); 3158 free(offsets); 3159 free(uBuf); 3160 free(cBuf); 3161 } 3162 3163 static void 3164 TestISCII(){ 3165 /* test input */ 3166 static const uint16_t in[]={ 3167 /* test full range of Devanagari */ 3168 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3169 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3170 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3171 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3172 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3173 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3174 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3175 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3176 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3177 0x096D,0x096E,0x096F, 3178 /* test Soft halant*/ 3179 0x0915,0x094d, 0x200D, 3180 /* test explicit halant */ 3181 0x0915,0x094d, 0x200c, 3182 /* test double danda */ 3183 0x965, 3184 /* test ASCII */ 3185 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3186 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3187 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3188 /* tests from Lotus */ 3189 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3190 0x0930,0x094D,0x200D, 3191 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3192 0x0915,0x0921,0x002B,0x095F, 3193 /* tamil range */ 3194 0x0B86, 0xB87, 0xB88, 3195 /* telugu range */ 3196 0x0C05, 0x0C02, 0x0C03,0x0c31, 3197 /* kannada range */ 3198 0x0C85, 0xC82, 0x0C83, 3199 /* test Abbr sign and Anudatta */ 3200 0x0970, 0x952, 3201 /* 0x0958, 3202 0x0959, 3203 0x095A, 3204 0x095B, 3205 0x095C, 3206 0x095D, 3207 0x095E, 3208 0x095F,*/ 3209 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3210 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3211 0x090C , 3212 0x0962, 3213 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3214 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3215 0x0950 /* OM Symbol 0xa1, 
0xE9,*/, 3216 0x093D /* Avagraha 0xEA, 0xE9*/, 3217 0x0958, 3218 0x0959, 3219 0x095A, 3220 0x095B, 3221 0x095C, 3222 0x095D, 3223 0x095E, 3224 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3225 }; 3226 static const unsigned char byteArr[]={ 3227 3228 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3229 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3230 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3231 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3232 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3233 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3234 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3235 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3236 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3237 0xf8,0xf9,0xfa, 3238 /* test soft halant */ 3239 0xb3, 0xE8, 0xE9, 3240 /* test explicit halant */ 3241 0xb3, 0xE8, 0xE8, 3242 /* test double danda */ 3243 0xea, 0xea, 3244 /* test ASCII */ 3245 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3246 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3247 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3248 /* test ATR code */ 3249 3250 /* tests from Lotus */ 3251 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3252 0xEF,0x42,0xCF,0xE8,0xD9, 3253 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3254 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3255 /* tamil range */ 3256 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3257 /* telugu range */ 3258 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3259 /* kannada range */ 3260 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3261 /* anudatta and abbreviation sign */ 3262 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3263 3264 3265 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3266 3267 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3268 3269 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3270 3271 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3272 3273 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3274 3275 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3276 3277 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3278 3279 0xEA, 
0xE9, /* Danda + Nukta 0x093D*/ 3280 3281 0xB3, 0xE9, /* Ka + NUKTA */ 3282 3283 0xB4, 0xE9, /* Kha + NUKTA */ 3284 3285 0xB5, 0xE9, /* Ga + NUKTA */ 3286 3287 0xBA, 0xE9, 3288 3289 0xBF, 0xE9, 3290 3291 0xC0, 0xE9, 3292 3293 0xC9, 0xE9, 3294 /* INV halant RA */ 3295 0xD9, 0xE8, 0xCF, 3296 0x00, 0x00A0, 3297 /* just consume unhandled codepoints */ 3298 0xEF, 0x30, 3299 3300 }; 3301 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3302 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3303 3304 } 3305 3306 static void 3307 TestISO_2022_JP() { 3308 /* test input */ 3309 static const uint16_t in[]={ 3310 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3311 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3312 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3313 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3314 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3315 0x201D, 0x3014, 0x000D, 0x000A, 3316 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3317 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3318 }; 3319 const UChar* uSource; 3320 const UChar* uSourceLimit; 3321 const char* cSource; 3322 const char* cSourceLimit; 3323 UChar *uTargetLimit =NULL; 3324 UChar *uTarget; 3325 char *cTarget; 3326 const char *cTargetLimit; 3327 char *cBuf; 3328 UChar *uBuf,*test; 3329 int32_t uBufSize = 120; 3330 UErrorCode errorCode=U_ZERO_ERROR; 3331 UConverter *cnv; 3332 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3333 int32_t* myOff= offsets; 3334 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3335 if(U_FAILURE(errorCode)) { 3336 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3337 return; 3338 } 3339 3340 uBuf = 
(UChar*)malloc(uBufSize * sizeof(UChar)*5); 3341 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3342 uSource = (const UChar*)in; 3343 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3344 cTarget = cBuf; 3345 cTargetLimit = cBuf +uBufSize*5; 3346 uTarget = uBuf; 3347 uTargetLimit = uBuf+ uBufSize*5; 3348 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3349 if(U_FAILURE(errorCode)){ 3350 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3351 return; 3352 } 3353 cSource = cBuf; 3354 cSourceLimit =cTarget; 3355 test =uBuf; 3356 myOff=offsets; 3357 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3358 if(U_FAILURE(errorCode)){ 3359 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3360 return; 3361 } 3362 3363 uSource = (const UChar*)in; 3364 while(uSource<uSourceLimit){ 3365 if(*test!=*uSource){ 3366 3367 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3368 } 3369 uSource++; 3370 test++; 3371 } 3372 3373 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3374 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3375 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3376 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3377 TestJitterbug930("csISO2022JP"); 3378 ucnv_close(cnv); 3379 free(uBuf); 3380 free(cBuf); 3381 free(offsets); 3382 } 3383 3384 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3385 const UChar* uSource; 3386 const UChar* uSourceLimit; 3387 const char* cSource; 3388 const char* cSourceLimit; 3389 UChar *uTargetLimit =NULL; 3390 UChar *uTarget; 3391 char *cTarget; 3392 const char *cTargetLimit; 3393 char *cBuf; 3394 UChar *uBuf,*test; 3395 int32_t uBufSize = 120*10; 3396 UErrorCode errorCode=U_ZERO_ERROR; 3397 
UConverter *cnv; 3398 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3399 int32_t* myOff= offsets; 3400 cnv=my_ucnv_open(conv, &errorCode); 3401 if(U_FAILURE(errorCode)) { 3402 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3403 return; 3404 } 3405 3406 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3407 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3408 uSource = (const UChar*)in; 3409 uSourceLimit=uSource+len; 3410 cTarget = cBuf; 3411 cTargetLimit = cBuf +uBufSize; 3412 uTarget = uBuf; 3413 uTargetLimit = uBuf+ uBufSize; 3414 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3415 if(U_FAILURE(errorCode)){ 3416 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3417 return; 3418 } 3419 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3420 cSource = cBuf; 3421 cSourceLimit =cTarget; 3422 test =uBuf; 3423 myOff=offsets; 3424 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3425 if(U_FAILURE(errorCode)){ 3426 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3427 return; 3428 } 3429 3430 uSource = (const UChar*)in; 3431 while(uSource<uSourceLimit){ 3432 if(*test!=*uSource){ 3433 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3434 } 3435 uSource++; 3436 test++; 3437 } 3438 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3439 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3440 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3441 if(byteArr && byteArrLen!=0){ 3442 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3443 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3444 { 3445 cSource = byteArr; 3446 cSourceLimit = cSource+byteArrLen; 3447 test=uBuf; 3448 myOff = offsets; 3449 
ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3450 if(U_FAILURE(errorCode)){ 3451 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3452 return; 3453 } 3454 3455 uSource = (const UChar*)in; 3456 while(uSource<uSourceLimit){ 3457 if(*test!=*uSource){ 3458 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3459 } 3460 uSource++; 3461 test++; 3462 } 3463 } 3464 } 3465 3466 ucnv_close(cnv); 3467 free(uBuf); 3468 free(cBuf); 3469 free(offsets); 3470 } 3471 static UChar U_CALLCONV 3472 _charAt(int32_t offset, void *context) { 3473 return ((char*)context)[offset]; 3474 } 3475 3476 static int32_t 3477 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3478 int32_t srcIndex=0; 3479 int32_t dstIndex=0; 3480 if(U_FAILURE(*status)){ 3481 return 0; 3482 } 3483 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3484 *status = U_ILLEGAL_ARGUMENT_ERROR; 3485 return 0; 3486 } 3487 if(srcLen==-1){ 3488 srcLen = (int32_t)uprv_strlen(src); 3489 } 3490 3491 for (; srcIndex<srcLen; ) { 3492 UChar32 c = src[srcIndex++]; 3493 if (c == 0x005C /*'\\'*/) { 3494 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3495 if (c == (UChar32)0xFFFFFFFF) { 3496 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3497 break; /* invalid escape sequence */ 3498 } 3499 } 3500 if(dstIndex < dstLen){ 3501 if(c>0xFFFF){ 3502 dst[dstIndex++] = U16_LEAD(c); 3503 if(dstIndex<dstLen){ 3504 dst[dstIndex]=U16_TRAIL(c); 3505 }else{ 3506 *status=U_BUFFER_OVERFLOW_ERROR; 3507 } 3508 }else{ 3509 dst[dstIndex]=(UChar)c; 3510 } 3511 3512 }else{ 3513 *status = U_BUFFER_OVERFLOW_ERROR; 3514 } 3515 dstIndex++; /* for preflighting */ 3516 } 3517 return dstIndex; 3518 } 3519 3520 static void 3521 TestFullRoundtrip(const char* cp){ 3522 UChar usource[10] ={0}; 3523 UChar nsrc[10] = {0}; 3524 uint32_t i=1; 3525 int len=0, ulen; 3526 nsrc[0]=0x0061; 3527 
/* Test codepoint 0 */ 3528 TestConv(usource,1,cp,"",NULL,0); 3529 TestConv(usource,2,cp,"",NULL,0); 3530 nsrc[2]=0x5555; 3531 TestConv(nsrc,3,cp,"",NULL,0); 3532 3533 for(;i<=0x10FFFF;i++){ 3534 if(i==0xD800){ 3535 i=0xDFFF; 3536 continue; 3537 } 3538 if(i<=0xFFFF){ 3539 usource[0] =(UChar) i; 3540 len=1; 3541 }else{ 3542 usource[0]=U16_LEAD(i); 3543 usource[1]=U16_TRAIL(i); 3544 len=2; 3545 } 3546 ulen=len; 3547 if(i==0x80) { 3548 usource[2]=0; 3549 } 3550 /* Test only single code points */ 3551 TestConv(usource,ulen,cp,"",NULL,0); 3552 /* Test codepoint repeated twice */ 3553 usource[ulen]=usource[0]; 3554 usource[ulen+1]=usource[1]; 3555 ulen+=len; 3556 TestConv(usource,ulen,cp,"",NULL,0); 3557 /* Test codepoint repeated 3 times */ 3558 usource[ulen]=usource[0]; 3559 usource[ulen+1]=usource[1]; 3560 ulen+=len; 3561 TestConv(usource,ulen,cp,"",NULL,0); 3562 /* Test codepoint in between 2 codepoints */ 3563 nsrc[1]=usource[0]; 3564 nsrc[2]=usource[1]; 3565 nsrc[len+1]=0x5555; 3566 TestConv(nsrc,len+2,cp,"",NULL,0); 3567 uprv_memset(usource,0,sizeof(UChar)*10); 3568 } 3569 } 3570 3571 static void 3572 TestRoundTrippingAllUTF(void){ 3573 if(!getTestOption(QUICK_OPTION)){ 3574 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3575 TestFullRoundtrip("BOCU-1"); 3576 log_verbose("Running exhaustive round trip test for SCSU\n"); 3577 TestFullRoundtrip("SCSU"); 3578 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3579 TestFullRoundtrip("UTF-8"); 3580 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3581 TestFullRoundtrip("CESU-8"); 3582 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3583 TestFullRoundtrip("UTF-16BE"); 3584 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3585 TestFullRoundtrip("UTF-16LE"); 3586 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3587 TestFullRoundtrip("UTF-16"); 3588 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3589 
TestFullRoundtrip("UTF-32BE"); 3590 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3591 TestFullRoundtrip("UTF-32LE"); 3592 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3593 TestFullRoundtrip("UTF-32"); 3594 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3595 TestFullRoundtrip("UTF-7"); 3596 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3597 TestFullRoundtrip("UTF-7,version=1"); 3598 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3599 TestFullRoundtrip("IMAP-mailbox-name"); 3600 /* 3601 * 3602 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of 3603 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA). 3604 * The old mappings remain as fallbacks. 3605 * This test may be reintroduced at a later time. 3606 * 3607 * 110118 - mow 3608 */ 3609 /* 3610 log_verbose("Running exhaustive round trip test for GB18030\n"); 3611 TestFullRoundtrip("GB18030"); 3612 */ 3613 } 3614 } 3615 3616 static void 3617 TestSCSU() { 3618 3619 static const uint16_t germanUTF16[]={ 3620 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3621 }; 3622 3623 static const uint8_t germanSCSU[]={ 3624 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3625 }; 3626 3627 static const uint16_t russianUTF16[]={ 3628 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3629 }; 3630 3631 static const uint8_t russianSCSU[]={ 3632 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3633 }; 3634 3635 static const uint16_t japaneseUTF16[]={ 3636 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3637 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3638 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3639 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3640 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3641 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 
0x306a, 0x3044, 3642 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3643 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3644 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3645 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3646 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3647 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3648 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3649 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3650 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3651 }; 3652 3653 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3654 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3655 static const uint8_t japaneseSCSU[]={ 3656 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3657 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3658 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3659 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3660 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3661 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3662 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3663 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3664 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3665 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3666 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3667 0xcb, 0x82 
3668 }; 3669 3670 static const uint16_t allFeaturesUTF16[]={ 3671 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3672 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3673 0x01df, 0xf000, 0xdbff, 0xdfff 3674 }; 3675 3676 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3677 * result here (34B vs. 35B) 3678 */ 3679 static const uint8_t allFeaturesSCSU[]={ 3680 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3681 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3682 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3683 0xdf, 0x14, 0x80, 0x15, 0xff 3684 }; 3685 static const uint16_t monkeyIn[]={ 3686 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3687 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3688 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3689 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3690 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3691 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3692 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3693 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3694 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3695 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3696 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3697 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3698 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3699 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3700 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3701 0x9CD4, 
0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3702 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3703 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3704 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3705 /* test non-BMP code points */ 3706 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3707 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3708 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3709 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3710 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3711 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3712 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3713 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3714 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3715 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3716 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3717 3718 3719 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3720 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3721 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3722 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3723 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3724 }; 3725 static const char *fTestCases [] = { 3726 "\\ud800\\udc00", /* smallest surrogate*/ 3727 "\\ud8ff\\udcff", 3728 "\\udBff\\udFff", /* largest surrogate pair*/ 3729 "\\ud834\\udc00", 3730 "\\U0010FFFF", 3731 "Hello \\u9292 \\u9192 World!", 3732 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3733 
"Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3734 3735 "\\u0648\\u06c8", /* catch missing reset*/ 3736 "\\u0648\\u06c8", 3737 3738 "\\u4444\\uE001", /* lowest quotable*/ 3739 "\\u4444\\uf2FF", /* highest quotable*/ 3740 "\\u4444\\uf188\\u4444", 3741 "\\u4444\\uf188\\uf288", 3742 "\\u4444\\uf188abc\\u0429\\uf288", 3743 "\\u9292\\u2222", 3744 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3745 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3746 "Hello World!123456", 3747 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3748 3749 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3750 "abc\\u4411d", /* uses SQU*/ 3751 "abc\\u4411\\u4412d",/* uses SCU*/ 3752 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3753 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3754 "\\u9292\\u2222", 3755 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3756 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3757 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3758 3759 "", /* empty input*/ 3760 "\\u0000", /* smallest BMP character*/ 3761 "\\uFFFF", /* largest BMP character*/ 3762 3763 /* regression tests*/ 3764 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3765 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3766 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3767 "\\u0041\\u00df\\u0401\\u015f", 3768 "\\u9066\\u2123abc", 3769 
"\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3770 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3771 }; 3772 int i=0; 3773 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3774 const char* cSrc = fTestCases[i]; 3775 UErrorCode status = U_ZERO_ERROR; 3776 int32_t cSrcLen,srcLen; 3777 UChar* src; 3778 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3779 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3780 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3781 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3782 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3783 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3784 free(src); 3785 } 3786 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3787 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3788 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3789 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3790 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3791 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3792 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3793 } 3794 3795 #if !UCONFIG_NO_LEGACY_CONVERSION 3796 static void TestJitterbug2346(){ 3797 char 
source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3798 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3799 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3800 3801 UChar uTarget[500]={'\0'}; 3802 UChar* utarget=uTarget; 3803 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3804 3805 char cTarget[500]={'\0'}; 3806 char* ctarget=cTarget; 3807 char* ctargetLimit=cTarget+sizeof(cTarget); 3808 const char* csource=source; 3809 UChar* temp = expected; 3810 UErrorCode err=U_ZERO_ERROR; 3811 3812 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3813 if(U_FAILURE(err)) { 3814 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3815 return; 3816 } 3817 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3818 if(U_FAILURE(err)) { 3819 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3820 return; 3821 } 3822 utargetLimit=utarget; 3823 utarget = uTarget; 3824 while(utarget<utargetLimit){ 3825 if(*temp!=*utarget){ 3826 3827 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3828 } 3829 utarget++; 3830 temp++; 3831 } 3832 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3833 if(U_FAILURE(err)) { 3834 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3835 return; 3836 } 3837 ctargetLimit=ctarget; 3838 ctarget =cTarget; 3839 ucnv_close(conv); 3840 3841 3842 } 3843 3844 static void 3845 TestISO_2022_JP_1() { 3846 /* test input */ 3847 static const uint16_t in[]={ 3848 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3849 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3850 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3851 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3852 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 
0x005C, 0x000D, 0x000A, 3853 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3854 0x201D, 0x000D, 0x000A, 3855 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3856 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3857 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3858 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3859 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3860 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3861 }; 3862 const UChar* uSource; 3863 const UChar* uSourceLimit; 3864 const char* cSource; 3865 const char* cSourceLimit; 3866 UChar *uTargetLimit =NULL; 3867 UChar *uTarget; 3868 char *cTarget; 3869 const char *cTargetLimit; 3870 char *cBuf; 3871 UChar *uBuf,*test; 3872 int32_t uBufSize = 120; 3873 UErrorCode errorCode=U_ZERO_ERROR; 3874 UConverter *cnv; 3875 3876 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3877 if(U_FAILURE(errorCode)) { 3878 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3879 return; 3880 } 3881 3882 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3883 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3884 uSource = (const UChar*)in; 3885 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3886 cTarget = cBuf; 3887 cTargetLimit = cBuf +uBufSize*5; 3888 uTarget = uBuf; 3889 uTargetLimit = uBuf+ uBufSize*5; 3890 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3891 if(U_FAILURE(errorCode)){ 3892 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3893 return; 3894 } 3895 cSource = cBuf; 3896 cSourceLimit =cTarget; 3897 test =uBuf; 3898 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3899 if(U_FAILURE(errorCode)){ 3900 log_err("ucnv_toUnicode conversion failed reason %s\n", 
u_errorName(errorCode)); 3901 return; 3902 } 3903 uSource = (const UChar*)in; 3904 while(uSource<uSourceLimit){ 3905 if(*test!=*uSource){ 3906 3907 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3908 } 3909 uSource++; 3910 test++; 3911 } 3912 /*ucnv_close(cnv); 3913 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3914 /*Test for the condition where there is an invalid character*/ 3915 ucnv_reset(cnv); 3916 { 3917 static const uint8_t source2[]={0x0e,0x24,0x053}; 3918 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3919 } 3920 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3921 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3922 ucnv_close(cnv); 3923 free(uBuf); 3924 free(cBuf); 3925 } 3926 3927 static void 3928 TestISO_2022_JP_2() { 3929 /* test input */ 3930 static const uint16_t in[]={ 3931 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3932 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3933 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3934 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3935 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3936 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3937 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3938 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3939 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3940 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3941 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3942 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 
0x000A, 3943 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3944 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3945 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3946 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3947 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3948 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3949 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3950 }; 3951 const UChar* uSource; 3952 const UChar* uSourceLimit; 3953 const char* cSource; 3954 const char* cSourceLimit; 3955 UChar *uTargetLimit =NULL; 3956 UChar *uTarget; 3957 char *cTarget; 3958 const char *cTargetLimit; 3959 char *cBuf; 3960 UChar *uBuf,*test; 3961 int32_t uBufSize = 120; 3962 UErrorCode errorCode=U_ZERO_ERROR; 3963 UConverter *cnv; 3964 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3965 int32_t* myOff= offsets; 3966 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3967 if(U_FAILURE(errorCode)) { 3968 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3969 return; 3970 } 3971 3972 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3973 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3974 uSource = (const UChar*)in; 3975 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3976 cTarget = cBuf; 3977 cTargetLimit = cBuf +uBufSize*5; 3978 uTarget = uBuf; 3979 uTargetLimit = uBuf+ uBufSize*5; 3980 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3981 if(U_FAILURE(errorCode)){ 3982 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3983 return; 3984 } 3985 cSource = cBuf; 3986 cSourceLimit =cTarget; 3987 test =uBuf; 3988 myOff=offsets; 3989 
ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3990 if(U_FAILURE(errorCode)){ 3991 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3992 return; 3993 } 3994 uSource = (const UChar*)in; 3995 while(uSource<uSourceLimit){ 3996 if(*test!=*uSource){ 3997 3998 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3999 } 4000 uSource++; 4001 test++; 4002 } 4003 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4004 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4005 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4006 /*Test for the condition where there is an invalid character*/ 4007 ucnv_reset(cnv); 4008 { 4009 static const uint8_t source2[]={0x0e,0x24,0x053}; 4010 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 4011 } 4012 ucnv_close(cnv); 4013 free(uBuf); 4014 free(cBuf); 4015 free(offsets); 4016 } 4017 4018 static void 4019 TestISO_2022_KR() { 4020 /* test input */ 4021 static const uint16_t in[]={ 4022 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4023 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4024 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4025 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4026 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4027 ,0x53E3,0x53E4,0x000A,0x000D}; 4028 const UChar* uSource; 4029 const UChar* uSourceLimit; 4030 const char* cSource; 4031 const char* cSourceLimit; 4032 UChar *uTargetLimit =NULL; 4033 UChar *uTarget; 4034 char *cTarget; 4035 const char *cTargetLimit; 4036 char *cBuf; 4037 UChar *uBuf,*test; 4038 int32_t uBufSize = 120; 4039 UErrorCode errorCode=U_ZERO_ERROR; 4040 UConverter *cnv; 4041 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4042 
int32_t* myOff= offsets; 4043 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 4044 if(U_FAILURE(errorCode)) { 4045 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4046 return; 4047 } 4048 4049 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4050 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4051 uSource = (const UChar*)in; 4052 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4053 cTarget = cBuf; 4054 cTargetLimit = cBuf +uBufSize*5; 4055 uTarget = uBuf; 4056 uTargetLimit = uBuf+ uBufSize*5; 4057 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4058 if(U_FAILURE(errorCode)){ 4059 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4060 return; 4061 } 4062 cSource = cBuf; 4063 cSourceLimit =cTarget; 4064 test =uBuf; 4065 myOff=offsets; 4066 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4067 if(U_FAILURE(errorCode)){ 4068 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4069 return; 4070 } 4071 uSource = (const UChar*)in; 4072 while(uSource<uSourceLimit){ 4073 if(*test!=*uSource){ 4074 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4075 } 4076 uSource++; 4077 test++; 4078 } 4079 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4080 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4081 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4082 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4083 TestJitterbug930("csISO2022KR"); 4084 /*Test for the condition where there is an invalid character*/ 4085 ucnv_reset(cnv); 4086 { 4087 static const uint8_t source2[]={0x1b,0x24,0x053}; 4088 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4089 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), 
U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4090 } 4091 ucnv_close(cnv); 4092 free(uBuf); 4093 free(cBuf); 4094 free(offsets); 4095 } 4096 4097 static void 4098 TestISO_2022_KR_1() { 4099 /* test input */ 4100 static const uint16_t in[]={ 4101 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4102 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4103 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4104 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4105 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4106 ,0x53E3,0x53E4,0x000A,0x000D}; 4107 const UChar* uSource; 4108 const UChar* uSourceLimit; 4109 const char* cSource; 4110 const char* cSourceLimit; 4111 UChar *uTargetLimit =NULL; 4112 UChar *uTarget; 4113 char *cTarget; 4114 const char *cTargetLimit; 4115 char *cBuf; 4116 UChar *uBuf,*test; 4117 int32_t uBufSize = 120; 4118 UErrorCode errorCode=U_ZERO_ERROR; 4119 UConverter *cnv; 4120 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4121 int32_t* myOff= offsets; 4122 cnv=ucnv_open("ibm-25546", &errorCode); 4123 if(U_FAILURE(errorCode)) { 4124 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4125 return; 4126 } 4127 4128 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4129 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4130 uSource = (const UChar*)in; 4131 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4132 cTarget = cBuf; 4133 cTargetLimit = cBuf +uBufSize*5; 4134 uTarget = uBuf; 4135 uTargetLimit = uBuf+ uBufSize*5; 4136 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4137 if(U_FAILURE(errorCode)){ 4138 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4139 return; 4140 } 4141 cSource = cBuf; 4142 cSourceLimit =cTarget; 4143 test =uBuf; 4144 myOff=offsets; 4145 
ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4146 if(U_FAILURE(errorCode)){ 4147 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4148 return; 4149 } 4150 uSource = (const UChar*)in; 4151 while(uSource<uSourceLimit){ 4152 if(*test!=*uSource){ 4153 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4154 } 4155 uSource++; 4156 test++; 4157 } 4158 ucnv_reset(cnv); 4159 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4160 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4161 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4162 ucnv_reset(cnv); 4163 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4164 /*Test for the condition where there is an invalid character*/ 4165 ucnv_reset(cnv); 4166 { 4167 static const uint8_t source2[]={0x1b,0x24,0x053}; 4168 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4169 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4170 } 4171 ucnv_close(cnv); 4172 free(uBuf); 4173 free(cBuf); 4174 free(offsets); 4175 } 4176 4177 static void TestJitterbug2411(){ 4178 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4179 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4180 UConverter* kr=NULL, *kr1=NULL; 4181 UErrorCode errorCode = U_ZERO_ERROR; 4182 UChar tgt[100]={'\0'}; 4183 UChar* target = tgt; 4184 UChar* targetLimit = target+100; 4185 kr=ucnv_open("iso-2022-kr", &errorCode); 4186 if(U_FAILURE(errorCode)) { 4187 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4188 return; 4189 } 4190 
ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4191 if(U_FAILURE(errorCode)) { 4192 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4193 return; 4194 } 4195 kr1 = ucnv_open("ibm-25546", &errorCode); 4196 if(U_FAILURE(errorCode)) { 4197 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4198 return; 4199 } 4200 target = tgt; 4201 targetLimit = target+100; 4202 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4203 4204 if(U_FAILURE(errorCode)) { 4205 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4206 return; 4207 } 4208 4209 ucnv_close(kr); 4210 ucnv_close(kr1); 4211 4212 } 4213 4214 static void 4215 TestJIS(){ 4216 /* From Unicode moved to testdata/conversion.txt */ 4217 /*To Unicode*/ 4218 { 4219 static const uint8_t sampleTextJIS[] = { 4220 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4221 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4222 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4223 }; 4224 static const uint16_t expectedISO2022JIS[] = { 4225 0x0041, 0x0042, 4226 0xFF81, 0xFF82, 4227 0x3000 4228 }; 4229 static const int32_t toISO2022JISOffs[]={ 4230 3,4, 4231 8,9, 4232 16 4233 }; 4234 4235 static const uint8_t sampleTextJIS7[] = { 4236 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4237 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4238 0x1b,0x24,0x42,0x21,0x21, 4239 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4240 0x21,0x22, 4241 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4242 }; 4243 static const uint16_t expectedISO2022JIS7[] = { 4244 0x0041, 0x0042, 4245 0xFF81, 0xFF82, 4246 0x3000, 4247 0xFF81, 0xFF82, 4248 0x3001, 4249 0x3000 4250 }; 4251 static const int32_t toISO2022JIS7Offs[]={ 4252 3,4, 4253 8,9, 4254 13,16, 4255 17, 4256 19,27 4257 }; 4258 static const uint8_t 
sampleTextJIS8[] = { 4259 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4260 0xa1,0xc8,0xd9,/*Katakana Set*/ 4261 0x1b,0x28,0x42, 4262 0x41,0x42, 4263 0xb1,0xc3, /*Katakana Set*/ 4264 0x1b,0x24,0x42,0x21,0x21 4265 }; 4266 static const uint16_t expectedISO2022JIS8[] = { 4267 0x0041, 0x0042, 4268 0xff61, 0xff88, 0xff99, 4269 0x0041, 0x0042, 4270 0xff71, 0xff83, 4271 0x3000 4272 }; 4273 static const int32_t toISO2022JIS8Offs[]={ 4274 3, 4, 5, 6, 4275 7, 11, 12, 13, 4276 14, 18, 4277 }; 4278 4279 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4280 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4281 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4282 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4283 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4284 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4285 } 4286 4287 } 4288 4289 4290 #if 0 4291 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 4292 4293 static void TestJitterbug915(){ 4294 /* tests for roundtripping of the below sequence 4295 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4296 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4297 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4298 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4299 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4300 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4301 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4302 */ 4303 static const char cSource[]={ 4304 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4305 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4306 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4307 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4308 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 
0x4E, 0x22, 0x21, 4309 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4310 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4311 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4312 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4313 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4314 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4315 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4316 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4317 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4318 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4319 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4320 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4321 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4322 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4323 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4324 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4325 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4326 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4327 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4328 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4329 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4330 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4331 0x37, 0x20, 0x2A, 0x2F 4332 }; 4333 UChar uTarget[500]={'\0'}; 4334 UChar* utarget=uTarget; 4335 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4336 4337 char cTarget[500]={'\0'}; 4338 char* ctarget=cTarget; 4339 char* ctargetLimit=cTarget+sizeof(cTarget); 4340 const char* csource=cSource; 4341 const char* tempSrc = cSource; 4342 UErrorCode err=U_ZERO_ERROR; 4343 4344 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4345 if(U_FAILURE(err)) { 4346 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4347 return; 4348 } 4349 
ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4350 if(U_FAILURE(err)) { 4351 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4352 return; 4353 } 4354 utargetLimit=utarget; 4355 utarget = uTarget; 4356 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4357 if(U_FAILURE(err)) { 4358 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4359 return; 4360 } 4361 ctargetLimit=ctarget; 4362 ctarget =cTarget; 4363 while(ctarget<ctargetLimit){ 4364 if(*ctarget != *tempSrc){ 4365 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4366 } 4367 ++ctarget; 4368 ++tempSrc; 4369 } 4370 4371 ucnv_close(conv); 4372 } 4373 4374 static void 4375 TestISO_2022_CN_EXT() { 4376 /* test input */ 4377 static const uint16_t in[]={ 4378 /* test Non-BMP code points */ 4379 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4380 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4381 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4382 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4383 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4384 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4385 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4386 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4387 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4388 0xD869, 0xDED5, 4389 4390 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4391 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4392 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 
0x000A, 4393 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4394 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4395 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4396 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4397 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4398 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4399 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4400 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4401 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4402 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4403 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4404 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4405 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4406 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4407 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4408 4409 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4410 4411 }; 4412 4413 const UChar* uSource; 4414 const UChar* uSourceLimit; 4415 const char* cSource; 4416 const char* cSourceLimit; 4417 UChar *uTargetLimit =NULL; 4418 UChar *uTarget; 4419 char *cTarget; 4420 const char *cTargetLimit; 4421 char *cBuf; 4422 UChar *uBuf,*test; 4423 int32_t uBufSize = 180; 4424 UErrorCode errorCode=U_ZERO_ERROR; 4425 UConverter *cnv; 4426 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4427 int32_t* myOff= offsets; 4428 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4429 if(U_FAILURE(errorCode)) { 4430 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 
4431 return; 4432 } 4433 4434 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4435 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4436 uSource = (const UChar*)in; 4437 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4438 cTarget = cBuf; 4439 cTargetLimit = cBuf +uBufSize*5; 4440 uTarget = uBuf; 4441 uTargetLimit = uBuf+ uBufSize*5; 4442 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4443 if(U_FAILURE(errorCode)){ 4444 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4445 return; 4446 } 4447 cSource = cBuf; 4448 cSourceLimit =cTarget; 4449 test =uBuf; 4450 myOff=offsets; 4451 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4452 if(U_FAILURE(errorCode)){ 4453 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4454 return; 4455 } 4456 uSource = (const UChar*)in; 4457 while(uSource<uSourceLimit){ 4458 if(*test!=*uSource){ 4459 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4460 } 4461 else{ 4462 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4463 } 4464 uSource++; 4465 test++; 4466 } 4467 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4468 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4469 /*Test for the condition where there is an invalid character*/ 4470 ucnv_reset(cnv); 4471 { 4472 static const uint8_t source2[]={0x0e,0x24,0x053}; 4473 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4474 } 4475 ucnv_close(cnv); 4476 free(uBuf); 4477 free(cBuf); 4478 free(offsets); 4479 } 4480 #endif 4481 4482 static void 4483 TestISO_2022_CN() { 4484 /* test input */ 4485 static const uint16_t in[]={ 4486 /* jitterbug 951 */ 4487 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4488 0x0020, 0xFF06, 0x0020, 0xFF11, 
0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4489 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4490 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4491 0x0020, 0x0045, 0x004e, 0x0044, 4492 /**/ 4493 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4494 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4495 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4496 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4497 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4498 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4499 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4500 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4501 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4502 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4503 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4504 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4505 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4506 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4507 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4508 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4509 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4510 4511 }; 4512 const UChar* uSource; 4513 const UChar* uSourceLimit; 4514 const char* cSource; 4515 const char* cSourceLimit; 4516 UChar *uTargetLimit =NULL; 4517 UChar *uTarget; 4518 char *cTarget; 4519 const char *cTargetLimit; 4520 char *cBuf; 4521 UChar *uBuf,*test; 4522 int32_t 
uBufSize = 180; 4523 UErrorCode errorCode=U_ZERO_ERROR; 4524 UConverter *cnv; 4525 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4526 int32_t* myOff= offsets; 4527 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4528 if(U_FAILURE(errorCode)) { 4529 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4530 return; 4531 } 4532 4533 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4534 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4535 uSource = (const UChar*)in; 4536 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4537 cTarget = cBuf; 4538 cTargetLimit = cBuf +uBufSize*5; 4539 uTarget = uBuf; 4540 uTargetLimit = uBuf+ uBufSize*5; 4541 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4542 if(U_FAILURE(errorCode)){ 4543 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4544 return; 4545 } 4546 cSource = cBuf; 4547 cSourceLimit =cTarget; 4548 test =uBuf; 4549 myOff=offsets; 4550 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4551 if(U_FAILURE(errorCode)){ 4552 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4553 return; 4554 } 4555 uSource = (const UChar*)in; 4556 while(uSource<uSourceLimit){ 4557 if(*test!=*uSource){ 4558 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4559 } 4560 else{ 4561 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4562 } 4563 uSource++; 4564 test++; 4565 } 4566 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4567 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4568 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4569 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4570 TestJitterbug930("csISO2022CN"); 4571 /*Test for the condition where there is an invalid character*/ 4572 ucnv_reset(cnv); 
4573 { 4574 static const uint8_t source2[]={0x0e,0x24,0x053}; 4575 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4576 } 4577 4578 ucnv_close(cnv); 4579 free(uBuf); 4580 free(cBuf); 4581 free(offsets); 4582 } 4583 4584 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4585 typedef struct { 4586 const char * converterName; 4587 const char * inputText; 4588 int inputTextLength; 4589 } EmptySegmentTest; 4590 4591 /* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4592 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4593 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4594 if (reason > UCNV_IRREGULAR) { 4595 return; 4596 } 4597 if (reason != UCNV_IRREGULAR) { 4598 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4599 } 4600 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4601 *err = U_ZERO_ERROR; 4602 ucnv_cbToUWriteSub(toArgs,0,err); 4603 } 4604 4605 enum { kEmptySegmentToUCharsMax = 64 }; 4606 static void TestJitterbug6175(void) { 4607 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4608 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4609 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4610 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4611 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4612 static const EmptySegmentTest emptySegmentTests[] = { 4613 /* converterName inputText inputTextLength */ 4614 { "ISO-2022-JP", 
iso2022jp_a, sizeof(iso2022jp_a) },
        { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
        { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
        { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
        { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
        /* terminator: */
        { NULL, NULL, 0, }
    };
    const EmptySegmentTest * testPtr;
    /* One pass per table entry; the NULL converter name ends the table. */
    for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
        UErrorCode err = U_ZERO_ERROR;
        UConverter * cnv = ucnv_open(testPtr->converterName, &err);
        if (U_FAILURE(err)) {
            log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
            return;
        }
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
        if (U_FAILURE(err)) {
            log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
            ucnv_close(cnv);
            return;
        }
        {
            /* The converted text and err are not inspected here; the checking
               is done inside the callback installed above. */
            UChar toUChars[kEmptySegmentToUCharsMax];
            UChar * toUCharsPtr = toUChars;
            const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
            const char * inCharsPtr = testPtr->inputText;
            const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
            ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
        }
        ucnv_close(cnv);
    }
}

/*
 * Exercises ucnv_getNextUChar() on an "ibm-930" converter through the
 * TestNextUChar / TestNextUCharError helpers: two byte sequences with their
 * expected (bytes consumed, code point) pairs, an empty source, a source that
 * ends right after a shift byte, and an invalid byte sequence.
 */
static void
TestEBCDIC_STATEFUL() {
    /* test input */
    static const uint8_t in[]={
        0x61,
        0x1a,
        0x0f, 0x4b,
        0x42,
        0x40,
        0x36,
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x002f,
        1, 0x0092,
        2, 0x002e,
        1, 0xff62,
        1, 0x0020,
        1, 0x0096,

    };
    /* second test input */
    static const uint8_t in2[]={
        0x0f,
        0xa1,
        0x01
    };

    /* expected test results */
    static const int32_t results2[]={
        /* number of bytes read, code point */
        2, 0x203E,
        1, 0x0001,
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv=ucnv_open("ibm-930", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
    ucnv_reset(cnv);
    /* Test the condition when source >= sourceLimit */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    ucnv_reset(cnv);
    /*Test for the condition where source > sourcelimit after consuming the shift character */
    {
        static const uint8_t source1[]={0x0f};
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
    }
    ucnv_reset(cnv);
    source=(const char*)in2;
    limit=(const char*)in2+sizeof(in2);
    TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
    ucnv_close(cnv);

}

/*
 * Feeds a fixed GB 18030 byte sequence (one-, two- and four-byte forms) to
 * TestNextUChar() together with the expected (bytes consumed, code point)
 * pairs, using a "gb18030" converter.
 */
static void
TestGB18030() {
    /* test input */
    static const uint8_t in[]={
        0x24,
        0x7f,
        0x81, 0x30, 0x81, 0x30,
        0xa8, 0xbf,
        0xa2, 0xe3,
        0xd2, 0xbb,
        0x82, 0x35, 0x8f, 0x33,
        0x84, 0x31, 0xa4, 0x39,
        0x90, 0x30, 0x81, 0x30,
        0xe3, 0x32, 0x9a, 0x35
#if 0
        /*
         * Feature removed markus 2000-oct-26
         * Only some codepages must match surrogate pairs into supplementary code points -
         * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
         * GB 18030 provides direct encodings for supplementary code points, therefore
         * it must not combine two single-encoded surrogates into one code point.
         */
        0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
#endif
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        1, 0x24,
        1, 0x7f,
        4, 0x80,
        2, 0x1f9,
        2, 0x20ac,
        2, 0x4e00,
        4, 0x9fa6,
        4, 0xffff,
        4, 0x10000,
        4, 0x10ffff
#if 0
        /* Feature removed. See comment above. */
        8, 0x10000
#endif
    };

/* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv=ucnv_open("gb18030", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
    ucnv_close(cnv);
}

/*
 * LMBCS converter tests. The tables below hold one LMBCS-1 byte string and
 * the same text as UTF-32 code points and as UTF-16 code units; the sections
 * that follow compare converter output against them.
 */
static void
TestLMBCS() {
    /* LMBCS-1 string */
    static const uint8_t pszLMBCS[]={
        0x61,
        0x01, 0x29,
        0x81,
        0xA0,
        0x0F, 0x27,
        0x0F, 0x91,
        0x14, 0x0a, 0x74,
        0x14, 0xF6, 0x02,
        0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
        0x10, 0x88, 0xA0,
    };

    /* Unicode UChar32 equivalents */
    static const UChar32 pszUnicode32[]={
        /* code point */
        0x00000061,
        0x00002013,
        0x000000FC,
        0x000000E1,
        0x00000007,
        0x00000091,
        0x00000a74,
        0x00000200,
        0x00023456, /* code point for surrogate pair */
        0x00005516
    };

    /* Unicode UChar equivalents */
    static const UChar pszUnicode[]={
        /* code point */
        0x0061,
        0x2013,
        0x00FC,
        0x00E1,
        0x0007,
        0x0091,
        0x0a74,
        0x0200,
        0xD84D, /* high (lead) surrogate */
4814 0xDC56, /* high surrogate */ 4815 0x5516 4816 }; 4817 4818 /* expected test results */ 4819 static const int offsets32[]={ 4820 /* number of bytes read, code point */ 4821 0, 4822 1, 4823 3, 4824 4, 4825 5, 4826 7, 4827 9, 4828 12, 4829 15, 4830 21, 4831 24 4832 }; 4833 4834 /* expected test results */ 4835 static const int offsets[]={ 4836 /* number of bytes read, code point */ 4837 0, 4838 1, 4839 3, 4840 4, 4841 5, 4842 7, 4843 9, 4844 12, 4845 15, 4846 18, 4847 21, 4848 24 4849 }; 4850 4851 4852 UConverter *cnv; 4853 4854 #define NAME_LMBCS_1 "LMBCS-1" 4855 #define NAME_LMBCS_2 "LMBCS-2" 4856 4857 4858 /* Some basic open/close/property tests on some LMBCS converters */ 4859 { 4860 4861 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4862 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4863 char get_subchars [1]; 4864 const char * get_name; 4865 UConverter *cnv1; 4866 UConverter *cnv2; 4867 4868 int8_t len = sizeof(get_subchars); 4869 4870 UErrorCode errorCode=U_ZERO_ERROR; 4871 4872 /* Open */ 4873 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4874 if(U_FAILURE(errorCode)) { 4875 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4876 return; 4877 } 4878 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4879 if(U_FAILURE(errorCode)) { 4880 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4881 return; 4882 } 4883 4884 /* Name */ 4885 get_name = ucnv_getName (cnv1, &errorCode); 4886 if (strcmp(NAME_LMBCS_1,get_name)){ 4887 log_err("Unexpected converter name: %s\n", get_name); 4888 } 4889 get_name = ucnv_getName (cnv2, &errorCode); 4890 if (strcmp(NAME_LMBCS_2,get_name)){ 4891 log_err("Unexpected converter name: %s\n", get_name); 4892 } 4893 4894 /* substitution chars */ 4895 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4896 if(U_FAILURE(errorCode)) { 4897 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4898 } 4899 if (len!=1){ 4900 
log_err("Unexpected length of sub chars\n"); 4901 } 4902 if (get_subchars[0] != expected_subchars[0]){ 4903 log_err("Unexpected value of sub chars\n"); 4904 } 4905 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4906 if(U_FAILURE(errorCode)) { 4907 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4908 } 4909 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4910 if(U_FAILURE(errorCode)) { 4911 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4912 } 4913 if (len!=1){ 4914 log_err("Unexpected length of sub chars\n"); 4915 } 4916 if (get_subchars[0] != new_subchars[0]){ 4917 log_err("Unexpected value of sub chars\n"); 4918 } 4919 ucnv_close(cnv1); 4920 ucnv_close(cnv2); 4921 4922 } 4923 4924 /* LMBCS to Unicode - offsets */ 4925 { 4926 UErrorCode errorCode=U_ZERO_ERROR; 4927 4928 const char * pSource = (const char *)pszLMBCS; 4929 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4930 4931 UChar Out [sizeof(pszUnicode) + 1]; 4932 UChar * pOut = Out; 4933 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4934 4935 int32_t off [sizeof(offsets)]; 4936 4937 /* last 'offset' in expected results is just the final size. 4938 (Makes other tests easier). 
Compensate here: */ 4939 4940 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4941 4942 4943 4944 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4945 if(U_FAILURE(errorCode)) { 4946 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4947 return; 4948 } 4949 4950 4951 4952 ucnv_toUnicode (cnv, 4953 &pOut, 4954 OutLimit, 4955 &pSource, 4956 sourceLimit, 4957 off, 4958 TRUE, 4959 &errorCode); 4960 4961 4962 if (memcmp(off,offsets,sizeof(offsets))) 4963 { 4964 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4965 } 4966 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4967 { 4968 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4969 } 4970 ucnv_close(cnv); 4971 } 4972 { 4973 /* LMBCS to Unicode - getNextUChar */ 4974 const char * sourceStart; 4975 const char *source=(const char *)pszLMBCS; 4976 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4977 const UChar32 *results= pszUnicode32; 4978 const int *off = offsets32; 4979 4980 UErrorCode errorCode=U_ZERO_ERROR; 4981 UChar32 uniChar; 4982 4983 cnv=ucnv_open("LMBCS-1", &errorCode); 4984 if(U_FAILURE(errorCode)) { 4985 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4986 return; 4987 } 4988 else 4989 { 4990 4991 while(source<limit) { 4992 sourceStart=source; 4993 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4994 if(U_FAILURE(errorCode)) { 4995 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4996 break; 4997 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4998 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4999 uniChar, (source-sourceStart), *results, *off); 5000 break; 5001 } 5002 results++; 5003 off++; 5004 } 5005 } 5006 ucnv_close(cnv); 5007 } 5008 { /* test locale & optimization group operations: Unicode to 
LMBCS */ 5009 5010 UErrorCode errorCode=U_ZERO_ERROR; 5011 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 5012 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 5013 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 5014 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 5015 const UChar * pUniOut = uniString; 5016 UChar * pUniIn = uniString; 5017 uint8_t lmbcsString [4]; 5018 const char * pLMBCSOut = (const char *)lmbcsString; 5019 char * pLMBCSIn = (char *)lmbcsString; 5020 5021 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 5022 ucnv_fromUnicode (cnv16he, 5023 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5024 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5025 NULL, 1, &errorCode); 5026 5027 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 5028 { 5029 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 5030 } 5031 5032 pLMBCSIn= (char *)lmbcsString; 5033 pUniOut = uniString; 5034 ucnv_fromUnicode (cnv01us, 5035 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5036 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5037 NULL, 1, &errorCode); 5038 5039 if (lmbcsString[0] != 0x9F) 5040 { 5041 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5042 } 5043 5044 /* single byte char from mbcs char set */ 5045 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5046 pLMBCSOut = (const char *)lmbcsString; 5047 pUniIn = uniString; 5048 ucnv_toUnicode (cnv16jp, 5049 &pUniIn, pUniIn + 1, 5050 &pLMBCSOut, (pLMBCSOut + 1), 5051 NULL, 1, &errorCode); 5052 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5053 { 5054 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5055 } 5056 /* convert to group 1: should be 3 bytes */ 5057 pLMBCSIn = (char *)lmbcsString; 5058 
pUniOut = uniString; 5059 ucnv_fromUnicode (cnv01us, 5060 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5061 &pUniOut, pUniOut + 1, 5062 NULL, 1, &errorCode); 5063 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5064 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5065 { 5066 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5067 } 5068 pLMBCSOut = (const char *)lmbcsString; 5069 pUniIn = uniString; 5070 ucnv_toUnicode (cnv01us, 5071 &pUniIn, pUniIn + 1, 5072 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5073 NULL, 1, &errorCode); 5074 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5075 { 5076 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5077 } 5078 pLMBCSIn = (char *)lmbcsString; 5079 pUniOut = uniString; 5080 ucnv_fromUnicode (cnv16jp, 5081 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5082 &pUniOut, pUniOut + 1, 5083 NULL, 1, &errorCode); 5084 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5085 { 5086 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5087 } 5088 ucnv_close(cnv16he); 5089 ucnv_close(cnv16jp); 5090 ucnv_close(cnv01us); 5091 } 5092 { 5093 /* Small source buffer testing, LMBCS -> Unicode */ 5094 5095 UErrorCode errorCode=U_ZERO_ERROR; 5096 5097 const char * pSource = (const char *)pszLMBCS; 5098 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5099 int codepointCount = 0; 5100 5101 UChar Out [sizeof(pszUnicode) + 1]; 5102 UChar * pOut = Out; 5103 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 5104 5105 5106 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5107 if(U_FAILURE(errorCode)) { 5108 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5109 return; 5110 } 5111 5112 5113 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 
5114 { 5115 ucnv_toUnicode (cnv, 5116 &pOut, 5117 OutLimit, 5118 &pSource, 5119 (pSource+1), /* claim that this is a 1- byte buffer */ 5120 NULL, 5121 FALSE, /* FALSE means there might be more chars in the next buffer */ 5122 &errorCode); 5123 5124 if (U_SUCCESS (errorCode)) 5125 { 5126 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5127 { 5128 /* we are on to the next code point: check value */ 5129 5130 if (Out[0] != pszUnicode[codepointCount]){ 5131 log_err("LMBCS->Uni result %lx should have been %lx \n", 5132 Out[0], pszUnicode[codepointCount]); 5133 } 5134 5135 pOut = Out; /* reset for accumulating next code point */ 5136 codepointCount++; 5137 } 5138 } 5139 else 5140 { 5141 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5142 } 5143 } 5144 { 5145 /* limits & surrogate error testing */ 5146 char LIn [sizeof(pszLMBCS)]; 5147 const char * pLIn = LIn; 5148 5149 char LOut [sizeof(pszLMBCS)]; 5150 char * pLOut = LOut; 5151 5152 UChar UOut [sizeof(pszUnicode)]; 5153 UChar * pUOut = UOut; 5154 5155 UChar UIn [sizeof(pszUnicode)]; 5156 const UChar * pUIn = UIn; 5157 5158 int32_t off [sizeof(offsets)]; 5159 UChar32 uniChar; 5160 5161 errorCode=U_ZERO_ERROR; 5162 5163 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5164 pUIn++; 5165 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); 5166 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5167 { 5168 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5169 } 5170 pUIn--; 5171 5172 errorCode=U_ZERO_ERROR; 5173 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5174 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5175 { 5176 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5177 } 5178 errorCode=U_ZERO_ERROR; 5179 5180 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, 
(const char *)(pLIn-1), &errorCode); 5181 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5182 { 5183 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5184 } 5185 errorCode=U_ZERO_ERROR; 5186 5187 /* 0 byte source request - no error, no pointer movement */ 5188 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5189 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5190 if(U_FAILURE(errorCode)) { 5191 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5192 } 5193 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5194 { 5195 log_err("Unexpected pointer move in 0 byte source request \n"); 5196 } 5197 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5198 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5199 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5200 { 5201 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5202 } 5203 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5204 { 5205 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5206 } 5207 errorCode = U_ZERO_ERROR; 5208 5209 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5210 5211 pUIn = pszUnicode; 5212 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5213 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5214 { 5215 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5216 } 5217 5218 errorCode = U_ZERO_ERROR; 5219 5220 pLIn = (const char *)pszLMBCS; 5221 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5222 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const 
char *)pszLMBCS+offsets[4]) 5223 { 5224 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5225 } 5226 5227 /* unpaired or chopped LMBCS surrogates */ 5228 5229 /* OK high surrogate, Low surrogate is chopped */ 5230 LIn [0] = (char)0x14; 5231 LIn [1] = (char)0xD8; 5232 LIn [2] = (char)0x01; 5233 LIn [3] = (char)0x14; 5234 LIn [4] = (char)0xDC; 5235 pLIn = LIn; 5236 errorCode = U_ZERO_ERROR; 5237 pUOut = UOut; 5238 5239 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5240 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5241 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5242 { 5243 log_err("Unexpected results on chopped low surrogate\n"); 5244 } 5245 5246 /* chopped at surrogate boundary */ 5247 LIn [0] = (char)0x14; 5248 LIn [1] = (char)0xD8; 5249 LIn [2] = (char)0x01; 5250 pLIn = LIn; 5251 errorCode = U_ZERO_ERROR; 5252 pUOut = UOut; 5253 5254 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5255 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5256 { 5257 log_err("Unexpected results on chopped at surrogate boundary \n"); 5258 } 5259 5260 /* unpaired surrogate plus valid Unichar */ 5261 LIn [0] = (char)0x14; 5262 LIn [1] = (char)0xD8; 5263 LIn [2] = (char)0x01; 5264 LIn [3] = (char)0x14; 5265 LIn [4] = (char)0xC9; 5266 LIn [5] = (char)0xD0; 5267 pLIn = LIn; 5268 errorCode = U_ZERO_ERROR; 5269 pUOut = UOut; 5270 5271 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5272 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5273 { 5274 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5275 } 5276 5277 /* 
unpaired surrogate plus chopped Unichar */ 5278 LIn [0] = (char)0x14; 5279 LIn [1] = (char)0xD8; 5280 LIn [2] = (char)0x01; 5281 LIn [3] = (char)0x14; 5282 LIn [4] = (char)0xC9; 5283 5284 pLIn = LIn; 5285 errorCode = U_ZERO_ERROR; 5286 pUOut = UOut; 5287 5288 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5289 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5290 { 5291 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5292 } 5293 5294 /* unpaired surrogate plus valid non-Unichar */ 5295 LIn [0] = (char)0x14; 5296 LIn [1] = (char)0xD8; 5297 LIn [2] = (char)0x01; 5298 LIn [3] = (char)0x0F; 5299 LIn [4] = (char)0x3B; 5300 5301 pLIn = LIn; 5302 errorCode = U_ZERO_ERROR; 5303 pUOut = UOut; 5304 5305 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5306 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5307 { 5308 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5309 } 5310 5311 /* unpaired surrogate plus chopped non-Unichar */ 5312 LIn [0] = (char)0x14; 5313 LIn [1] = (char)0xD8; 5314 LIn [2] = (char)0x01; 5315 LIn [3] = (char)0x0F; 5316 5317 pLIn = LIn; 5318 errorCode = U_ZERO_ERROR; 5319 pUOut = UOut; 5320 5321 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5322 5323 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5324 { 5325 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5326 } 5327 } 5328 } 5329 ucnv_close(cnv); /* final cleanup */ 5330 } 5331 5332 5333 static void TestJitterbug255() 5334 { 5335 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 
5336 const char *testBuffer = (const char *)testBytes; 5337 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5338 UErrorCode status = U_ZERO_ERROR; 5339 /*UChar32 result;*/ 5340 UConverter *cnv = 0; 5341 5342 cnv = ucnv_open("shift-jis", &status); 5343 if (U_FAILURE(status) || cnv == 0) { 5344 log_data_err("Failed to open the converter for SJIS.\n"); 5345 return; 5346 } 5347 while (testBuffer != testEnd) 5348 { 5349 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5350 if (U_FAILURE(status)) 5351 { 5352 log_err("Failed to convert the next UChar for SJIS.\n"); 5353 break; 5354 } 5355 } 5356 ucnv_close(cnv); 5357 } 5358 5359 static void TestEBCDICUS4XML() 5360 { 5361 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5362 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5363 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5364 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5365 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5366 UChar *unicodes = unicodes_x; 5367 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5368 char *target = target_x; 5369 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5370 UErrorCode status = U_ZERO_ERROR; 5371 UConverter *cnv = 0; 5372 5373 cnv = ucnv_open("ebcdic-xml-us", &status); 5374 if (U_FAILURE(status) || cnv == 0) { 5375 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5376 return; 5377 } 5378 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5379 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5380 log_err("To Unicode conversion failed in EBCDICUS4XML test. 
%s\n", 5381 u_errorName(status)); 5382 printUSeqErr(unicodes_x, 3); 5383 printUSeqErr(toUnicodeMaps, 3); 5384 } 5385 status = U_ZERO_ERROR; 5386 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5387 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5388 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5389 u_errorName(status)); 5390 printSeqErr((const unsigned char*)target_x, 3); 5391 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5392 } 5393 ucnv_close(cnv); 5394 } 5395 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5396 5397 #if !UCONFIG_NO_COLLATION 5398 5399 static void TestJitterbug981(){ 5400 const UChar* rules; 5401 int32_t rules_length, target_cap, bytes_needed, buff_size; 5402 UErrorCode status = U_ZERO_ERROR; 5403 UConverter *utf8cnv; 5404 UCollator* myCollator; 5405 char *buff; 5406 int numNeeded=0; 5407 utf8cnv = ucnv_open ("utf8", &status); 5408 if(U_FAILURE(status)){ 5409 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5410 return; 5411 } 5412 myCollator = ucol_open("zh", &status); 5413 if(U_FAILURE(status)){ 5414 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5415 ucnv_close(utf8cnv); 5416 return; 5417 } 5418 5419 rules = ucol_getRules(myCollator, &rules_length); 5420 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5421 buff = malloc(buff_size); 5422 5423 target_cap = 0; 5424 do { 5425 ucnv_reset(utf8cnv); 5426 status = U_ZERO_ERROR; 5427 if(target_cap >= buff_size) { 5428 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5429 break; 5430 } 5431 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5432 rules, rules_length, &status); 5433 target_cap = (bytes_needed > target_cap) ? 
bytes_needed : target_cap +1; 5434 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5435 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5436 break; 5437 } 5438 numNeeded = bytes_needed; 5439 } while (status == U_BUFFER_OVERFLOW_ERROR); 5440 ucol_close(myCollator); 5441 ucnv_close(utf8cnv); 5442 free(buff); 5443 } 5444 5445 #endif 5446 5447 #if !UCONFIG_NO_LEGACY_CONVERSION 5448 static void TestJitterbug1293(){ 5449 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5450 char target[256]; 5451 UErrorCode status = U_ZERO_ERROR; 5452 UConverter* conv=NULL; 5453 int32_t target_cap, bytes_needed, numNeeded = 0; 5454 conv = ucnv_open("shift-jis",&status); 5455 if(U_FAILURE(status)){ 5456 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5457 return; 5458 } 5459 5460 do{ 5461 target_cap =0; 5462 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5463 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5464 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5465 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5466 } 5467 numNeeded = bytes_needed; 5468 } while (status == U_BUFFER_OVERFLOW_ERROR); 5469 if(U_FAILURE(status)){ 5470 log_err("An error occured in ucnv_fromUChars. 
Error: %s", u_errorName(status)); 5471 return; 5472 } 5473 ucnv_close(conv); 5474 } 5475 #endif 5476 5477 static void TestJB5275_1(){ 5478 5479 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5480 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5481 /* Switch script: */ 5482 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5483 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5484 "\xEF\x40\x3B\xB3\x0A"; 5485 static const UChar expected[] ={ 5486 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5487 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5488 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5489 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5490 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5491 }; 5492 5493 UErrorCode status = U_ZERO_ERROR; 5494 UConverter* conv = ucnv_open("iscii-gur", &status); 5495 UChar dest[100] = {'\0'}; 5496 UChar* target = dest; 5497 UChar* targetLimit = dest+100; 5498 const char* source = data; 5499 const char* sourceLimit = data+strlen(data); 5500 const UChar* exp = expected; 5501 5502 if (U_FAILURE(status)) { 5503 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5504 return; 5505 } 5506 5507 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5508 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5509 if(U_FAILURE(status)){ 5510 log_err("conversion failed: %s \n", u_errorName(status)); 5511 } 5512 targetLimit = target; 5513 target = dest; 5514 printUSeq(target, targetLimit-target); 5515 while(target<targetLimit){ 5516 if(*exp!=*target){ 5517 log_err("did not get the expected output. 
\\u%04X != \\u%04X (got)\n", *exp, *target); 5518 } 5519 target++; 5520 exp++; 5521 } 5522 ucnv_close(conv); 5523 } 5524 5525 static void TestJB5275(){ 5526 static const char* data = 5527 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5528 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5529 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5530 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5531 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5532 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5533 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5534 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5535 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5536 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5537 static const UChar expected[] ={ 5538 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5539 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5540 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5541 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5542 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5543 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5544 }; 5545 5546 UErrorCode status = U_ZERO_ERROR; 5547 UConverter* conv = ucnv_open("iscii", &status); 5548 UChar dest[100] = {'\0'}; 5549 UChar* target = dest; 5550 UChar* targetLimit = dest+100; 5551 const char* source = data; 5552 const char* sourceLimit = data+strlen(data); 5553 const UChar* exp = expected; 5554 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5555 if(U_FAILURE(status)){ 5556 log_err("conversion failed: %s \n", u_errorName(status)); 5557 } 5558 targetLimit = target; 5559 target = dest; 5560 5561 printUSeq(target, targetLimit-target); 5562 5563 while(target<targetLimit){ 5564 if(*exp!=*target){ 5565 log_err("did not get the expected output. 
\\u%04X != \\u%04X (got)\n", *exp, *target); 5566 } 5567 target++; 5568 exp++; 5569 } 5570 ucnv_close(conv); 5571 } 5572 5573 static void 5574 TestIsFixedWidth() { 5575 UErrorCode status = U_ZERO_ERROR; 5576 UConverter *cnv = NULL; 5577 int32_t i; 5578 5579 const char *fixedWidth[] = { 5580 "US-ASCII", 5581 "UTF32", 5582 "ibm-5478_P100-1995" 5583 }; 5584 5585 const char *notFixedWidth[] = { 5586 "GB18030", 5587 "UTF8", 5588 "windows-949-2000", 5589 "UTF16" 5590 }; 5591 5592 for (i = 0; i < LENGTHOF(fixedWidth); i++) { 5593 cnv = ucnv_open(fixedWidth[i], &status); 5594 if (cnv == NULL || U_FAILURE(status)) { 5595 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status)); 5596 continue; 5597 } 5598 5599 if (!ucnv_isFixedWidth(cnv, &status)) { 5600 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]); 5601 } 5602 ucnv_close(cnv); 5603 } 5604 5605 for (i = 0; i < LENGTHOF(notFixedWidth); i++) { 5606 cnv = ucnv_open(notFixedWidth[i], &status); 5607 if (cnv == NULL || U_FAILURE(status)) { 5608 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status)); 5609 continue; 5610 } 5611 5612 if (ucnv_isFixedWidth(cnv, &status)) { 5613 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]); 5614 } 5615 ucnv_close(cnv); 5616 } 5617 } 5618