1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Steven R. Loomis 7/8/1999 Adding input buffer test 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "cstring.h" 17 #include "unicode/uloc.h" 18 #include "unicode/ucnv.h" 19 #include "unicode/ucnv_err.h" 20 #include "unicode/ucnv_cb.h" 21 #include "cintltst.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucol.h" 25 #include "cmemory.h" 26 #include "nucnvtst.h" 27 28 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 29 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 30 #if !UCONFIG_NO_COLLATION 31 static void TestJitterbug981(void); 32 #endif 33 static void TestJitterbug1293(void); 34 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 35 static void TestConverterTypesAndStarters(void); 36 static void TestAmbiguous(void); 37 static void TestSignatureDetection(void); 38 static void TestUTF7(void); 39 static void TestIMAP(void); 40 static void TestUTF8(void); 41 static void TestCESU8(void); 42 static void TestUTF16(void); 43 static void TestUTF16BE(void); 44 static void TestUTF16LE(void); 45 static void TestUTF32(void); 46 static void TestUTF32BE(void); 47 static void TestUTF32LE(void); 48 static void TestLATIN1(void); 49 50 #if !UCONFIG_NO_LEGACY_CONVERSION 51 static void TestSBCS(void); 52 static void TestDBCS(void); 53 static void TestMBCS(void); 54 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 55 static void TestICCRunout(void); 56 #endif 57 58 #ifdef U_ENABLE_GENERIC_ISO_2022 59 static void TestISO_2022(void); 60 #endif 61 62 static void TestISO_2022_JP(void); 63 static void TestISO_2022_JP_1(void); 64 static void TestISO_2022_JP_2(void); 65 static void TestISO_2022_KR(void); 66 static void TestISO_2022_KR_1(void); 67 static void TestISO_2022_CN(void); 68 #if 0 69 /* 70 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 71 */ 72 static void TestISO_2022_CN_EXT(void); 73 #endif 74 static void TestJIS(void); 75 static void TestHZ(void); 76 #endif 77 78 static void TestSCSU(void); 79 80 #if !UCONFIG_NO_LEGACY_CONVERSION 81 static void TestEBCDIC_STATEFUL(void); 82 static void TestGB18030(void); 83 static void TestLMBCS(void); 84 static void TestJitterbug255(void); 85 static void TestEBCDICUS4XML(void); 86 #if 0 87 /* 88 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 89 */ 90 static void TestJitterbug915(void); 91 #endif 92 static void TestISCII(void); 93 94 static void TestCoverageMBCS(void); 95 static void TestJitterbug2346(void); 96 static void TestJitterbug2411(void); 97 static void TestJB5275(void); 98 static void TestJB5275_1(void); 99 static void TestJitterbug6175(void); 100 #endif 101 102 static void TestInBufSizes(void); 103 104 static void TestRoundTrippingAllUTF(void); 105 static void TestConv(const uint16_t in[], 106 int len, 107 const char* conv, 108 const char* lang, 109 char byteArr[], 110 int byteArrLen); 111 112 /* open a converter, using test data if it begins with '@' */ 113 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 114 115 116 #define NEW_MAX_BUFFER 999 117 118 static int32_t gInBufferSize = NEW_MAX_BUFFER; 119 static int32_t gOutBufferSize = NEW_MAX_BUFFER; 120 static char gNuConvTestName[1024]; 121 122 #define nct_min(x,y) ((x<y) ? x : y) 123 124 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 125 { 126 if(cnv && cnv[0] == '@') { 127 return ucnv_openPackage(loadTestData(err), cnv+1, err); 128 } else { 129 return ucnv_open(cnv, err); 130 } 131 } 132 133 static void printSeq(const unsigned char* a, int len) 134 { 135 int i=0; 136 log_verbose("{"); 137 while (i<len) 138 log_verbose("0x%02x ", a[i++]); 139 log_verbose("}\n"); 140 } 141 142 static void printUSeq(const UChar* a, int len) 143 { 144 int i=0; 145 log_verbose("{U+"); 146 while (i<len) log_verbose("0x%04x ", a[i++]); 147 log_verbose("}\n"); 148 } 149 150 static void printSeqErr(const unsigned char* a, int len) 151 { 152 int i=0; 153 fprintf(stderr, "{"); 154 while (i<len) 155 fprintf(stderr, "0x%02x ", a[i++]); 156 fprintf(stderr, "}\n"); 157 } 158 159 static void printUSeqErr(const UChar* a, int len) 160 { 161 int i=0; 162 fprintf(stderr, "{U+"); 163 while (i<len) 164 fprintf(stderr, "0x%04x ", a[i++]); 165 fprintf(stderr,"}\n"); 166 } 167 168 static void 169 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 170 { 171 const char* s0; 172 const char* s=(char*)source; 173 const int32_t *r=results; 174 UErrorCode errorCode=U_ZERO_ERROR; 175 UChar32 c; 176 177 while(s<limit) { 178 s0=s; 179 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 180 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 181 break; /* no more significant input */ 182 } else if(U_FAILURE(errorCode)) { 183 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 184 break; 185 } else if( 186 /* test the expected number of input bytes only if >=0 */ 187 (*r>=0 && (int32_t)(s-s0)!=*r) || 188 c!=*(r+1) 189 ) { 190 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 191 message, c, (s-s0), *(r+1), *r); 192 break; 193 } 194 r+=2; 195 } 196 } 197 198 static void 199 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) 200 { 201 const char* s=(char*)source; 202 UErrorCode errorCode=U_ZERO_ERROR; 203 uint32_t c; 204 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 205 if(errorCode != expected){ 206 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 207 } 208 if(c != 0xFFFD && c != 0xffff){ 209 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 210 } 211 212 } 213 214 static void TestInBufSizes(void) 215 { 216 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 217 #if 1 218 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 219 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 220 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 222 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 223 TestNewConvertWithBufferSizes(1,1); 224 TestNewConvertWithBufferSizes(2,3); 225 TestNewConvertWithBufferSizes(3,2); 226 #endif 227 } 228 229 static void TestOutBufSizes(void) 230 { 231 #if 1 232 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 233 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 234 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 235 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 236 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 237 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 238 239 #endif 240 } 241 242 243 void addTestNewConvert(TestNode** root) 244 { 245 #if !UCONFIG_NO_FILE_IO 246 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 247 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 248 #endif 249 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 250 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 251 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); 252 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 253 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); 254 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 255 256 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 257 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 258 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 259 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 260 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 261 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 262 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 263 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 264 265 #if !UCONFIG_NO_LEGACY_CONVERSION 266 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 267 #endif 268 269 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 270 271 #if !UCONFIG_NO_LEGACY_CONVERSION 272 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 273 #if !UCONFIG_NO_FILE_IO 274 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 275 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 276 #endif 277 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 278 279 #ifdef U_ENABLE_GENERIC_ISO_2022 280 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 281 #endif 282 283 /* BEGIN android-removed 284 To save space, Android does not build full ISO2022 CJK tables. 285 We turn off the tests here. 286 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 287 END android-removed */ 288 289 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); /* BEGIN android-removed */ 290 291 /* BEGIN android-removed 292 To save space, Android does not build full ISO2022 CJK tables. 293 We turn off the tests here. 294 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 295 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 296 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 297 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 298 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 299 END android-removed */ 300 301 /* 302 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 303 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 304 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 305 */ 306 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 307 #endif 308 309 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 310 311 #if !UCONFIG_NO_LEGACY_CONVERSION 312 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 313 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 314 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 315 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 316 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 317 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 318 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 319 #if !UCONFIG_NO_COLLATION 320 /* BEGIN android-removed 321 To save space, Android does not include the collation tailoring rules. 322 Skip the related tests. 323 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 324 END android-removed */ 325 #endif 326 327 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 328 #endif 329 330 331 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 332 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 333 #endif 334 335 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 336 337 #if !UCONFIG_NO_LEGACY_CONVERSION 338 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 339 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 340 /* BEGIN android-removed 341 To save space, Android does not build full ISO2022 CJK tables. 342 We turn off the tests here. 343 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 344 END android-removed */ 345 #endif 346 347 } 348 349 350 /* Note that this test already makes use of statics, so it's not really 351 multithread safe. 352 This convenience function lets us make the error messages actually useful. 353 */ 354 355 static void setNuConvTestName(const char *codepage, const char *direction) 356 { 357 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 358 codepage, 359 direction, 360 (int)gInBufferSize, 361 (int)gOutBufferSize); 362 } 363 364 typedef enum 365 { 366 TC_OK = 0, /* test was OK */ 367 TC_MISMATCH = 1, /* Match failed - err was printed */ 368 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 369 } ETestConvertResult; 370 371 /* Note: This function uses global variables and it will not do offset 372 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 373 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 374 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 375 { 376 UErrorCode status = U_ZERO_ERROR; 377 UConverter *conv = 0; 378 char junkout[NEW_MAX_BUFFER]; /* FIX */ 379 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 380 char *p; 381 const UChar *src; 382 char *end; 383 char *targ; 384 int32_t *offs; 385 int i; 386 int32_t realBufferSize; 387 char *realBufferEnd; 388 const UChar *realSourceEnd; 389 const UChar *sourceLimit; 390 UBool checkOffsets = TRUE; 391 UBool doFlush; 392 393 for(i=0;i<NEW_MAX_BUFFER;i++) 394 junkout[i] = (char)0xF0; 395 for(i=0;i<NEW_MAX_BUFFER;i++) 396 junokout[i] = 0xFF; 397 398 setNuConvTestName(codepage, "FROM"); 399 400 log_verbose("\n========= %s\n", gNuConvTestName); 401 402 conv = my_ucnv_open(codepage, &status); 403 404 if(U_FAILURE(status)) 405 { 406 log_data_err("Couldn't open converter %s\n",codepage); 407 return TC_FAIL; 408 } 409 if(useFallback){ 410 ucnv_setFallback(conv,useFallback); 411 } 412 413 log_verbose("Converter opened..\n"); 414 415 src = source; 416 targ = junkout; 417 offs = junokout; 418 419 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 420 realBufferEnd = junkout + realBufferSize; 421 realSourceEnd = source + sourceLen; 422 423 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 424 checkOffsets = FALSE; 425 426 do 427 { 428 end = nct_min(targ + gOutBufferSize, realBufferEnd); 429 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 430 431 doFlush = (UBool)(sourceLimit == realSourceEnd); 432 433 if(targ == realBufferEnd) { 434 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 435 return TC_FAIL; 436 } 437 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 438 439 440 status = U_ZERO_ERROR; 441 442 ucnv_fromUnicode (conv, 443 &targ, 444 end, 445 &src, 446 sourceLimit, 447 checkOffsets ? offs : NULL, 448 doFlush, /* flush if we're at the end of the input data */ 449 &status); 450 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 451 452 if(U_FAILURE(status)) { 453 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 454 return TC_FAIL; 455 } 456 457 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 458 sourceLen, targ-junkout); 459 460 if(getTestOption(VERBOSITY_OPTION)) 461 { 462 char junk[9999]; 463 char offset_str[9999]; 464 char *ptr; 465 466 junk[0] = 0; 467 offset_str[0] = 0; 468 for(ptr = junkout;ptr<targ;ptr++) { 469 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 470 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 471 } 472 473 log_verbose(junk); 474 printSeq((const uint8_t *)expect, expectLen); 475 if ( checkOffsets ) { 476 log_verbose("\nOffsets:"); 477 log_verbose(offset_str); 478 } 479 log_verbose("\n"); 480 } 481 ucnv_close(conv); 482 483 if(expectLen != targ-junkout) { 484 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 485 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 486 printf("\nGot:"); 487 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 488 printf("\nExpected:"); 489 printSeqErr((const unsigned char*)expect, expectLen); 490 return TC_MISMATCH; 491 } 492 493 if (checkOffsets && (expectOffsets != 0) ) { 494 log_verbose("comparing %d offsets..\n", targ-junkout); 495 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 496 log_err("did not get the expected offsets. %s\n", gNuConvTestName); 497 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 498 log_err("\n"); 499 log_err("Got : "); 500 for(p=junkout;p<targ;p++) { 501 log_err("%d,", junokout[p-junkout]); 502 } 503 log_err("\n"); 504 log_err("Expected: "); 505 for(i=0; i<(targ-junkout); i++) { 506 log_err("%d,", expectOffsets[i]); 507 } 508 log_err("\n"); 509 } 510 } 511 512 log_verbose("comparing..\n"); 513 if(!memcmp(junkout, expect, expectLen)) { 514 log_verbose("Matches!\n"); 515 return TC_OK; 516 } else { 517 log_err("String does not match u->%s\n", gNuConvTestName); 518 printUSeqErr(source, sourceLen); 519 printf("\nGot:"); 520 printSeqErr((const unsigned char *)junkout, expectLen); 521 printf("\nExpected:"); 522 printSeqErr((const unsigned char *)expect, expectLen); 523 524 return TC_MISMATCH; 525 } 526 } 527 528 /* Note: This function uses global variables and it will not do offset 529 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 530 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 531 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 532 { 533 UErrorCode status = U_ZERO_ERROR; 534 UConverter *conv = 0; 535 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 536 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 537 const char *src; 538 const char *realSourceEnd; 539 const char *srcLimit; 540 UChar *p; 541 UChar *targ; 542 UChar *end; 543 int32_t *offs; 544 int i; 545 UBool checkOffsets = TRUE; 546 547 int32_t realBufferSize; 548 UChar *realBufferEnd; 549 550 551 for(i=0;i<NEW_MAX_BUFFER;i++) 552 junkout[i] = 0xFFFE; 553 554 for(i=0;i<NEW_MAX_BUFFER;i++) 555 junokout[i] = -1; 556 557 setNuConvTestName(codepage, "TO"); 558 559 log_verbose("\n========= %s\n", gNuConvTestName); 560 561 conv = my_ucnv_open(codepage, &status); 562 563 if(U_FAILURE(status)) 564 { 565 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 566 return TC_FAIL; 567 } 568 if(useFallback){ 569 ucnv_setFallback(conv,useFallback); 570 } 571 log_verbose("Converter opened..\n"); 572 573 src = (const char *)source; 574 targ = junkout; 575 offs = junokout; 576 577 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 578 realBufferEnd = junkout + realBufferSize; 579 realSourceEnd = src + sourcelen; 580 581 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 582 checkOffsets = FALSE; 583 584 do 585 { 586 end = nct_min( targ + gOutBufferSize, realBufferEnd); 587 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 588 589 if(targ == realBufferEnd) 590 { 591 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 592 return TC_FAIL; 593 } 594 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 595 596 /* oldTarg = targ; */ 597 598 status = U_ZERO_ERROR; 599 600 ucnv_toUnicode (conv, 601 &targ, 602 end, 603 &src, 604 srcLimit, 605 checkOffsets ? offs : NULL, 606 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 607 &status); 608 609 /* offs += (targ-oldTarg); */ 610 611 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 612 613 if(U_FAILURE(status)) 614 { 615 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 616 return TC_FAIL; 617 } 618 619 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 620 sourcelen, targ-junkout); 621 if(getTestOption(VERBOSITY_OPTION)) 622 { 623 char junk[9999]; 624 char offset_str[9999]; 625 UChar *ptr; 626 627 junk[0] = 0; 628 offset_str[0] = 0; 629 630 for(ptr = junkout;ptr<targ;ptr++) 631 { 632 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 633 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 634 } 635 636 log_verbose(junk); 637 printUSeq(expect, expectlen); 638 if ( checkOffsets ) 639 { 640 log_verbose("\nOffsets:"); 641 log_verbose(offset_str); 642 } 643 log_verbose("\n"); 644 } 645 ucnv_close(conv); 646 647 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 648 649 if (checkOffsets && (expectOffsets != 0)) 650 { 651 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 652 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 653 log_err("Got: "); 654 for(p=junkout;p<targ;p++) { 655 log_err("%d,", junokout[p-junkout]); 656 } 657 log_err("\n"); 658 log_err("Expected: "); 659 for(i=0; i<(targ-junkout); i++) { 660 log_err("%d,", expectOffsets[i]); 661 } 662 log_err("\n"); 663 log_err("output: "); 664 for(i=0; i<(targ-junkout); i++) { 665 log_err("%X,", junkout[i]); 666 } 667 log_err("\n"); 668 log_err("input: "); 669 for(i=0; i<(src-(const char *)source); i++) { 670 log_err("%X,", (unsigned char)source[i]); 671 } 672 log_err("\n"); 673 } 674 } 675 676 if(!memcmp(junkout, expect, expectlen*2)) 677 { 678 log_verbose("Matches!\n"); 679 return TC_OK; 680 } 681 else 682 { 683 log_err("String does not match. %s\n", gNuConvTestName); 684 log_verbose("String does not match. %s\n", gNuConvTestName); 685 printf("\nGot:"); 686 printUSeqErr(junkout, expectlen); 687 printf("\nExpected:"); 688 printUSeqErr(expect, expectlen); 689 return TC_MISMATCH; 690 } 691 } 692 693 694 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 695 { 696 /** test chars #1 */ 697 /* 1 2 3 1Han 2Han 3Han . */ 698 static const UChar sampleText[] = 699 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 700 static const UChar sampleTextRoundTripUnmappable[] = 701 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 702 703 704 static const uint8_t expectedUTF8[] = 705 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 706 static const int32_t toUTF8Offs[] = 707 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 708 static const int32_t fmUTF8Offs[] = 709 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 710 711 #ifdef U_ENABLE_GENERIC_ISO_2022 712 /* Same as UTF8, but with ^[%B preceeding */ 713 static const const uint8_t expectedISO2022[] = 714 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 715 static const int32_t toISO2022Offs[] = 716 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 717 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 718 static const int32_t fmISO2022Offs[] = 719 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 720 #endif 721 722 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ 723 static const uint8_t expectedIBM930[] = 724 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 725 static const int32_t toIBM930Offs[] = 726 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 727 static const int32_t fmIBM930Offs[] = 728 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 729 730 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 731 static const uint8_t expectedIBM943[] = 732 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 733 static const int32_t toIBM943Offs [] = 734 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 735 static const int32_t fmIBM943Offs[] = 736 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 737 738 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 739 static const uint8_t expectedIBM9027[] = 740 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 741 static const int32_t toIBM9027Offs [] = 742 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 743 744 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 745 static const uint8_t expectedIBM920[] = 746 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 747 static const int32_t toIBM920Offs [] = 748 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 749 750 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 751 static const uint8_t expectedISO88593[] = 752 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 753 static const int32_t toISO88593Offs[] = 754 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 755 756 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ 757 static const uint8_t expectedLATIN1[] = 758 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 759 static const int32_t toLATIN1Offs[] = 760 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 761 762 763 /* etc */ 764 static const uint8_t expectedUTF16BE[] = 765 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 766 static const int32_t toUTF16BEOffs[]= 767 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 768 static const int32_t fmUTF16BEOffs[] = 769 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 770 771 static const uint8_t expectedUTF16LE[] = 772 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 773 static const int32_t toUTF16LEOffs[]= 774 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 775 static const int32_t fmUTF16LEOffs[] = 776 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 777 778 static const uint8_t expectedUTF32BE[] = 779 { 0x00, 0x00, 0x00, 0x31, 780 0x00, 0x00, 0x00, 0x32, 781 0x00, 0x00, 0x00, 0x33, 782 0x00, 0x00, 0x00, 0x00, 783 0x00, 0x00, 0x4e, 0x00, 784 0x00, 0x00, 0x4e, 0x8c, 785 0x00, 0x00, 0x4e, 0x09, 786 0x00, 0x00, 0x00, 0x2e, 787 0x00, 0x02, 0x00, 0x21 }; 788 static const int32_t toUTF32BEOffs[]= 789 { 0x00, 0x00, 0x00, 0x00, 790 0x01, 0x01, 0x01, 0x01, 791 0x02, 0x02, 0x02, 0x02, 792 0x03, 0x03, 0x03, 0x03, 793 0x04, 0x04, 0x04, 0x04, 794 0x05, 0x05, 0x05, 0x05, 795 0x06, 0x06, 0x06, 0x06, 796 0x07, 0x07, 0x07, 0x07, 797 0x08, 0x08, 0x08, 0x08, 798 0x08, 0x08, 0x08, 0x08 }; 799 static const int32_t fmUTF32BEOffs[] = 800 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 801 802 static const uint8_t expectedUTF32LE[] = 803 { 0x31, 0x00, 0x00, 0x00, 804 0x32, 0x00, 0x00, 0x00, 805 0x33, 0x00, 0x00, 0x00, 806 0x00, 0x00, 0x00, 0x00, 807 0x00, 0x4e, 0x00, 0x00, 808 0x8c, 0x4e, 0x00, 0x00, 809 0x09, 0x4e, 0x00, 0x00, 810 0x2e, 0x00, 0x00, 0x00, 811 0x21, 0x00, 0x02, 0x00 }; 812 static const int32_t toUTF32LEOffs[]= 813 { 0x00, 0x00, 0x00, 0x00, 814 0x01, 0x01, 0x01, 0x01, 815 0x02, 0x02, 0x02, 0x02, 816 0x03, 0x03, 0x03, 0x03, 817 0x04, 0x04, 0x04, 0x04, 818 0x05, 0x05, 0x05, 0x05, 819 0x06, 0x06, 0x06, 0x06, 820 0x07, 0x07, 0x07, 0x07, 821 0x08, 0x08, 0x08, 0x08, 822 0x08, 0x08, 0x08, 0x08 }; 823 static const int32_t fmUTF32LEOffs[] = 824 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 825 826 827 828 829 /** Test chars #2 **/ 830 831 /* Sahha [health], slashed h's */ 832 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 833 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 834 835 /* LMBCS */ 836 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 837 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 838 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 839 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 840 /*********************************** START OF CODE finally *************/ 841 842 gInBufferSize = insize; 843 gOutBufferSize = outsize; 844 845 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 846 847 848 /*UTF-8*/ 849 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 850 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 851 852 log_verbose("Test surrogate behaviour for UTF8\n"); 853 { 854 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 855 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 856 0xf0, 0x90, 0x90, 0x81, 857 0xef, 0xbf, 0xbd 858 }; 859 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 860 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 861 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 862 863 864 } 865 866 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 867 /*ISO-2022*/ 868 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 869 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 870 #endif 871 872 /*UTF16 LE*/ 873 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 874 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 875 /*UTF16 BE*/ 876 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 877 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 878 /*UTF32 LE*/ 879 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 880 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 881 /*UTF32 BE*/ 882 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 883 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 884 885 /*LATIN_1*/ 886 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 887 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 888 889 #if !UCONFIG_NO_LEGACY_CONVERSION 890 /*EBCDIC_STATEFUL*/ 891 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 892 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 893 894 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 895 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 896 897 /*MBCS*/ 898 899 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 900 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); 901 /*DBCS*/ 902 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 903 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 904 /*SBCS*/ 905 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 906 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 907 /*SBCS*/ 908 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 909 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 910 #endif 911 912 913 /****/ 914 915 /*UTF-8*/ 916 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 917 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 918 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 919 /*ISO-2022*/ 920 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 921 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 922 #endif 923 924 /*UTF16 LE*/ 925 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 926 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 927 /*UTF16 BE*/ 928 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 929 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 930 /*UTF32 LE*/ 931 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 932 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 933 /*UTF32 BE*/ 934 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 935 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 936 937 #if !UCONFIG_NO_LEGACY_CONVERSION 938 /*EBCDIC_STATEFUL*/ 939 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 940 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 941 /*MBCS*/ 942 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 943 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 944 #endif 945 946 /* Try it again to make sure it still works */ 947 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 948 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 949 950 #if !UCONFIG_NO_LEGACY_CONVERSION 951 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 952 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 953 954 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 955 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 956 957 /*LMBCS*/ 958 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 959 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 960 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 961 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 962 #endif 963 964 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 965 { 966 /* encode directly set D and set O */ 967 static const uint8_t utf7[] = { 968 /* 969 Hi Mom -+Jjo--! 970 A+ImIDkQ. 971 +- 972 +ZeVnLIqe 973 */ 974 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 975 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 976 0x2b, 0x2d, 977 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 978 }; 979 static const UChar unicode[] = { 980 /* 981 Hi Mom -<WHITE SMILING FACE>-! 982 A<NOT IDENTICAL TO><ALPHA>. 983 + 984 [Japanese word "nihongo"] 985 */ 986 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 987 0x41, 0x2262, 0x0391, 0x2e, 988 0x2b, 989 0x65e5, 0x672c, 0x8a9e 990 }; 991 static const int32_t toUnicodeOffsets[] = { 992 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 993 15, 17, 19, 23, 994 24, 995 27, 29, 32 996 }; 997 static const int32_t fromUnicodeOffsets[] = { 998 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 999 11, 12, 12, 12, 13, 13, 13, 13, 14, 1000 15, 15, 1001 16, 16, 16, 17, 17, 17, 18, 18, 18 1002 }; 1003 1004 /* same but escaping set O (the exclamation mark) */ 1005 static const uint8_t utf7Restricted[] = { 1006 /* 1007 Hi Mom -+Jjo--+ACE- 1008 A+ImIDkQ. 1009 +- 1010 +ZeVnLIqe 1011 */ 1012 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 1013 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 1014 0x2b, 0x2d, 1015 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 1016 }; 1017 static const int32_t toUnicodeOffsetsR[] = { 1018 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1019 19, 21, 23, 27, 1020 28, 1021 31, 33, 36 1022 }; 1023 static const int32_t fromUnicodeOffsetsR[] = { 1024 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1025 11, 12, 12, 12, 13, 13, 13, 13, 14, 1026 15, 15, 1027 16, 16, 16, 17, 17, 17, 18, 18, 18 1028 }; 1029 1030 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1031 1032 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 1033 1034 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1035 1036 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1037 } 1038 1039 /* 1040 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 1041 * modified according to RFC 2060, 1042 * and supplemented with the one example in RFC 2060 itself. 1043 */ 1044 { 1045 static const uint8_t imap[] = { 1046 /* Hi Mom -&Jjo--! 1047 A&ImIDkQ-. 1048 &- 1049 &ZeVnLIqe- 1050 \ 1051 ~peter 1052 /mail 1053 /&ZeVnLIqe- 1054 /&U,BTFw- 1055 */ 1056 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1057 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1058 0x26, 0x2d, 1059 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1060 0x5c, 1061 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1062 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1063 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1064 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1065 }; 1066 static const UChar unicode[] = { 1067 /* Hi Mom -<WHITE SMILING FACE>-! 1068 A<NOT IDENTICAL TO><ALPHA>. 1069 & 1070 [Japanese word "nihongo"] 1071 \ 1072 ~peter 1073 /mail 1074 /<65e5, 672c, 8a9e> 1075 /<53f0, 5317> 1076 */ 1077 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1078 0x41, 0x2262, 0x0391, 0x2e, 1079 0x26, 1080 0x65e5, 0x672c, 0x8a9e, 1081 0x5c, 1082 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1083 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1084 0x2f, 0x65e5, 0x672c, 0x8a9e, 1085 0x2f, 0x53f0, 0x5317 1086 }; 1087 static const int32_t toUnicodeOffsets[] = { 1088 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1089 15, 17, 19, 24, 1090 25, 1091 28, 30, 33, 1092 37, 1093 38, 39, 40, 41, 42, 43, 1094 44, 45, 46, 47, 48, 1095 49, 51, 53, 56, 1096 60, 62, 64 1097 }; 1098 static const int32_t fromUnicodeOffsets[] = { 1099 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1100 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1101 15, 15, 1102 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1103 19, 1104 20, 21, 22, 23, 24, 25, 1105 26, 27, 28, 29, 30, 1106 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1107 35, 36, 36, 36, 37, 37, 37, 37, 37 1108 }; 1109 1110 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1111 1112 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1113 } 1114 1115 /* Test UTF-8 bad data handling*/ 1116 { 1117 static const uint8_t utf8[]={ 1118 0x61, 1119 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1120 0x00, 1121 0x62, 1122 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1123 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1124 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1125 0xdf, 0xbf, /* 7ff */ 1126 0xbf, /* truncated tail */ 1127 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1128 0x02 1129 }; 1130 1131 static const uint16_t utf8Expected[]={ 1132 0x0061, 1133 0xfffd, 1134 0x0000, 1135 0x0062, 1136 0xfffd, 1137 0xfffd, 1138 0xdbff, 0xdfff, 1139 0x07ff, 1140 0xfffd, 1141 0xfffd, 1142 0x0002 1143 }; 1144 1145 static const int32_t utf8Offsets[]={ 1146 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1147 }; 1148 testConvertToU(utf8, sizeof(utf8), 1149 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1150 1151 } 1152 1153 /* Test UTF-32BE bad data handling*/ 1154 { 1155 static const uint8_t utf32[]={ 1156 0x00, 0x00, 0x00, 0x61, 1157 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1158 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1159 0x00, 0x00, 0x00, 0x62, 1160 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1161 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1162 0x00, 0x00, 0x01, 0x62, 1163 0x00, 0x00, 0x02, 0x62 1164 }; 1165 static const uint16_t utf32Expected[]={ 1166 0x0061, 1167 0xfffd, /* 0x110000 out of range */ 1168 0xDBFF, /* 0x10FFFF in range */ 1169 0xDFFF, 1170 0x0062, 1171 0xfffd, /* 0xffffffff out of range */ 1172 0xfffd, /* 0x7fffffff out of range */ 1173 0x0162, 1174 0x0262 1175 }; 1176 static const int32_t utf32Offsets[]={ 1177 0, 4, 8, 8, 12, 16, 20, 24, 28 1178 }; 1179 static const uint8_t utf32ExpectedBack[]={ 1180 0x00, 0x00, 0x00, 0x61, 1181 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ 1182 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1183 0x00, 0x00, 0x00, 0x62, 1184 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1185 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1186 0x00, 0x00, 0x01, 0x62, 1187 0x00, 0x00, 0x02, 0x62 1188 }; 1189 static const int32_t utf32OffsetsBack[]={ 1190 0,0,0,0, 1191 1,1,1,1, 1192 2,2,2,2, 1193 4,4,4,4, 1194 5,5,5,5, 1195 6,6,6,6, 1196 7,7,7,7, 1197 8,8,8,8 1198 }; 1199 1200 testConvertToU(utf32, sizeof(utf32), 1201 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1202 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1203 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1204 } 1205 1206 /* Test UTF-32LE bad data handling*/ 1207 { 1208 static const uint8_t utf32[]={ 1209 0x61, 0x00, 0x00, 0x00, 1210 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1211 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1212 0x62, 0x00, 0x00, 0x00, 1213 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1214 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1215 0x62, 0x01, 0x00, 0x00, 1216 0x62, 0x02, 0x00, 0x00, 1217 }; 1218 1219 static const uint16_t utf32Expected[]={ 1220 0x0061, 1221 0xfffd, /* 0x110000 out of range */ 1222 0xDBFF, /* 0x10FFFF in range */ 1223 0xDFFF, 1224 0x0062, 1225 0xfffd, /* 0xffffffff out of range */ 1226 0xfffd, /* 0x7fffffff out of range */ 1227 0x0162, 1228 0x0262 1229 }; 1230 static const int32_t utf32Offsets[]={ 1231 0, 4, 8, 8, 12, 16, 20, 24, 28 1232 }; 1233 static const uint8_t utf32ExpectedBack[]={ 1234 0x61, 0x00, 0x00, 0x00, 1235 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1236 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1237 0x62, 0x00, 0x00, 0x00, 1238 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1239 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1240 0x62, 0x01, 0x00, 0x00, 1241 0x62, 0x02, 0x00, 0x00 1242 }; 1243 static const int32_t utf32OffsetsBack[]={ 1244 0,0,0,0, 1245 1,1,1,1, 1246 2,2,2,2, 1247 4,4,4,4, 1248 5,5,5,5, 1249 6,6,6,6, 1250 7,7,7,7, 1251 8,8,8,8 1252 }; 1253 testConvertToU(utf32, sizeof(utf32), 1254 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1255 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1256 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1257 } 1258 } 1259 1260 static void TestCoverageMBCS(){ 1261 #if 0 1262 UErrorCode status = U_ZERO_ERROR; 1263 const char *directory = loadTestData(&status); 1264 char* tdpath = NULL; 1265 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1266 int len = strlen(directory); 1267 char* index=NULL; 1268 1269 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1270 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1271 log_verbose("Retrieved data directory %s \n",saveDirectory); 1272 uprv_strcpy(tdpath,directory); 1273 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1274 1275 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1276 *(index+1)=0; 1277 } 1278 u_setDataDirectory(tdpath); 1279 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1280 #endif 1281 1282 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1283 which is test file for MBCS conversion with single-byte codepage data.*/ 1284 { 1285 1286 /* MBCS with single byte codepage data test1.ucm*/ 1287 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1288 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1289 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1290 1291 /*from Unicode*/ 1292 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1293 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1294 } 1295 1296 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm 1297 which is test file for MBCS conversion with three-byte codepage data.*/ 1298 { 1299 1300 /* MBCS with three byte codepage data test3.ucm*/ 1301 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1302 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1303 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1304 1305 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1306 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1307 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1308 1309 /*from Unicode*/ 1310 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1311 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1312 1313 /*to Unicode*/ 1314 testConvertToU(test3input, sizeof(test3input), 1315 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1316 1317 } 1318 1319 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm 1320 which is test file for MBCS conversion with four-byte codepage data.*/ 1321 { 1322 1323 /* MBCS with three byte codepage data test4.ucm*/ 1324 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1325 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1326 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1327 1328 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1329 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1330 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1331 1332 /*from Unicode*/ 1333 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1334 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1335 1336 /*to Unicode*/ 1337 testConvertToU(test4input, sizeof(test4input), 1338 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1339 1340 } 1341 #if 0 1342 free(tdpath); 1343 /* restore the original data directory */ 1344 log_verbose("Setting the data directory to %s \n", saveDirectory); 1345 u_setDataDirectory(saveDirectory); 1346 free(saveDirectory); 1347 #endif 1348 1349 } 1350 1351 static void TestConverterType(const char *convName, UConverterType convType) { 1352 UConverter* myConverter; 1353 UErrorCode err = U_ZERO_ERROR; 1354 1355 myConverter = my_ucnv_open(convName, &err); 1356 1357 if (U_FAILURE(err)) { 1358 log_data_err("Failed to create an %s converter\n", convName); 1359 return; 1360 } 1361 else 1362 { 1363 if (ucnv_getType(myConverter)!=convType) { 1364 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", 1365 convName, convType); 1366 } 1367 else { 1368 log_verbose("ucnv_getType %s ok\n", convName); 1369 } 1370 } 1371 ucnv_close(myConverter); 1372 } 1373 1374 static void TestConverterTypesAndStarters() 1375 { 1376 #if !UCONFIG_NO_LEGACY_CONVERSION 1377 UConverter* myConverter; 1378 UErrorCode err = U_ZERO_ERROR; 1379 UBool mystarters[256]; 1380 1381 /* const UBool expectedKSCstarters[256] = { 1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1394 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1395 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1396 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1397 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1398 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1399 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1401 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1406 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1407 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1408 1409 1410 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1411 1412 myConverter = ucnv_open("ksc", &err); 1413 if (U_FAILURE(err)) { 1414 log_data_err("Failed to create an ibm-ksc converter\n"); 1415 return; 1416 } 1417 else 1418 { 1419 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1420 log_err("ucnv_getType Failed for ibm-949\n"); 1421 else 1422 log_verbose("ucnv_getType ibm-949 ok\n"); 1423 1424 if(myConverter!=NULL) 1425 ucnv_getStarters(myConverter, mystarters, &err); 1426 1427 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1428 log_err("Failed ucnv_getStarters for ksc\n"); 1429 else 1430 log_verbose("ucnv_getStarters ok\n");*/ 1431 1432 } 1433 ucnv_close(myConverter); 1434 1435 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1436 TestConverterType("ibm-878", UCNV_SBCS); 1437 #endif 1438 1439 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1440 1441 TestConverterType("ibm-1208", UCNV_UTF8); 1442 1443 TestConverterType("utf-8", UCNV_UTF8); 1444 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1445 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1446 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1447 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1448 1449 #if !UCONFIG_NO_LEGACY_CONVERSION 1450 1451 #if defined(U_ENABLE_GENERIC_ISO_2022) 1452 TestConverterType("iso-2022", UCNV_ISO_2022); 1453 #endif 1454 1455 TestConverterType("hz", UCNV_HZ); 1456 #endif 1457 1458 TestConverterType("scsu", UCNV_SCSU); 1459 1460 #if !UCONFIG_NO_LEGACY_CONVERSION 1461 TestConverterType("x-iscii-de", UCNV_ISCII); 1462 #endif 1463 1464 TestConverterType("ascii", UCNV_US_ASCII); 1465 TestConverterType("utf-7", UCNV_UTF7); 1466 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1467 TestConverterType("bocu-1", UCNV_BOCU1); 1468 } 1469 1470 static void 1471 TestAmbiguousConverter(UConverter *cnv) { 1472 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1473 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1474 1475 const char *s; 1476 UChar *u; 1477 UErrorCode errorCode; 1478 UBool isAmbiguous; 1479 1480 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1481 errorCode=U_ZERO_ERROR; 1482 s=inBytes; 1483 u=outUnicode; 1484 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1485 if(U_FAILURE(errorCode)) { 1486 /* we do not care about general failures in this test; the input may just not be mappable */ 1487 return; 1488 } 1489 1490 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1491 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1492 /* There are some encodings that are partially ASCII based, 1493 like the ISO-7 and GSM series of codepages, which we ignore. */ 1494 return; 1495 } 1496 1497 isAmbiguous=ucnv_isAmbiguous(cnv); 1498 1499 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1500 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1501 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1502 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1503 return; 1504 } 1505 1506 if(outUnicode[2]!=0x5c) { 1507 /* needs fixup, fix it */ 1508 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1509 if(outUnicode[2]!=0x5c) { 1510 /* the fix failed */ 1511 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1512 return; 1513 } 1514 } 1515 } 1516 1517 static void TestAmbiguous() 1518 { 1519 UErrorCode status = U_ZERO_ERROR; 1520 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1521 static const char target[] = { 1522 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1523 0x5c, 0x75, 0x73, 0x72, 1524 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1525 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1526 0x5c, 0x64, 0x61, 0x74, 0x61, 1527 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1528 0 1529 }; 1530 UChar asciiResult[200], sjisResult[200]; 1531 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1532 const char *name; 1533 1534 /* enumerate all converters */ 1535 status=U_ZERO_ERROR; 1536 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1537 cnv=ucnv_open(name, &status); 1538 if(U_SUCCESS(status)) { 1539 /* BEGIN android-changed 1540 To save space, Android does not build full ISO2022 CJK tables. 1541 We skip the tests for ISO-2022. */ 1542 const char* cnvName = ucnv_getName(cnv, &status); 1543 if (strlen(cnvName) < 8 || 1544 strncmp(cnvName, "ISO_2022", 8) != 0) { 1545 TestAmbiguousConverter(cnv); 1546 } 1547 /* END android-changed */ 1548 ucnv_close(cnv); 1549 } else { 1550 log_err("error: unable to open available converter \"%s\"\n", name); 1551 status=U_ZERO_ERROR; 1552 } 1553 } 1554 1555 #if !UCONFIG_NO_LEGACY_CONVERSION 1556 sjis_cnv = ucnv_open("ibm-943", &status); 1557 if (U_FAILURE(status)) 1558 { 1559 log_data_err("Failed to create a SJIS converter\n"); 1560 return; 1561 } 1562 ascii_cnv = ucnv_open("LATIN-1", &status); 1563 if (U_FAILURE(status)) 1564 { 1565 log_data_err("Failed to create a LATIN-1 converter\n"); 1566 ucnv_close(sjis_cnv); 1567 return; 1568 } 1569 /* convert target from SJIS to Unicode */ 1570 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1571 if (U_FAILURE(status)) 1572 { 1573 log_err("Failed to convert the SJIS string.\n"); 1574 ucnv_close(sjis_cnv); 1575 ucnv_close(ascii_cnv); 1576 return; 1577 } 1578 /* convert target from Latin-1 to Unicode */ 1579 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1580 if (U_FAILURE(status)) 1581 { 1582 log_err("Failed to convert the Latin-1 string.\n"); 1583 ucnv_close(sjis_cnv); 1584 ucnv_close(ascii_cnv); 1585 return; 1586 } 1587 if (!ucnv_isAmbiguous(sjis_cnv)) 1588 { 1589 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1590 ucnv_close(sjis_cnv); 1591 ucnv_close(ascii_cnv); 1592 return; 1593 } 1594 if (u_strcmp(sjisResult, asciiResult) == 0) 1595 { 1596 log_err("File separators for SJIS don't need to be fixed.\n"); 1597 } 1598 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1599 if (u_strcmp(sjisResult, asciiResult) != 0) 1600 { 1601 log_err("Fixing file separator for SJIS failed.\n"); 1602 } 1603 ucnv_close(sjis_cnv); 1604 ucnv_close(ascii_cnv); 1605 #endif 1606 } 1607 1608 static void 1609 TestSignatureDetection(){ 1610 /* with null terminated strings */ 1611 { 1612 static const char* data[] = { 1613 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1614 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1615 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1616 "\x0E\xFE\xFF\x00", /* SCSU */ 1617 1618 "\xFE\xFF", /* UTF-16BE */ 1619 "\xFF\xFE", /* UTF-16LE */ 1620 "\xEF\xBB\xBF", /* UTF-8 */ 1621 "\x0E\xFE\xFF", /* SCSU */ 1622 1623 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1624 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1625 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1626 "\x0E\xFE\xFF\x41", /* SCSU */ 1627 1628 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1629 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1630 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1631 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1632 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1633 1634 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1635 }; 1636 static const char* expected[] = { 1637 "UTF-16BE", 1638 "UTF-16LE", 1639 "UTF-8", 1640 "SCSU", 1641 1642 "UTF-16BE", 1643 "UTF-16LE", 1644 "UTF-8", 1645 "SCSU", 1646 1647 "UTF-16BE", 1648 "UTF-16LE", 1649 "UTF-8", 1650 "SCSU", 1651 1652 "UTF-7", 1653 "UTF-7", 1654 "UTF-7", 1655 "UTF-7", 1656 "UTF-7", 1657 "UTF-EBCDIC" 1658 }; 1659 static const int32_t expectedLength[] ={ 1660 2, 1661 2, 1662 3, 1663 3, 1664 1665 2, 1666 2, 1667 3, 1668 3, 1669 1670 2, 1671 2, 1672 3, 1673 3, 1674 1675 5, 1676 4, 1677 4, 1678 4, 1679 4, 1680 4 1681 }; 1682 int i=0; 1683 UErrorCode err; 1684 int32_t signatureLength = -1; 1685 const char* source = NULL; 1686 const char* enc = NULL; 1687 for( ; i<sizeof(data)/sizeof(char*); i++){ 1688 err = U_ZERO_ERROR; 1689 source = data[i]; 1690 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1691 if(U_FAILURE(err)){ 1692 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1693 continue; 1694 } 1695 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1696 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1697 continue; 1698 } 1699 if(signatureLength != expectedLength[i]){ 1700 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1701 } 1702 } 1703 } 1704 { 1705 static const char* data[] = { 1706 "\xFE\xFF\x00", /* UTF-16BE */ 1707 "\xFF\xFE\x00", /* UTF-16LE */ 1708 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1709 "\x0E\xFE\xFF\x00", /* SCSU */ 1710 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1711 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1712 "\xFE\xFF", /* UTF-16BE */ 1713 "\xFF\xFE", /* UTF-16LE */ 1714 "\xEF\xBB\xBF", /* UTF-8 */ 1715 "\x0E\xFE\xFF", /* SCSU */ 1716 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1717 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1718 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1719 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1720 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1721 "\x0E\xFE\xFF\x41", /* SCSU */ 1722 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1723 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1724 "\xFB\xEE\x28", /* BOCU-1 */ 1725 "\xFF\x41\x42" /* NULL */ 1726 }; 1727 static const int len[] = { 1728 3, 1729 3, 1730 4, 1731 4, 1732 4, 1733 4, 1734 2, 1735 2, 1736 3, 1737 3, 1738 4, 1739 4, 1740 4, 1741 4, 1742 4, 1743 4, 1744 5, 1745 5, 1746 3, 1747 3 1748 }; 1749 1750 static const char* expected[] = { 1751 "UTF-16BE", 1752 "UTF-16LE", 1753 "UTF-8", 1754 "SCSU", 1755 "UTF-32BE", 1756 "UTF-32LE", 1757 "UTF-16BE", 1758 "UTF-16LE", 1759 "UTF-8", 1760 "SCSU", 1761 "UTF-32BE", 1762 "UTF-32LE", 1763 "UTF-16BE", 1764 "UTF-16LE", 1765 "UTF-8", 1766 "SCSU", 1767 "UTF-32BE", 1768 "UTF-32LE", 1769 "BOCU-1", 1770 NULL 1771 }; 1772 static const int32_t expectedLength[] ={ 1773 2, 1774 2, 1775 3, 1776 3, 1777 4, 1778 4, 1779 2, 1780 2, 1781 3, 1782 3, 1783 4, 1784 4, 1785 2, 1786 2, 1787 3, 1788 3, 1789 4, 1790 4, 1791 3, 1792 0 1793 }; 1794 int i=0; 1795 UErrorCode err; 1796 int32_t signatureLength = -1; 1797 int32_t sourceLength=-1; 1798 const char* source = NULL; 1799 const char* enc = NULL; 1800 for( ; i<sizeof(data)/sizeof(char*); i++){ 1801 err = U_ZERO_ERROR; 1802 source = data[i]; 1803 sourceLength = len[i]; 1804 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); 1805 if(U_FAILURE(err)){ 1806 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1807 continue; 1808 } 1809 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1810 if(expected[i] !=NULL){ 1811 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1812 continue; 1813 } 1814 } 1815 if(signatureLength != expectedLength[i]){ 1816 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1817 } 1818 } 1819 } 1820 } 1821 1822 static void TestUTF7() { 1823 /* test input */ 1824 static const uint8_t in[]={ 1825 /* H - +Jjo- - ! +- +2AHcAQ */ 1826 0x48, 1827 0x2d, 1828 0x2b, 0x4a, 0x6a, 0x6f, 1829 0x2d, 0x2d, 1830 0x21, 1831 0x2b, 0x2d, 1832 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1833 }; 1834 1835 /* expected test results */ 1836 static const int32_t results[]={ 1837 /* number of bytes read, code point */ 1838 1, 0x48, 1839 1, 0x2d, 1840 4, 0x263a, /* <WHITE SMILING FACE> */ 1841 2, 0x2d, 1842 1, 0x21, 1843 2, 0x2b, 1844 7, 0x10401 1845 }; 1846 1847 const char *cnvName; 1848 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1849 UErrorCode errorCode=U_ZERO_ERROR; 1850 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1851 if(U_FAILURE(errorCode)) { 1852 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1853 return; 1854 } 1855 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1856 /* Test the condition when source >= sourceLimit */ 1857 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1858 cnvName = ucnv_getName(cnv, &errorCode); 1859 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1860 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1861 } 1862 ucnv_close(cnv); 1863 } 1864 1865 static void TestIMAP() { 1866 /* test input */ 1867 static const uint8_t in[]={ 1868 /* H - &Jjo- - ! &- &2AHcAQ- \ */ 1869 0x48, 1870 0x2d, 1871 0x26, 0x4a, 0x6a, 0x6f, 1872 0x2d, 0x2d, 1873 0x21, 1874 0x26, 0x2d, 1875 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1876 }; 1877 1878 /* expected test results */ 1879 static const int32_t results[]={ 1880 /* number of bytes read, code point */ 1881 1, 0x48, 1882 1, 0x2d, 1883 4, 0x263a, /* <WHITE SMILING FACE> */ 1884 2, 0x2d, 1885 1, 0x21, 1886 2, 0x26, 1887 7, 0x10401 1888 }; 1889 1890 const char *cnvName; 1891 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1892 UErrorCode errorCode=U_ZERO_ERROR; 1893 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1894 if(U_FAILURE(errorCode)) { 1895 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1896 return; 1897 } 1898 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1899 /* Test the condition when source >= sourceLimit */ 1900 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1901 cnvName = ucnv_getName(cnv, &errorCode); 1902 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1903 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1904 } 1905 ucnv_close(cnv); 1906 } 1907 1908 static void TestUTF8() { 1909 /* test input */ 1910 static const uint8_t in[]={ 1911 0x61, 1912 0xc2, 0x80, 1913 0xe0, 0xa0, 0x80, 1914 0xf0, 0x90, 0x80, 0x80, 1915 0xf4, 0x84, 0x8c, 0xa1, 1916 0xf0, 0x90, 0x90, 0x81 1917 }; 1918 1919 /* expected test results */ 1920 static const int32_t results[]={ 1921 /* number of bytes read, code point */ 1922 1, 0x61, 1923 2, 0x80, 1924 3, 0x800, 1925 4, 0x10000, 1926 4, 0x104321, 1927 4, 0x10401 1928 }; 1929 1930 /* error test input */ 1931 static const uint8_t in2[]={ 1932 0x61, 1933 0xc0, 0x80, /* illegal non-shortest form */ 1934 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1935 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1936 0xc0, 0xc0, /* illegal trail byte */ 1937 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1938 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1939 0xfe, /* illegal byte altogether */ 1940 0x62 1941 }; 1942 1943 /* expected error test results */ 1944 static const int32_t results2[]={ 1945 /* number of bytes read, code point */ 1946 1, 0x61, 1947 22, 0x62 1948 }; 1949 1950 UConverterToUCallback cb; 1951 const void *p; 1952 1953 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1954 UErrorCode errorCode=U_ZERO_ERROR; 1955 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1956 if(U_FAILURE(errorCode)) { 1957 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1958 return; 1959 } 1960 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1961 /* Test the condition when source >= sourceLimit */ 1962 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1963 1964 /* test error behavior with a skip callback */ 1965 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1966 source=(const char *)in2; 1967 limit=(const char *)(in2+sizeof(in2)); 1968 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1969 1970 ucnv_close(cnv); 1971 } 1972 1973 static void TestCESU8() { 1974 /* test input */ 1975 static const uint8_t in[]={ 1976 0x61, 1977 0xc2, 0x80, 1978 0xe0, 0xa0, 0x80, 1979 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1980 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1981 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1982 0xef, 0xbf, 0xbc 1983 }; 1984 1985 /* expected test results */ 1986 static const int32_t results[]={ 1987 /* number of bytes read, code point */ 1988 1, 0x61, 1989 2, 0x80, 1990 3, 0x800, 1991 6, 0x10000, 1992 3, 0xdc01, 1993 -1,0xd802, /* may read 3 or 6 bytes */ 1994 -1,0x10ffff,/* may read 0 or 3 bytes */ 1995 3, 0xfffc 1996 }; 1997 1998 /* error test input */ 1999 static const uint8_t in2[]={ 2000 0x61, 2001 0xc0, 0x80, /* illegal non-shortest form */ 2002 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 2003 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 2004 0xc0, 0xc0, /* illegal trail byte */ 2005 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 2006 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 2007 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 2008 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 2009 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 2010 0xfe, /* illegal byte altogether */ 2011 0x62 2012 }; 2013 2014 /* expected error test results */ 2015 static const int32_t results2[]={ 2016 /* number of bytes read, code point */ 2017 1, 0x61, 2018 34, 0x62 2019 }; 2020 2021 UConverterToUCallback cb; 2022 const void *p; 2023 2024 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 2025 UErrorCode errorCode=U_ZERO_ERROR; 2026 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2027 if(U_FAILURE(errorCode)) { 2028 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2029 return; 2030 } 2031 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2032 /* Test the condition when source >= sourceLimit */ 2033 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2034 2035 /* test error behavior with a skip callback */ 2036 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2037 source=(const char *)in2; 2038 limit=(const char *)(in2+sizeof(in2)); 2039 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2040 2041 ucnv_close(cnv); 2042 } 2043 2044 static void TestUTF16() { 2045 /* test input */ 2046 static const uint8_t in1[]={ 2047 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2048 }; 2049 static const uint8_t in2[]={ 2050 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2051 }; 2052 static const uint8_t in3[]={ 2053 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2054 }; 2055 2056 /* expected test results */ 2057 static const int32_t results1[]={ 2058 /* number of bytes read, code point */ 2059 4, 0x4e00, 2060 2, 0xfeff 2061 }; 2062 static const int32_t results2[]={ 2063 /* number of bytes read, code point */ 2064 4, 0x004e, 2065 2, 0xfffe 2066 }; 2067 static const int32_t results3[]={ 2068 /* number of bytes read, code point */ 2069 2, 0xfefe, 2070 2, 0x4e00, 2071 2, 0xfeff, 2072 4, 0x20001 2073 }; 2074 2075 const char *source, *limit; 2076 2077 UErrorCode errorCode=U_ZERO_ERROR; 2078 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2079 if(U_FAILURE(errorCode)) { 2080 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2081 return; 2082 } 2083 2084 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2085 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2086 2087 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2088 ucnv_resetToUnicode(cnv); 2089 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2090 2091 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2092 ucnv_resetToUnicode(cnv); 2093 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2094 2095 /* Test the condition when source >= sourceLimit */ 2096 ucnv_resetToUnicode(cnv); 2097 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2098 2099 ucnv_close(cnv); 2100 } 2101 2102 static void TestUTF16BE() { 2103 /* test input */ 2104 static const uint8_t in[]={ 2105 0x00, 0x61, 2106 0x00, 0xc0, 2107 0x00, 0x31, 2108 0x00, 0xf4, 2109 0xce, 0xfe, 2110 0xd8, 0x01, 0xdc, 0x01 2111 }; 2112 2113 /* expected test results */ 2114 static const int32_t results[]={ 2115 /* number of bytes read, code point */ 2116 2, 0x61, 2117 2, 0xc0, 2118 2, 0x31, 2119 2, 0xf4, 2120 2, 0xcefe, 2121 4, 0x10401 2122 }; 2123 2124 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2125 UErrorCode errorCode=U_ZERO_ERROR; 2126 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2127 if(U_FAILURE(errorCode)) { 2128 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); 2129 return; 2130 } 2131 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2132 /* Test the condition when source >= sourceLimit */ 2133 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2134 /*Test for the condition where there is an invalid character*/ 2135 { 2136 static const uint8_t source2[]={0x61}; 2137 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2138 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2139 } 2140 #if 0 2141 /* 2142 * Test disabled because currently the UTF-16BE/LE converters are supposed 2143 * to not set errors for unpaired surrogates. 2144 * This may change with 2145 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2146 */ 2147 2148 /*Test for the condition where there is a surrogate pair*/ 2149 { 2150 const uint8_t source2[]={0xd8, 0x01}; 2151 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2152 } 2153 #endif 2154 ucnv_close(cnv); 2155 } 2156 2157 static void 2158 TestUTF16LE() { 2159 /* test input */ 2160 static const uint8_t in[]={ 2161 0x61, 0x00, 2162 0x31, 0x00, 2163 0x4e, 0x2e, 2164 0x4e, 0x00, 2165 0x01, 0xd8, 0x01, 0xdc 2166 }; 2167 2168 /* expected test results */ 2169 static const int32_t results[]={ 2170 /* number of bytes read, code point */ 2171 2, 0x61, 2172 2, 0x31, 2173 2, 0x2e4e, 2174 2, 0x4e, 2175 4, 0x10401 2176 }; 2177 2178 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2179 UErrorCode errorCode=U_ZERO_ERROR; 2180 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2181 if(U_FAILURE(errorCode)) { 2182 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2183 return; 2184 } 2185 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2186 /* Test the condition when source >= sourceLimit */ 2187 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2188 /*Test for the condition where there is an invalid character*/ 2189 { 2190 static const uint8_t source2[]={0x61}; 2191 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2192 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2193 } 2194 #if 0 2195 /* 2196 * Test disabled because currently the UTF-16BE/LE converters are supposed 2197 * to not set errors for unpaired surrogates. 2198 * This may change with 2199 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2200 */ 2201 2202 /*Test for the condition where there is a surrogate character*/ 2203 { 2204 static const uint8_t source2[]={0x01, 0xd8}; 2205 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2206 } 2207 #endif 2208 2209 ucnv_close(cnv); 2210 } 2211 2212 static void TestUTF32() { 2213 /* test input */ 2214 static const uint8_t in1[]={ 2215 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2216 }; 2217 static const uint8_t in2[]={ 2218 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2219 }; 2220 static const uint8_t in3[]={ 2221 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2222 }; 2223 2224 /* expected test results */ 2225 static const int32_t results1[]={ 2226 /* number of bytes read, code point */ 2227 8, 0x100f00, 2228 4, 0xfeff 2229 }; 2230 static const int32_t results2[]={ 2231 /* number of bytes read, code point */ 2232 8, 0x0f1000, 2233 4, 0xfffe 2234 }; 2235 static const int32_t results3[]={ 2236 /* number of bytes read, code point */ 2237 4, 0xfefe, 2238 4, 0x100f00, 2239 4, 0xfffd, /* unmatched surrogate */ 2240 4, 0xfffd /* unmatched surrogate */ 2241 }; 2242 2243 const char *source, *limit; 2244 2245 UErrorCode errorCode=U_ZERO_ERROR; 2246 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2247 if(U_FAILURE(errorCode)) { 2248 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2249 return; 2250 } 2251 2252 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2253 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2254 2255 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2256 ucnv_resetToUnicode(cnv); 2257 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2258 2259 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2260 ucnv_resetToUnicode(cnv); 2261 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2262 2263 /* Test the condition when source >= sourceLimit */ 2264 ucnv_resetToUnicode(cnv); 2265 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2266 2267 ucnv_close(cnv); 2268 } 2269 2270 static void 2271 TestUTF32BE() { 2272 /* test input */ 2273 static const uint8_t in[]={ 2274 0x00, 0x00, 0x00, 0x61, 2275 0x00, 0x00, 0x30, 0x61, 2276 0x00, 0x00, 0xdc, 0x00, 2277 0x00, 0x00, 0xd8, 0x00, 2278 0x00, 0x00, 0xdf, 0xff, 2279 0x00, 0x00, 0xff, 0xfe, 2280 0x00, 0x10, 0xab, 0xcd, 2281 0x00, 0x10, 0xff, 0xff 2282 }; 2283 2284 /* expected test results */ 2285 static const int32_t results[]={ 2286 /* number of bytes read, code point */ 2287 4, 0x61, 2288 4, 0x3061, 2289 4, 0xfffd, 2290 4, 0xfffd, 2291 4, 0xfffd, 2292 4, 0xfffe, 2293 4, 0x10abcd, 2294 4, 0x10ffff 2295 }; 2296 2297 /* error test input */ 2298 static const uint8_t in2[]={ 2299 0x00, 0x00, 0x00, 0x61, 2300 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2301 0x00, 0x00, 0x00, 0x62, 2302 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2303 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2304 0x00, 0x00, 0x01, 0x62, 2305 0x00, 0x00, 0x02, 0x62 2306 }; 2307 2308 /* expected error test results */ 2309 static const int32_t results2[]={ 2310 /* number of bytes read, code point */ 2311 4, 0x61, 2312 8, 0x62, 2313 12, 0x162, 2314 4, 0x262 2315 }; 2316 2317 UConverterToUCallback cb; 2318 const void *p; 2319 2320 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2321 UErrorCode errorCode=U_ZERO_ERROR; 2322 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2323 if(U_FAILURE(errorCode)) { 2324 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2325 return; 2326 } 2327 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2328 2329 /* Test the condition when source >= sourceLimit */ 2330 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2331 2332 /* test error behavior with a skip callback */ 2333 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2334 source=(const char *)in2; 2335 limit=(const char *)(in2+sizeof(in2)); 2336 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2337 2338 ucnv_close(cnv); 2339 } 2340 2341 static void 2342 TestUTF32LE() { 2343 /* test input */ 2344 static const uint8_t in[]={ 2345 0x61, 0x00, 0x00, 0x00, 2346 0x61, 0x30, 0x00, 0x00, 2347 0x00, 0xdc, 0x00, 0x00, 2348 0x00, 0xd8, 0x00, 0x00, 2349 0xff, 0xdf, 0x00, 0x00, 2350 0xfe, 0xff, 0x00, 0x00, 2351 0xcd, 0xab, 0x10, 0x00, 2352 0xff, 0xff, 0x10, 0x00 2353 }; 2354 2355 /* expected test results */ 2356 static const int32_t results[]={ 2357 /* number of bytes read, code point */ 2358 4, 0x61, 2359 4, 0x3061, 2360 4, 0xfffd, 2361 4, 0xfffd, 2362 4, 0xfffd, 2363 4, 0xfffe, 2364 4, 0x10abcd, 2365 4, 0x10ffff 2366 }; 2367 2368 /* error test input */ 2369 static const uint8_t in2[]={ 2370 0x61, 0x00, 0x00, 0x00, 2371 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2372 0x62, 0x00, 0x00, 0x00, 2373 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2374 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2375 0x62, 0x01, 0x00, 0x00, 2376 0x62, 0x02, 0x00, 0x00, 2377 }; 2378 2379 /* expected error test results */ 2380 static const int32_t results2[]={ 2381 /* number of bytes read, code point */ 2382 4, 0x61, 2383 8, 0x62, 2384 12, 0x162, 2385 4, 0x262, 2386 }; 2387 2388 UConverterToUCallback cb; 2389 const void *p; 2390 2391 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2392 UErrorCode errorCode=U_ZERO_ERROR; 2393 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2394 if(U_FAILURE(errorCode)) { 2395 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2396 return; 2397 } 2398 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2399 2400 /* Test the condition when source >= sourceLimit */ 2401 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2402 2403 /* test error behavior with a skip callback */ 2404 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2405 source=(const char *)in2; 2406 limit=(const char *)(in2+sizeof(in2)); 2407 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2408 2409 ucnv_close(cnv); 2410 } 2411 2412 static void 2413 TestLATIN1() { 2414 /* test input */ 2415 static const uint8_t in[]={ 2416 0x61, 2417 0x31, 2418 0x32, 2419 0xc0, 2420 0xf0, 2421 0xf4, 2422 }; 2423 2424 /* expected test results */ 2425 static const int32_t results[]={ 2426 /* number of bytes read, code point */ 2427 1, 0x61, 2428 1, 0x31, 2429 1, 0x32, 2430 1, 0xc0, 2431 1, 0xf0, 2432 1, 0xf4, 2433 }; 2434 static const uint16_t in1[] = { 2435 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2436 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2437 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2438 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2439 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2440 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2441 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2442 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2443 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2444 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2445 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2446 0xcb, 0x82 2447 }; 2448 static const uint8_t out1[] = { 2449 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2450 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2451 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2452 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2453 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2454 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2455 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2456 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2457 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2458 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2459 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2460 0xcb, 0x82 2461 }; 2462 static const uint16_t in2[]={ 2463 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2464 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2465 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2466 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2467 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2468 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2469 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2470 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2471 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2472 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2473 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2474 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2475 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2476 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2477 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2478 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2479 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2480 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2481 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2482 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2483 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2484 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2485 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2486 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2487 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2488 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2489 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2490 0x37, 0x20, 0x2A, 0x2F, 2491 }; 2492 static const unsigned char out2[]={ 2493 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2494 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2495 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2496 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2497 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2498 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2499 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2500 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2501 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2502 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2503 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2504 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2505 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2506 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2507 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2508 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2509 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2510 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2511 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2512 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2513 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2514 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2515 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2516 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2517 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2518 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2519 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2520 0x37, 0x20, 0x2A, 0x2F, 2521 }; 2522 const char *source=(const char *)in; 2523 const char *limit=(const char *)in+sizeof(in); 2524 2525 UErrorCode errorCode=U_ZERO_ERROR; 2526 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2527 if(U_FAILURE(errorCode)) { 2528 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2529 return; 2530 } 2531 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2532 /* Test the condition when source >= sourceLimit */ 2533 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2534 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2535 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2536 2537 ucnv_close(cnv); 2538 } 2539 2540 static void 2541 TestSBCS() { 2542 /* test input */ 2543 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2544 /* expected test results */ 2545 static const int32_t results[]={ 2546 /* number of bytes read, code point */ 2547 1, 0x61, 2548 1, 0xbf, 2549 1, 0xc4, 2550 1, 0x2021, 2551 1, 0xf8ff, 2552 1, 0x00d9 2553 }; 2554 2555 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2556 UErrorCode errorCode=U_ZERO_ERROR; 2557 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2558 if(U_FAILURE(errorCode)) { 2559 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2560 return; 2561 } 2562 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2563 /* Test the condition when source >= sourceLimit */ 2564 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2565 /*Test for Illegal character */ /* 2566 { 2567 static const uint8_t input1[]={ 0xA1 }; 2568 const char* illegalsource=(const char*)input1; 2569 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2570 } 2571 */ 2572 ucnv_close(cnv); 2573 } 2574 2575 static void 2576 TestDBCS() { 2577 /* test input */ 2578 static const uint8_t in[]={ 2579 0x44, 0x6a, 2580 0xc4, 0x9c, 2581 0x7a, 0x74, 2582 0x46, 0xab, 2583 0x42, 0x5b, 2584 2585 }; 2586 2587 /* expected test results */ 2588 static const int32_t results[]={ 2589 /* number of bytes read, code point */ 2590 2, 0x00a7, 2591 2, 0xe1d2, 2592 2, 0x6962, 2593 2, 0xf842, 2594 2, 0xffe5, 2595 }; 2596 2597 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2598 UErrorCode errorCode=U_ZERO_ERROR; 2599 2600 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2601 if(U_FAILURE(errorCode)) { 2602 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2603 return; 2604 } 2605 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2606 /* Test the condition when source >= sourceLimit */ 2607 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2608 /*Test for the condition where there is an invalid character*/ 2609 { 2610 static const uint8_t source2[]={0x1a, 0x1b}; 2611 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2612 } 2613 /*Test for the condition where we have a truncated char*/ 2614 { 2615 static const uint8_t source1[]={0xc4}; 2616 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2617 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2618 } 2619 ucnv_close(cnv); 2620 } 2621 2622 static void 2623 TestMBCS() { 2624 /* test input */ 2625 static const uint8_t in[]={ 2626 0x01, 2627 0xa6, 0xa3, 2628 0x00, 2629 0xa6, 0xa1, 2630 0x08, 2631 0xc2, 0x76, 2632 0xc2, 0x78, 2633 2634 }; 2635 2636 /* expected test results */ 2637 static const int32_t results[]={ 2638 /* number of bytes read, code point */ 2639 1, 0x0001, 2640 2, 0x250c, 2641 1, 0x0000, 2642 2, 0x2500, 2643 1, 0x0008, 2644 2, 0xd60c, 2645 2, 0xd60e, 2646 }; 2647 2648 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2649 UErrorCode errorCode=U_ZERO_ERROR; 2650 2651 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2652 if(U_FAILURE(errorCode)) { 2653 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2654 return; 2655 } 2656 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2657 /* Test the condition when source >= sourceLimit */ 2658 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2659 /*Test for the condition where there is an invalid character*/ 2660 { 2661 static const uint8_t source2[]={0xa1, 0x80}; 2662 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2663 } 2664 /*Test for the condition where we have a truncated char*/ 2665 { 2666 static const uint8_t source1[]={0xc4}; 2667 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2668 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2669 } 2670 ucnv_close(cnv); 2671 2672 } 2673 2674 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2675 static void 2676 TestICCRunout() { 2677 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2678 2679 const char *cnvName = "ibm-1363"; 2680 UErrorCode status = U_ZERO_ERROR; 2681 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; 2682 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ 2683 const char *source = sourceData; 2684 const char *sourceLim = sourceData+sizeof(sourceData); 2685 UChar c1, c2, c3; 2686 UConverter *cnv=ucnv_open(cnvName, &status); 2687 if(U_FAILURE(status)) { 2688 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2689 return; 2690 } 2691 2692 #if 0 2693 { 2694 UChar targetBuf[256]; 2695 UChar *target = targetBuf; 2696 UChar *targetLim = target+256; 2697 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2698 2699 log_info("After convert: target@%d, source@%d, status%s\n", 2700 target-targetBuf, source-sourceData, u_errorName(status)); 2701 2702 if(U_FAILURE(status)) { 2703 log_err("Failed to convert: %s\n", u_errorName(status)); 2704 } else { 2705 2706 } 2707 } 2708 #endif 2709 2710 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2711 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); 2712 2713 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2714 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); 2715 2716 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2717 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); 2718 2719 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { 2720 log_verbose("OK\n"); 2721 } else { 2722 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); 2723 } 2724 2725 ucnv_close(cnv); 2726 2727 } 2728 #endif 2729 2730 #ifdef U_ENABLE_GENERIC_ISO_2022 2731 2732 static void 2733 TestISO_2022() { 2734 /* test input */ 2735 static const uint8_t in[]={ 2736 0x1b, 0x25, 0x42, 2737 0x31, 2738 0x32, 2739 0x61, 2740 0xc2, 0x80, 2741 0xe0, 0xa0, 0x80, 2742 0xf0, 0x90, 0x80, 0x80 2743 }; 2744 2745 2746 2747 /* expected test results */ 2748 static const int32_t results[]={ 2749 /* number of bytes read, code point */ 2750 4, 0x0031, /* 4 bytes including the escape sequence */ 2751 1, 0x0032, 2752 1, 0x61, 2753 2, 0x80, 2754 3, 0x800, 2755 4, 0x10000 2756 }; 2757 2758 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2759 UErrorCode errorCode=U_ZERO_ERROR; 2760 UConverter *cnv; 2761 2762 cnv=ucnv_open("ISO_2022", &errorCode); 2763 if(U_FAILURE(errorCode)) { 2764 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2765 return; 2766 } 2767 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2768 2769 /* Test the condition when source >= sourceLimit */ 2770 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2771 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2772 /*Test for the condition where we have a truncated char*/ 2773 { 2774 static const uint8_t source1[]={0xc4}; 2775 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2776 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2777 } 2778 /*Test for the condition where there is an invalid character*/ 2779 { 2780 static const uint8_t source2[]={0xa1, 0x01}; 2781 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2782 } 2783 ucnv_close(cnv); 2784 } 2785 2786 #endif 2787 2788 static void 2789 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2790 const UChar* uSource; 2791 const UChar* uSourceLimit; 2792 const char* cSource; 2793 const char* cSourceLimit; 2794 UChar *uTargetLimit =NULL; 2795 UChar *uTarget; 2796 char *cTarget; 2797 const char *cTargetLimit; 2798 char *cBuf; 2799 UChar *uBuf; /*,*test;*/ 2800 int32_t uBufSize = 120; 2801 int len=0; 2802 int i=2; 2803 UErrorCode errorCode=U_ZERO_ERROR; 2804 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2805 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2806 ucnv_reset(cnv); 2807 for(;--i>0; ){ 2808 uSource = (UChar*) source; 2809 uSourceLimit=(const UChar*)sourceLimit; 2810 cTarget = cBuf; 2811 uTarget = uBuf; 2812 cSource = cBuf; 2813 cTargetLimit = cBuf; 2814 uTargetLimit = uBuf; 2815 2816 do{ 2817 2818 cTargetLimit = cTargetLimit+ i; 2819 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2820 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2821 errorCode=U_ZERO_ERROR; 2822 continue; 2823 } 2824 2825 if(U_FAILURE(errorCode)){ 2826 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2827 return; 2828 } 2829 2830 }while (uSource<uSourceLimit); 2831 2832 cSourceLimit =cTarget; 2833 do{ 2834 uTargetLimit=uTargetLimit+i; 2835 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2836 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2837 errorCode=U_ZERO_ERROR; 2838 continue; 2839 } 2840 if(U_FAILURE(errorCode)){ 2841 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2842 return; 2843 } 2844 }while(cSource<cSourceLimit); 2845 2846 uSource = source; 2847 /*test =uBuf;*/ 2848 for(len=0;len<(int)(source - sourceLimit);len++){ 2849 if(uBuf[len]!=uSource[len]){ 2850 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2851 } 2852 } 2853 } 2854 free(uBuf); 2855 free(cBuf); 2856 } 2857 /* Test for Jitterbug 778 */ 2858 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2859 const UChar* uSource; 2860 const UChar* uSourceLimit; 2861 const char* cSource; 2862 UChar *uTargetLimit =NULL; 2863 UChar *uTarget; 2864 char *cTarget; 2865 const char *cTargetLimit; 2866 char *cBuf; 2867 UChar *uBuf,*test; 2868 int32_t uBufSize = 120; 2869 int numCharsInTarget=0; 2870 UErrorCode errorCode=U_ZERO_ERROR; 2871 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2872 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2873 uSource = source; 2874 uSourceLimit=sourceLimit; 2875 cTarget = cBuf; 2876 cTargetLimit = cBuf +uBufSize*5; 2877 uTarget = uBuf; 2878 uTargetLimit = uBuf+ uBufSize*5; 2879 ucnv_reset(cnv); 2880 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2881 if(U_FAILURE(errorCode)){ 2882 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2883 return; 2884 } 2885 cSource = cBuf; 2886 test =uBuf; 2887 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2888 if(U_FAILURE(errorCode)){ 2889 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2890 return; 2891 } 2892 uSource = source; 2893 while(uSource<uSourceLimit){ 2894 if(*test!=*uSource){ 2895 2896 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 2897 } 2898 uSource++; 2899 test++; 2900 } 2901 free(uBuf); 2902 free(cBuf); 2903 } 2904 2905 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2906 const UChar* uSource; 2907 const UChar* uSourceLimit; 2908 const char* cSource; 2909 const char* cSourceLimit; 2910 UChar *uTargetLimit =NULL; 2911 UChar *uTarget; 2912 char *cTarget; 2913 const char *cTargetLimit; 2914 char *cBuf; 2915 UChar *uBuf; /*,*test;*/ 2916 int32_t uBufSize = 120; 2917 int len=0; 2918 int i=2; 2919 const UChar *temp = sourceLimit; 2920 UErrorCode errorCode=U_ZERO_ERROR; 2921 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2922 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2923 2924 ucnv_reset(cnv); 2925 for(;--i>0;){ 2926 uSource = (UChar*) source; 2927 cTarget = cBuf; 2928 uTarget = uBuf; 2929 cSource = cBuf; 2930 cTargetLimit = cBuf; 2931 uTargetLimit = uBuf+uBufSize*5; 2932 cTargetLimit = cTargetLimit+uBufSize*10; 2933 uSourceLimit=uSource; 2934 do{ 2935 2936 if (uSourceLimit < sourceLimit) { 2937 uSourceLimit = uSourceLimit+1; 2938 } 2939 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2940 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2941 errorCode=U_ZERO_ERROR; 2942 continue; 2943 } 2944 2945 if(U_FAILURE(errorCode)){ 2946 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2947 return; 2948 } 2949 2950 }while (uSource<temp); 2951 2952 cSourceLimit =cBuf; 2953 do{ 2954 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2955 cSourceLimit = cSourceLimit+1; 2956 } 2957 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2958 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2959 errorCode=U_ZERO_ERROR; 2960 continue; 2961 } 2962 if(U_FAILURE(errorCode)){ 2963 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2964 return; 2965 } 2966 }while(cSource<cTarget); 2967 2968 uSource = source; 2969 /*test =uBuf;*/ 2970 for(;len<(int)(source - sourceLimit);len++){ 2971 if(uBuf[len]!=uSource[len]){ 2972 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2973 } 2974 } 2975 } 2976 free(uBuf); 2977 free(cBuf); 2978 } 2979 static void 2980 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2981 const uint16_t results[], const char* message){ 2982 /* const char* s0; */ 2983 const char* s=(char*)source; 2984 const uint16_t *r=results; 2985 UErrorCode errorCode=U_ZERO_ERROR; 2986 uint32_t c,exC; 2987 ucnv_reset(cnv); 2988 while(s<limit) { 2989 /* s0=s; */ 2990 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2991 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2992 break; /* no more significant input */ 2993 } else if(U_FAILURE(errorCode)) { 2994 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2995 break; 2996 } else { 2997 if(UTF_IS_FIRST_SURROGATE(*r)){ 2998 int i =0, len = 2; 2999 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE); 3000 r++; 3001 }else{ 3002 exC = *r; 3003 } 3004 if(c!=(uint32_t)(exC)) 3005 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 3006 } 3007 r++; 3008 } 3009 } 3010 3011 static int TestJitterbug930(const char* enc){ 3012 UErrorCode err = U_ZERO_ERROR; 3013 UConverter*converter; 3014 char out[80]; 3015 char*target = out; 3016 UChar in[4]; 3017 const UChar*source = in; 3018 int32_t off[80]; 3019 int32_t* offsets = off; 3020 int numOffWritten=0; 3021 UBool flush = 0; 3022 converter = my_ucnv_open(enc, &err); 3023 3024 in[0] = 0x41; /* 0x4E00;*/ 3025 in[1] = 0x4E01; 3026 in[2] = 0x4E02; 3027 in[3] = 0x4E03; 3028 3029 memset(off, '*', sizeof(off)); 3030 3031 ucnv_fromUnicode (converter, 3032 &target, 3033 target+2, 3034 &source, 3035 source+3, 3036 offsets, 3037 flush, 3038 &err); 3039 3040 /* writes three bytes into the output buffer: 41 1B 24 3041 * but offsets contains 0 1 1 3042 */ 3043 while(*offsets< off[10]){ 3044 numOffWritten++; 3045 offsets++; 3046 } 3047 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3048 if(numOffWritten!= (int)(target-out)){ 3049 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3050 } 3051 3052 err = U_ZERO_ERROR; 3053 3054 memset(off,'*' , sizeof(off)); 3055 3056 flush = 1; 3057 offsets=off; 3058 ucnv_fromUnicode (converter, 3059 &target, 3060 target+4, 3061 &source, 3062 source, 3063 offsets, 3064 flush, 3065 &err); 3066 numOffWritten=0; 3067 while(*offsets< off[10]){ 3068 numOffWritten++; 3069 if(*offsets!= -1){ 3070 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3071 } 3072 offsets++; 3073 } 3074 3075 /* writes 42 43 7A into output buffer, 3076 * offsets contains -1 -1 -1 3077 */ 3078 ucnv_close(converter); 3079 return 0; 3080 } 3081 3082 static void 3083 TestHZ() { 3084 /* test input */ 3085 static const uint16_t in[]={ 3086 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3087 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3088 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3089 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3090 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3091 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3092 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3093 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3094 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3095 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 3096 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3097 0x005A, 0x005B, 0x005C, 0x000A 3098 }; 3099 const UChar* uSource; 3100 const UChar* uSourceLimit; 3101 const char* cSource; 3102 const char* cSourceLimit; 3103 UChar *uTargetLimit =NULL; 3104 UChar *uTarget; 3105 char *cTarget; 3106 const char *cTargetLimit; 3107 char *cBuf; 3108 UChar *uBuf,*test; 3109 int32_t uBufSize = 120; 3110 UErrorCode errorCode=U_ZERO_ERROR; 3111 UConverter *cnv; 3112 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3113 int32_t* myOff= offsets; 3114 cnv=ucnv_open("HZ", &errorCode); 3115 if(U_FAILURE(errorCode)) { 3116 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3117 return; 3118 } 3119 3120 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3121 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3122 uSource = (const UChar*)in; 3123 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3124 cTarget = cBuf; 3125 cTargetLimit = cBuf +uBufSize*5; 3126 uTarget = uBuf; 3127 uTargetLimit = uBuf+ uBufSize*5; 3128 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3129 if(U_FAILURE(errorCode)){ 3130 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3131 return; 3132 } 3133 cSource = cBuf; 3134 cSourceLimit =cTarget; 3135 test =uBuf; 3136 myOff=offsets; 3137 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3138 if(U_FAILURE(errorCode)){ 3139 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3140 return; 3141 } 3142 uSource = (const UChar*)in; 3143 while(uSource<uSourceLimit){ 3144 if(*test!=*uSource){ 3145 3146 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3147 } 3148 uSource++; 3149 test++; 3150 } 3151 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3152 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3153 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3154 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3155 TestJitterbug930("csISO2022JP"); 3156 ucnv_close(cnv); 3157 free(offsets); 3158 free(uBuf); 3159 free(cBuf); 3160 } 3161 3162 static void 3163 TestISCII(){ 3164 /* test input */ 3165 static const uint16_t in[]={ 3166 /* test full range of Devanagari */ 3167 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3168 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3169 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3170 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3171 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3172 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3173 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3174 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3175 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3176 0x096D,0x096E,0x096F, 3177 /* test Soft halant*/ 3178 0x0915,0x094d, 0x200D, 3179 /* test explicit halant */ 3180 0x0915,0x094d, 0x200c, 3181 /* test double danda */ 3182 0x965, 3183 /* test ASCII */ 3184 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3185 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3186 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3187 /* tests from Lotus */ 3188 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3189 0x0930,0x094D,0x200D, 3190 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3191 0x0915,0x0921,0x002B,0x095F, 3192 /* tamil range */ 3193 0x0B86, 0xB87, 0xB88, 3194 /* telugu range */ 3195 0x0C05, 0x0C02, 0x0C03,0x0c31, 3196 /* kannada range */ 3197 0x0C85, 0xC82, 0x0C83, 3198 /* test Abbr sign and Anudatta */ 3199 0x0970, 0x952, 3200 /* 0x0958, 3201 0x0959, 3202 0x095A, 3203 0x095B, 3204 0x095C, 3205 0x095D, 3206 0x095E, 3207 0x095F,*/ 3208 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3209 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3210 0x090C , 3211 0x0962, 3212 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3213 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3214 0x0950 /* OM Symbol 0xa1, 0xE9,*/, 3215 0x093D /* Avagraha 0xEA, 0xE9*/, 3216 0x0958, 3217 0x0959, 3218 0x095A, 3219 0x095B, 3220 0x095C, 3221 0x095D, 3222 0x095E, 3223 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3224 }; 3225 static const unsigned char byteArr[]={ 3226 3227 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3228 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3229 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3230 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3231 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3232 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3233 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3234 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3235 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3236 0xf8,0xf9,0xfa, 3237 /* test soft halant */ 3238 0xb3, 0xE8, 0xE9, 3239 /* test explicit halant */ 3240 0xb3, 0xE8, 0xE8, 3241 /* test double danda */ 3242 0xea, 0xea, 3243 /* test ASCII */ 3244 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3245 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3246 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3247 /* test ATR code */ 3248 3249 /* tests from Lotus */ 3250 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3251 0xEF,0x42,0xCF,0xE8,0xD9, 3252 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3253 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3254 /* tamil range */ 3255 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3256 /* telugu range */ 3257 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3258 /* kannada range */ 3259 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3260 /* anudatta and abbreviation sign */ 3261 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3262 3263 3264 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3265 3266 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3267 3268 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3269 3270 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3271 3272 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3273 3274 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3275 3276 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3277 3278 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ 3279 3280 0xB3, 0xE9, /* Ka + NUKTA */ 3281 3282 0xB4, 0xE9, /* Kha + NUKTA */ 3283 3284 0xB5, 0xE9, /* Ga + NUKTA */ 3285 3286 0xBA, 0xE9, 3287 3288 0xBF, 0xE9, 3289 3290 0xC0, 0xE9, 3291 3292 0xC9, 0xE9, 3293 /* INV halant RA */ 3294 0xD9, 0xE8, 0xCF, 3295 0x00, 0x00A0, 3296 /* just consume unhandled codepoints */ 3297 0xEF, 0x30, 3298 3299 }; 3300 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3301 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3302 3303 } 3304 3305 static void 3306 TestISO_2022_JP() { 3307 /* test input */ 3308 static const uint16_t in[]={ 3309 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3310 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3311 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3312 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3313 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3314 0x201D, 0x3014, 0x000D, 0x000A, 3315 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3316 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3317 }; 3318 const UChar* uSource; 3319 const UChar* uSourceLimit; 3320 const char* cSource; 3321 const char* cSourceLimit; 3322 UChar *uTargetLimit =NULL; 3323 UChar *uTarget; 3324 char *cTarget; 3325 const char *cTargetLimit; 3326 char *cBuf; 3327 UChar *uBuf,*test; 3328 int32_t uBufSize = 120; 3329 UErrorCode errorCode=U_ZERO_ERROR; 3330 UConverter *cnv; 3331 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3332 int32_t* myOff= offsets; 3333 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3334 if(U_FAILURE(errorCode)) { 3335 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3336 return; 3337 } 3338 3339 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3340 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3341 uSource = (const UChar*)in; 3342 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3343 cTarget = cBuf; 3344 cTargetLimit = cBuf +uBufSize*5; 3345 uTarget = uBuf; 3346 uTargetLimit = uBuf+ uBufSize*5; 3347 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3348 if(U_FAILURE(errorCode)){ 3349 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3350 return; 3351 } 3352 cSource = cBuf; 3353 cSourceLimit =cTarget; 3354 test =uBuf; 3355 myOff=offsets; 3356 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3357 if(U_FAILURE(errorCode)){ 3358 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3359 return; 3360 } 3361 3362 uSource = (const UChar*)in; 3363 while(uSource<uSourceLimit){ 3364 if(*test!=*uSource){ 3365 3366 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3367 } 3368 uSource++; 3369 test++; 3370 } 3371 3372 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3373 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3374 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3375 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3376 TestJitterbug930("csISO2022JP"); 3377 ucnv_close(cnv); 3378 free(uBuf); 3379 free(cBuf); 3380 free(offsets); 3381 } 3382 3383 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3384 const UChar* uSource; 3385 const UChar* uSourceLimit; 3386 const char* cSource; 3387 const char* cSourceLimit; 3388 UChar *uTargetLimit =NULL; 3389 UChar *uTarget; 3390 char *cTarget; 3391 const char *cTargetLimit; 3392 char *cBuf; 3393 UChar *uBuf,*test; 3394 int32_t uBufSize = 120*10; 3395 UErrorCode errorCode=U_ZERO_ERROR; 3396 UConverter *cnv; 3397 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3398 int32_t* myOff= offsets; 3399 cnv=my_ucnv_open(conv, &errorCode); 3400 if(U_FAILURE(errorCode)) { 3401 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3402 return; 3403 } 3404 3405 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3406 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3407 uSource = (const UChar*)in; 3408 uSourceLimit=uSource+len; 3409 cTarget = cBuf; 3410 cTargetLimit = cBuf +uBufSize; 3411 uTarget = uBuf; 3412 uTargetLimit = uBuf+ uBufSize; 3413 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3414 if(U_FAILURE(errorCode)){ 3415 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3416 return; 3417 } 3418 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3419 cSource = cBuf; 3420 cSourceLimit =cTarget; 3421 test =uBuf; 3422 myOff=offsets; 3423 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3424 if(U_FAILURE(errorCode)){ 3425 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3426 return; 3427 } 3428 3429 uSource = (const UChar*)in; 3430 while(uSource<uSourceLimit){ 3431 if(*test!=*uSource){ 3432 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3433 } 3434 uSource++; 3435 test++; 3436 } 3437 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3438 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3439 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3440 if(byteArr && byteArrLen!=0){ 3441 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3442 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3443 { 3444 cSource = byteArr; 3445 cSourceLimit = cSource+byteArrLen; 3446 test=uBuf; 3447 myOff = offsets; 3448 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3449 if(U_FAILURE(errorCode)){ 3450 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3451 return; 3452 } 3453 3454 uSource = (const UChar*)in; 3455 while(uSource<uSourceLimit){ 3456 if(*test!=*uSource){ 3457 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3458 } 3459 uSource++; 3460 test++; 3461 } 3462 } 3463 } 3464 3465 ucnv_close(cnv); 3466 free(uBuf); 3467 free(cBuf); 3468 free(offsets); 3469 } 3470 static UChar U_CALLCONV 3471 _charAt(int32_t offset, void *context) { 3472 return ((char*)context)[offset]; 3473 } 3474 3475 static int32_t 3476 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3477 int32_t srcIndex=0; 3478 int32_t dstIndex=0; 3479 if(U_FAILURE(*status)){ 3480 return 0; 3481 } 3482 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3483 *status = U_ILLEGAL_ARGUMENT_ERROR; 3484 return 0; 3485 } 3486 if(srcLen==-1){ 3487 srcLen = (int32_t)uprv_strlen(src); 3488 } 3489 3490 for (; srcIndex<srcLen; ) { 3491 UChar32 c = src[srcIndex++]; 3492 if (c == 0x005C /*'\\'*/) { 3493 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3494 if (c == (UChar32)0xFFFFFFFF) { 3495 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3496 break; /* invalid escape sequence */ 3497 } 3498 } 3499 if(dstIndex < dstLen){ 3500 if(c>0xFFFF){ 3501 dst[dstIndex++] = UTF16_LEAD(c); 3502 if(dstIndex<dstLen){ 3503 dst[dstIndex]=UTF16_TRAIL(c); 3504 }else{ 3505 *status=U_BUFFER_OVERFLOW_ERROR; 3506 } 3507 }else{ 3508 dst[dstIndex]=(UChar)c; 3509 } 3510 3511 }else{ 3512 *status = U_BUFFER_OVERFLOW_ERROR; 3513 } 3514 dstIndex++; /* for preflighting */ 3515 } 3516 return dstIndex; 3517 } 3518 3519 static void 3520 TestFullRoundtrip(const char* cp){ 3521 UChar usource[10] ={0}; 3522 UChar nsrc[10] = {0}; 3523 uint32_t i=1; 3524 int len=0, ulen; 3525 nsrc[0]=0x0061; 3526 /* Test codepoint 0 */ 3527 TestConv(usource,1,cp,"",NULL,0); 3528 TestConv(usource,2,cp,"",NULL,0); 3529 nsrc[2]=0x5555; 3530 TestConv(nsrc,3,cp,"",NULL,0); 3531 3532 for(;i<=0x10FFFF;i++){ 3533 if(i==0xD800){ 3534 i=0xDFFF; 3535 continue; 3536 } 3537 if(i<=0xFFFF){ 3538 usource[0] =(UChar) i; 3539 len=1; 3540 }else{ 3541 usource[0]=UTF16_LEAD(i); 3542 usource[1]=UTF16_TRAIL(i); 3543 len=2; 3544 } 3545 ulen=len; 3546 if(i==0x80) { 3547 usource[2]=0; 3548 } 3549 /* Test only single code points */ 3550 TestConv(usource,ulen,cp,"",NULL,0); 3551 /* Test codepoint repeated twice */ 3552 usource[ulen]=usource[0]; 3553 usource[ulen+1]=usource[1]; 3554 ulen+=len; 3555 TestConv(usource,ulen,cp,"",NULL,0); 3556 /* Test codepoint repeated 3 times */ 3557 usource[ulen]=usource[0]; 3558 usource[ulen+1]=usource[1]; 3559 ulen+=len; 3560 TestConv(usource,ulen,cp,"",NULL,0); 3561 /* Test codepoint in between 2 codepoints */ 3562 nsrc[1]=usource[0]; 3563 nsrc[2]=usource[1]; 3564 nsrc[len+1]=0x5555; 3565 TestConv(nsrc,len+2,cp,"",NULL,0); 3566 uprv_memset(usource,0,sizeof(UChar)*10); 3567 } 3568 } 3569 3570 static void 3571 TestRoundTrippingAllUTF(void){ 3572 if(!getTestOption(QUICK_OPTION)){ 3573 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3574 TestFullRoundtrip("BOCU-1"); 3575 log_verbose("Running exhaustive round trip test for SCSU\n"); 3576 TestFullRoundtrip("SCSU"); 3577 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3578 TestFullRoundtrip("UTF-8"); 3579 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3580 TestFullRoundtrip("CESU-8"); 3581 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3582 TestFullRoundtrip("UTF-16BE"); 3583 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3584 TestFullRoundtrip("UTF-16LE"); 3585 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3586 TestFullRoundtrip("UTF-16"); 3587 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3588 TestFullRoundtrip("UTF-32BE"); 3589 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3590 TestFullRoundtrip("UTF-32LE"); 3591 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3592 TestFullRoundtrip("UTF-32"); 3593 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3594 TestFullRoundtrip("UTF-7"); 3595 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3596 TestFullRoundtrip("UTF-7,version=1"); 3597 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3598 TestFullRoundtrip("IMAP-mailbox-name"); 3599 log_verbose("Running exhaustive round trip test for GB18030\n"); 3600 TestFullRoundtrip("GB18030"); 3601 } 3602 } 3603 3604 static void 3605 TestSCSU() { 3606 3607 static const uint16_t germanUTF16[]={ 3608 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3609 }; 3610 3611 static const uint8_t germanSCSU[]={ 3612 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3613 }; 3614 3615 static const uint16_t russianUTF16[]={ 3616 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3617 }; 3618 3619 static const uint8_t russianSCSU[]={ 3620 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3621 }; 3622 3623 static const uint16_t japaneseUTF16[]={ 3624 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3625 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3626 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3627 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3628 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3629 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 3630 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3631 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3632 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3633 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3634 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3635 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3636 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3637 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3638 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3639 }; 3640 3641 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3642 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3643 static const uint8_t japaneseSCSU[]={ 3644 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3645 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3646 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3647 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3648 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3649 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3650 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3651 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3652 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3653 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3654 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3655 0xcb, 0x82 3656 }; 3657 3658 static const uint16_t allFeaturesUTF16[]={ 3659 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3660 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3661 0x01df, 0xf000, 0xdbff, 0xdfff 3662 }; 3663 3664 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3665 * result here (34B vs. 35B) 3666 */ 3667 static const uint8_t allFeaturesSCSU[]={ 3668 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3669 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3670 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3671 0xdf, 0x14, 0x80, 0x15, 0xff 3672 }; 3673 static const uint16_t monkeyIn[]={ 3674 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3675 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3676 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3677 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3678 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3679 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3680 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3681 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3682 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3683 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3684 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3685 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3686 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3687 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3688 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3689 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3690 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3691 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3692 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3693 /* test non-BMP code points */ 3694 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3695 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3696 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3697 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3698 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3699 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3700 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3701 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3702 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3703 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3704 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3705 3706 3707 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3708 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3709 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3710 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3711 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3712 }; 3713 static const char *fTestCases [] = { 3714 "\\ud800\\udc00", /* smallest surrogate*/ 3715 "\\ud8ff\\udcff", 3716 "\\udBff\\udFff", /* largest surrogate pair*/ 3717 "\\ud834\\udc00", 3718 "\\U0010FFFF", 3719 "Hello \\u9292 \\u9192 World!", 3720 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3721 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3722 3723 "\\u0648\\u06c8", /* catch missing reset*/ 3724 "\\u0648\\u06c8", 3725 3726 "\\u4444\\uE001", /* lowest quotable*/ 3727 "\\u4444\\uf2FF", /* highest quotable*/ 3728 "\\u4444\\uf188\\u4444", 3729 "\\u4444\\uf188\\uf288", 3730 "\\u4444\\uf188abc\\u0429\\uf288", 3731 "\\u9292\\u2222", 3732 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3733 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3734 "Hello World!123456", 3735 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3736 3737 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3738 "abc\\u4411d", /* uses SQU*/ 3739 "abc\\u4411\\u4412d",/* uses SCU*/ 3740 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3741 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3742 "\\u9292\\u2222", 3743 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3744 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3745 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3746 3747 "", /* empty input*/ 3748 "\\u0000", /* smallest BMP character*/ 3749 "\\uFFFF", /* largest BMP character*/ 3750 3751 /* regression tests*/ 3752 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3753 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3754 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3755 "\\u0041\\u00df\\u0401\\u015f", 3756 "\\u9066\\u2123abc", 3757 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3758 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3759 }; 3760 int i=0; 3761 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3762 const char* cSrc = fTestCases[i]; 3763 UErrorCode status = U_ZERO_ERROR; 3764 int32_t cSrcLen,srcLen; 3765 UChar* src; 3766 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3767 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3768 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3769 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3770 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3771 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3772 free(src); 3773 } 3774 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3775 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3776 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3777 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3778 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3779 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3780 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3781 } 3782 3783 #if !UCONFIG_NO_LEGACY_CONVERSION 3784 static void TestJitterbug2346(){ 3785 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3786 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3787 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3788 3789 UChar uTarget[500]={'\0'}; 3790 UChar* utarget=uTarget; 3791 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3792 3793 char cTarget[500]={'\0'}; 3794 char* ctarget=cTarget; 3795 char* ctargetLimit=cTarget+sizeof(cTarget); 3796 const char* csource=source; 3797 UChar* temp = expected; 3798 UErrorCode err=U_ZERO_ERROR; 3799 3800 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3801 if(U_FAILURE(err)) { 3802 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3803 return; 3804 } 3805 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3806 if(U_FAILURE(err)) { 3807 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3808 return; 3809 } 3810 utargetLimit=utarget; 3811 utarget = uTarget; 3812 while(utarget<utargetLimit){ 3813 if(*temp!=*utarget){ 3814 3815 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3816 } 3817 utarget++; 3818 temp++; 3819 } 3820 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3821 if(U_FAILURE(err)) { 3822 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3823 return; 3824 } 3825 ctargetLimit=ctarget; 3826 ctarget =cTarget; 3827 ucnv_close(conv); 3828 3829 3830 } 3831 3832 static void 3833 TestISO_2022_JP_1() { 3834 /* test input */ 3835 static const uint16_t in[]={ 3836 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3837 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3838 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3839 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3840 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3841 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3842 0x201D, 0x000D, 0x000A, 3843 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3844 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3845 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3846 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3847 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3848 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3849 }; 3850 const UChar* uSource; 3851 const UChar* uSourceLimit; 3852 const char* cSource; 3853 const char* cSourceLimit; 3854 UChar *uTargetLimit =NULL; 3855 UChar *uTarget; 3856 char *cTarget; 3857 const char *cTargetLimit; 3858 char *cBuf; 3859 UChar *uBuf,*test; 3860 int32_t uBufSize = 120; 3861 UErrorCode errorCode=U_ZERO_ERROR; 3862 UConverter *cnv; 3863 3864 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3865 if(U_FAILURE(errorCode)) { 3866 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3867 return; 3868 } 3869 3870 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3871 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3872 uSource = (const UChar*)in; 3873 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3874 cTarget = cBuf; 3875 cTargetLimit = cBuf +uBufSize*5; 3876 uTarget = uBuf; 3877 uTargetLimit = uBuf+ uBufSize*5; 3878 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3879 if(U_FAILURE(errorCode)){ 3880 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3881 return; 3882 } 3883 cSource = cBuf; 3884 cSourceLimit =cTarget; 3885 test =uBuf; 3886 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3887 if(U_FAILURE(errorCode)){ 3888 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3889 return; 3890 } 3891 uSource = (const UChar*)in; 3892 while(uSource<uSourceLimit){ 3893 if(*test!=*uSource){ 3894 3895 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3896 } 3897 uSource++; 3898 test++; 3899 } 3900 /*ucnv_close(cnv); 3901 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3902 /*Test for the condition where there is an invalid character*/ 3903 ucnv_reset(cnv); 3904 { 3905 static const uint8_t source2[]={0x0e,0x24,0x053}; 3906 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3907 } 3908 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3909 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3910 ucnv_close(cnv); 3911 free(uBuf); 3912 free(cBuf); 3913 } 3914 3915 static void 3916 TestISO_2022_JP_2() { 3917 /* test input */ 3918 static const uint16_t in[]={ 3919 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3920 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3921 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3922 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3923 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3924 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3925 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3926 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3927 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3928 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3929 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3930 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3931 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3932 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3933 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3934 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3935 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3936 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3937 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3938 }; 3939 const UChar* uSource; 3940 const UChar* uSourceLimit; 3941 const char* cSource; 3942 const char* cSourceLimit; 3943 UChar *uTargetLimit =NULL; 3944 UChar *uTarget; 3945 char *cTarget; 3946 const char *cTargetLimit; 3947 char *cBuf; 3948 UChar *uBuf,*test; 3949 int32_t uBufSize = 120; 3950 UErrorCode errorCode=U_ZERO_ERROR; 3951 UConverter *cnv; 3952 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3953 int32_t* myOff= offsets; 3954 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3955 if(U_FAILURE(errorCode)) { 3956 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3957 return; 3958 } 3959 3960 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3961 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3962 uSource = (const UChar*)in; 3963 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3964 cTarget = cBuf; 3965 cTargetLimit = cBuf +uBufSize*5; 3966 uTarget = uBuf; 3967 uTargetLimit = uBuf+ uBufSize*5; 3968 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3969 if(U_FAILURE(errorCode)){ 3970 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3971 return; 3972 } 3973 cSource = cBuf; 3974 cSourceLimit =cTarget; 3975 test =uBuf; 3976 myOff=offsets; 3977 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3978 if(U_FAILURE(errorCode)){ 3979 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3980 return; 3981 } 3982 uSource = (const UChar*)in; 3983 while(uSource<uSourceLimit){ 3984 if(*test!=*uSource){ 3985 3986 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3987 } 3988 uSource++; 3989 test++; 3990 } 3991 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3992 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3993 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3994 /*Test for the condition where there is an invalid character*/ 3995 ucnv_reset(cnv); 3996 { 3997 static const uint8_t source2[]={0x0e,0x24,0x053}; 3998 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 3999 } 4000 ucnv_close(cnv); 4001 free(uBuf); 4002 free(cBuf); 4003 free(offsets); 4004 } 4005 4006 static void 4007 TestISO_2022_KR() { 4008 /* test input */ 4009 static const uint16_t in[]={ 4010 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4011 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4012 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4013 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4014 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4015 ,0x53E3,0x53E4,0x000A,0x000D}; 4016 const UChar* uSource; 4017 const UChar* uSourceLimit; 4018 const char* cSource; 4019 const char* cSourceLimit; 4020 UChar *uTargetLimit =NULL; 4021 UChar *uTarget; 4022 char *cTarget; 4023 const char *cTargetLimit; 4024 char *cBuf; 4025 UChar *uBuf,*test; 4026 int32_t uBufSize = 120; 4027 UErrorCode errorCode=U_ZERO_ERROR; 4028 UConverter *cnv; 4029 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4030 int32_t* myOff= offsets; 4031 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 4032 if(U_FAILURE(errorCode)) { 4033 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4034 return; 4035 } 4036 4037 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4038 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4039 uSource = (const UChar*)in; 4040 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4041 cTarget = cBuf; 4042 cTargetLimit = cBuf +uBufSize*5; 4043 uTarget = uBuf; 4044 uTargetLimit = uBuf+ uBufSize*5; 4045 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4046 if(U_FAILURE(errorCode)){ 4047 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4048 return; 4049 } 4050 cSource = cBuf; 4051 cSourceLimit =cTarget; 4052 test =uBuf; 4053 myOff=offsets; 4054 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4055 if(U_FAILURE(errorCode)){ 4056 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4057 return; 4058 } 4059 uSource = (const UChar*)in; 4060 while(uSource<uSourceLimit){ 4061 if(*test!=*uSource){ 4062 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4063 } 4064 uSource++; 4065 test++; 4066 } 4067 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4068 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4069 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4070 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4071 TestJitterbug930("csISO2022KR"); 4072 /*Test for the condition where there is an invalid character*/ 4073 ucnv_reset(cnv); 4074 { 4075 static const uint8_t source2[]={0x1b,0x24,0x053}; 4076 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4077 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4078 } 4079 ucnv_close(cnv); 4080 free(uBuf); 4081 free(cBuf); 4082 free(offsets); 4083 } 4084 4085 static void 4086 TestISO_2022_KR_1() { 4087 /* test input */ 4088 static const uint16_t in[]={ 4089 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4090 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4091 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4092 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4093 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4094 ,0x53E3,0x53E4,0x000A,0x000D}; 4095 const UChar* uSource; 4096 const UChar* uSourceLimit; 4097 const char* cSource; 4098 const char* cSourceLimit; 4099 UChar *uTargetLimit =NULL; 4100 UChar *uTarget; 4101 char *cTarget; 4102 const char *cTargetLimit; 4103 char *cBuf; 4104 UChar *uBuf,*test; 4105 int32_t uBufSize = 120; 4106 UErrorCode errorCode=U_ZERO_ERROR; 4107 UConverter *cnv; 4108 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4109 int32_t* myOff= offsets; 4110 cnv=ucnv_open("ibm-25546", &errorCode); 4111 if(U_FAILURE(errorCode)) { 4112 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4113 return; 4114 } 4115 4116 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4117 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4118 uSource = (const UChar*)in; 4119 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4120 cTarget = cBuf; 4121 cTargetLimit = cBuf +uBufSize*5; 4122 uTarget = uBuf; 4123 uTargetLimit = uBuf+ uBufSize*5; 4124 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4125 if(U_FAILURE(errorCode)){ 4126 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4127 return; 4128 } 4129 cSource = cBuf; 4130 cSourceLimit =cTarget; 4131 test =uBuf; 4132 myOff=offsets; 4133 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4134 if(U_FAILURE(errorCode)){ 4135 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4136 return; 4137 } 4138 uSource = (const UChar*)in; 4139 while(uSource<uSourceLimit){ 4140 if(*test!=*uSource){ 4141 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4142 } 4143 uSource++; 4144 test++; 4145 } 4146 ucnv_reset(cnv); 4147 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4148 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4149 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4150 ucnv_reset(cnv); 4151 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4152 /*Test for the condition where there is an invalid character*/ 4153 ucnv_reset(cnv); 4154 { 4155 static const uint8_t source2[]={0x1b,0x24,0x053}; 4156 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4157 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4158 } 4159 ucnv_close(cnv); 4160 free(uBuf); 4161 free(cBuf); 4162 free(offsets); 4163 } 4164 4165 static void TestJitterbug2411(){ 4166 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4167 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4168 UConverter* kr=NULL, *kr1=NULL; 4169 UErrorCode errorCode = U_ZERO_ERROR; 4170 UChar tgt[100]={'\0'}; 4171 UChar* target = tgt; 4172 UChar* targetLimit = target+100; 4173 kr=ucnv_open("iso-2022-kr", &errorCode); 4174 if(U_FAILURE(errorCode)) { 4175 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4176 return; 4177 } 4178 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4179 if(U_FAILURE(errorCode)) { 4180 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4181 return; 4182 } 4183 kr1 = ucnv_open("ibm-25546", &errorCode); 4184 if(U_FAILURE(errorCode)) { 4185 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4186 return; 4187 } 4188 target = tgt; 4189 targetLimit = target+100; 4190 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4191 4192 if(U_FAILURE(errorCode)) { 4193 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4194 return; 4195 } 4196 4197 ucnv_close(kr); 4198 ucnv_close(kr1); 4199 4200 } 4201 4202 static void 4203 TestJIS(){ 4204 /* From Unicode moved to testdata/conversion.txt */ 4205 /*To Unicode*/ 4206 { 4207 static const uint8_t sampleTextJIS[] = { 4208 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4209 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4210 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4211 }; 4212 static const uint16_t expectedISO2022JIS[] = { 4213 0x0041, 0x0042, 4214 0xFF81, 0xFF82, 4215 0x3000 4216 }; 4217 static const int32_t toISO2022JISOffs[]={ 4218 3,4, 4219 8,9, 4220 16 4221 }; 4222 4223 static const uint8_t sampleTextJIS7[] = { 4224 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4225 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4226 0x1b,0x24,0x42,0x21,0x21, 4227 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4228 0x21,0x22, 4229 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4230 }; 4231 static const uint16_t expectedISO2022JIS7[] = { 4232 0x0041, 0x0042, 4233 0xFF81, 0xFF82, 4234 0x3000, 4235 0xFF81, 0xFF82, 4236 0x3001, 4237 0x3000 4238 }; 4239 static const int32_t toISO2022JIS7Offs[]={ 4240 3,4, 4241 8,9, 4242 13,16, 4243 17, 4244 19,27 4245 }; 4246 static const uint8_t sampleTextJIS8[] = { 4247 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4248 0xa1,0xc8,0xd9,/*Katakana Set*/ 4249 0x1b,0x28,0x42, 4250 0x41,0x42, 4251 0xb1,0xc3, /*Katakana Set*/ 4252 0x1b,0x24,0x42,0x21,0x21 4253 }; 4254 static const uint16_t expectedISO2022JIS8[] = { 4255 0x0041, 0x0042, 4256 0xff61, 0xff88, 0xff99, 4257 0x0041, 0x0042, 4258 0xff71, 0xff83, 4259 0x3000 4260 }; 4261 static const int32_t toISO2022JIS8Offs[]={ 4262 3, 4, 5, 6, 4263 7, 11, 12, 13, 4264 14, 18, 4265 }; 4266 4267 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4268 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4269 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4270 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4271 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4272 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4273 } 4274 4275 } 4276 4277 4278 #if 0 4279 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 4280 4281 static void TestJitterbug915(){ 4282 /* tests for roundtripping of the below sequence 4283 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4284 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4285 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4286 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4287 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4288 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4289 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4290 */ 4291 static const char cSource[]={ 4292 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4293 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4294 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4295 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4296 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 4297 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4298 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4299 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4300 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4301 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4302 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4303 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4304 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4305 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4306 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4307 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4308 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4309 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4310 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4311 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4312 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4313 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4314 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4315 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4316 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4317 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4318 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4319 0x37, 0x20, 0x2A, 0x2F 4320 }; 4321 UChar uTarget[500]={'\0'}; 4322 UChar* utarget=uTarget; 4323 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4324 4325 char cTarget[500]={'\0'}; 4326 char* ctarget=cTarget; 4327 char* ctargetLimit=cTarget+sizeof(cTarget); 4328 const char* csource=cSource; 4329 const char* tempSrc = cSource; 4330 UErrorCode err=U_ZERO_ERROR; 4331 4332 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4333 if(U_FAILURE(err)) { 4334 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4335 return; 4336 } 4337 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4338 if(U_FAILURE(err)) { 4339 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4340 return; 4341 } 4342 utargetLimit=utarget; 4343 utarget = uTarget; 4344 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4345 if(U_FAILURE(err)) { 4346 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4347 return; 4348 } 4349 ctargetLimit=ctarget; 4350 ctarget =cTarget; 4351 while(ctarget<ctargetLimit){ 4352 if(*ctarget != *tempSrc){ 4353 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4354 } 4355 ++ctarget; 4356 ++tempSrc; 4357 } 4358 4359 ucnv_close(conv); 4360 } 4361 4362 static void 4363 TestISO_2022_CN_EXT() { 4364 /* test input */ 4365 static const uint16_t in[]={ 4366 /* test Non-BMP code points */ 4367 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4368 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4369 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4370 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4371 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4372 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4373 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4374 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4375 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4376 0xD869, 0xDED5, 4377 4378 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4379 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4380 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 4381 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4382 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4383 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4384 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4385 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4386 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4387 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4388 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4389 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4390 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4391 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4392 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4393 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4394 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4395 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4396 4397 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4398 4399 }; 4400 4401 const UChar* uSource; 4402 const UChar* uSourceLimit; 4403 const char* cSource; 4404 const char* cSourceLimit; 4405 UChar *uTargetLimit =NULL; 4406 UChar *uTarget; 4407 char *cTarget; 4408 const char *cTargetLimit; 4409 char *cBuf; 4410 UChar *uBuf,*test; 4411 int32_t uBufSize = 180; 4412 UErrorCode errorCode=U_ZERO_ERROR; 4413 UConverter *cnv; 4414 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4415 int32_t* myOff= offsets; 4416 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4417 if(U_FAILURE(errorCode)) { 4418 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4419 return; 4420 } 4421 4422 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4423 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4424 uSource = (const UChar*)in; 4425 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4426 cTarget = cBuf; 4427 cTargetLimit = cBuf +uBufSize*5; 4428 uTarget = uBuf; 4429 uTargetLimit = uBuf+ uBufSize*5; 4430 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4431 if(U_FAILURE(errorCode)){ 4432 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4433 return; 4434 } 4435 cSource = cBuf; 4436 cSourceLimit =cTarget; 4437 test =uBuf; 4438 myOff=offsets; 4439 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4440 if(U_FAILURE(errorCode)){ 4441 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4442 return; 4443 } 4444 uSource = (const UChar*)in; 4445 while(uSource<uSourceLimit){ 4446 if(*test!=*uSource){ 4447 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4448 } 4449 else{ 4450 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4451 } 4452 uSource++; 4453 test++; 4454 } 4455 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4456 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4457 /*Test for the condition where there is an invalid character*/ 4458 ucnv_reset(cnv); 4459 { 4460 static const uint8_t source2[]={0x0e,0x24,0x053}; 4461 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4462 } 4463 ucnv_close(cnv); 4464 free(uBuf); 4465 free(cBuf); 4466 free(offsets); 4467 } 4468 #endif 4469 4470 static void 4471 TestISO_2022_CN() { 4472 /* test input */ 4473 static const uint16_t in[]={ 4474 /* jitterbug 951 */ 4475 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4476 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4477 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4478 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4479 0x0020, 0x0045, 0x004e, 0x0044, 4480 /**/ 4481 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4482 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4483 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4484 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4485 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4486 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4487 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4488 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4489 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4490 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4491 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4492 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4493 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4494 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4495 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4496 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4497 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4498 4499 }; 4500 const UChar* uSource; 4501 const UChar* uSourceLimit; 4502 const char* cSource; 4503 const char* cSourceLimit; 4504 UChar *uTargetLimit =NULL; 4505 UChar *uTarget; 4506 char *cTarget; 4507 const char *cTargetLimit; 4508 char *cBuf; 4509 UChar *uBuf,*test; 4510 int32_t uBufSize = 180; 4511 UErrorCode errorCode=U_ZERO_ERROR; 4512 UConverter *cnv; 4513 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4514 int32_t* myOff= offsets; 4515 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4516 if(U_FAILURE(errorCode)) { 4517 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4518 return; 4519 } 4520 4521 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4522 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4523 uSource = (const UChar*)in; 4524 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4525 cTarget = cBuf; 4526 cTargetLimit = cBuf +uBufSize*5; 4527 uTarget = uBuf; 4528 uTargetLimit = uBuf+ uBufSize*5; 4529 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4530 if(U_FAILURE(errorCode)){ 4531 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4532 return; 4533 } 4534 cSource = cBuf; 4535 cSourceLimit =cTarget; 4536 test =uBuf; 4537 myOff=offsets; 4538 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4539 if(U_FAILURE(errorCode)){ 4540 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4541 return; 4542 } 4543 uSource = (const UChar*)in; 4544 while(uSource<uSourceLimit){ 4545 if(*test!=*uSource){ 4546 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4547 } 4548 else{ 4549 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4550 } 4551 uSource++; 4552 test++; 4553 } 4554 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4555 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4556 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4557 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4558 TestJitterbug930("csISO2022CN"); 4559 /*Test for the condition where there is an invalid character*/ 4560 ucnv_reset(cnv); 4561 { 4562 static const uint8_t source2[]={0x0e,0x24,0x053}; 4563 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4564 } 4565 4566 ucnv_close(cnv); 4567 free(uBuf); 4568 free(cBuf); 4569 free(offsets); 4570 } 4571 4572 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4573 typedef struct { 4574 const char * converterName; 4575 const char * inputText; 4576 int inputTextLength; 4577 } EmptySegmentTest; 4578 4579 /* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4580 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4581 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4582 if (reason > UCNV_IRREGULAR) { 4583 return; 4584 } 4585 if (reason != UCNV_IRREGULAR) { 4586 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4587 } 4588 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4589 *err = U_ZERO_ERROR; 4590 ucnv_cbToUWriteSub(toArgs,0,err); 4591 } 4592 4593 enum { kEmptySegmentToUCharsMax = 64 }; 4594 static void TestJitterbug6175(void) { 4595 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4596 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4597 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4598 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4599 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4600 static const EmptySegmentTest emptySegmentTests[] = { 4601 /* converterName inputText inputTextLength */ 4602 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, 4603 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4604 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4605 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4606 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, 4607 /* terminator: */ 4608 { NULL, NULL, 0, } 4609 }; 4610 const EmptySegmentTest * testPtr; 4611 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4612 UErrorCode err = U_ZERO_ERROR; 4613 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4614 if (U_FAILURE(err)) { 4615 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4616 return; 4617 } 4618 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4619 if (U_FAILURE(err)) { 4620 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4621 ucnv_close(cnv); 4622 return; 4623 } 4624 { 4625 UChar toUChars[kEmptySegmentToUCharsMax]; 4626 UChar * toUCharsPtr = toUChars; 4627 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4628 const char * inCharsPtr = testPtr->inputText; 4629 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4630 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4631 } 4632 ucnv_close(cnv); 4633 } 4634 } 4635 4636 static void 4637 TestEBCDIC_STATEFUL() { 4638 /* test input */ 4639 static const uint8_t in[]={ 4640 0x61, 4641 0x1a, 4642 0x0f, 0x4b, 4643 0x42, 4644 0x40, 4645 0x36, 4646 }; 4647 4648 /* expected test results */ 4649 static const int32_t results[]={ 4650 /* number of bytes read, code point */ 4651 1, 0x002f, 4652 1, 0x0092, 4653 2, 0x002e, 4654 1, 0xff62, 4655 1, 0x0020, 4656 1, 0x0096, 4657 4658 }; 4659 static const uint8_t in2[]={ 4660 0x0f, 4661 0xa1, 4662 0x01 4663 }; 4664 4665 /* expected test results */ 4666 static const int32_t results2[]={ 4667 /* number of bytes read, code point */ 4668 2, 0x203E, 4669 1, 0x0001, 4670 }; 4671 4672 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4673 UErrorCode errorCode=U_ZERO_ERROR; 4674 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4675 if(U_FAILURE(errorCode)) { 4676 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4677 return; 4678 } 4679 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4680 ucnv_reset(cnv); 4681 /* Test the condition when source >= sourceLimit */ 4682 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4683 ucnv_reset(cnv); 4684 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4685 { 4686 static const uint8_t source1[]={0x0f}; 4687 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4688 } 4689 /*Test for the condition where there is an invalid character*/ 4690 ucnv_reset(cnv); 4691 { 4692 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4693 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4694 } 4695 ucnv_reset(cnv); 4696 source=(const char*)in2; 4697 limit=(const char*)in2+sizeof(in2); 4698 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4699 ucnv_close(cnv); 4700 4701 } 4702 4703 static void 4704 TestGB18030() { 4705 /* test input */ 4706 static const uint8_t in[]={ 4707 0x24, 4708 0x7f, 4709 0x81, 0x30, 0x81, 0x30, 4710 0xa8, 0xbf, 4711 0xa2, 0xe3, 4712 0xd2, 0xbb, 4713 0x82, 0x35, 0x8f, 0x33, 4714 0x84, 0x31, 0xa4, 0x39, 4715 0x90, 0x30, 0x81, 0x30, 4716 0xe3, 0x32, 0x9a, 0x35 4717 #if 0 4718 /* 4719 * Feature removed markus 2000-oct-26 4720 * Only some codepages must match surrogate pairs into supplementary code points - 4721 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . 4722 * GB 18030 provides direct encodings for supplementary code points, therefore 4723 * it must not combine two single-encoded surrogates into one code point. 4724 */ 4725 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4726 #endif 4727 }; 4728 4729 /* expected test results */ 4730 static const int32_t results[]={ 4731 /* number of bytes read, code point */ 4732 1, 0x24, 4733 1, 0x7f, 4734 4, 0x80, 4735 2, 0x1f9, 4736 2, 0x20ac, 4737 2, 0x4e00, 4738 4, 0x9fa6, 4739 4, 0xffff, 4740 4, 0x10000, 4741 4, 0x10ffff 4742 #if 0 4743 /* Feature removed. See comment above. */ 4744 8, 0x10000 4745 #endif 4746 }; 4747 4748 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4749 UErrorCode errorCode=U_ZERO_ERROR; 4750 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4751 if(U_FAILURE(errorCode)) { 4752 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4753 return; 4754 } 4755 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4756 ucnv_close(cnv); 4757 } 4758 4759 static void 4760 TestLMBCS() { 4761 /* LMBCS-1 string */ 4762 static const uint8_t pszLMBCS[]={ 4763 0x61, 4764 0x01, 0x29, 4765 0x81, 4766 0xA0, 4767 0x0F, 0x27, 4768 0x0F, 0x91, 4769 0x14, 0x0a, 0x74, 4770 0x14, 0xF6, 0x02, 4771 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4772 0x10, 0x88, 0xA0, 4773 }; 4774 4775 /* Unicode UChar32 equivalents */ 4776 static const UChar32 pszUnicode32[]={ 4777 /* code point */ 4778 0x00000061, 4779 0x00002013, 4780 0x000000FC, 4781 0x000000E1, 4782 0x00000007, 4783 0x00000091, 4784 0x00000a74, 4785 0x00000200, 4786 0x00023456, /* code point for surrogate pair */ 4787 0x00005516 4788 }; 4789 4790 /* Unicode UChar equivalents */ 4791 static const UChar pszUnicode[]={ 4792 /* code point */ 4793 0x0061, 4794 0x2013, 4795 0x00FC, 4796 0x00E1, 4797 0x0007, 4798 0x0091, 4799 0x0a74, 4800 0x0200, 4801 0xD84D, /* low surrogate */ 4802 0xDC56, /* high surrogate */ 4803 0x5516 4804 }; 4805 4806 /* expected test results */ 4807 static const int offsets32[]={ 4808 /* number of bytes read, code point */ 4809 0, 4810 1, 4811 3, 4812 4, 4813 5, 4814 7, 4815 9, 4816 12, 4817 15, 4818 21, 4819 24 4820 }; 4821 4822 /* expected test results */ 4823 static const int offsets[]={ 4824 /* number of bytes read, code point */ 4825 0, 4826 1, 4827 3, 4828 4, 4829 5, 4830 7, 4831 9, 4832 12, 4833 15, 4834 18, 4835 21, 4836 24 4837 }; 4838 4839 4840 UConverter *cnv; 4841 4842 #define NAME_LMBCS_1 "LMBCS-1" 4843 #define NAME_LMBCS_2 "LMBCS-2" 4844 4845 4846 /* Some basic open/close/property tests on some LMBCS converters */ 4847 { 4848 4849 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4850 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4851 char get_subchars [1]; 4852 const char * get_name; 4853 UConverter *cnv1; 4854 UConverter *cnv2; 4855 4856 int8_t len = sizeof(get_subchars); 4857 4858 UErrorCode errorCode=U_ZERO_ERROR; 4859 4860 /* Open */ 4861 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4862 if(U_FAILURE(errorCode)) { 4863 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4864 return; 4865 } 4866 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4867 if(U_FAILURE(errorCode)) { 4868 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4869 return; 4870 } 4871 4872 /* Name */ 4873 get_name = ucnv_getName (cnv1, &errorCode); 4874 if (strcmp(NAME_LMBCS_1,get_name)){ 4875 log_err("Unexpected converter name: %s\n", get_name); 4876 } 4877 get_name = ucnv_getName (cnv2, &errorCode); 4878 if (strcmp(NAME_LMBCS_2,get_name)){ 4879 log_err("Unexpected converter name: %s\n", get_name); 4880 } 4881 4882 /* substitution chars */ 4883 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4884 if(U_FAILURE(errorCode)) { 4885 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4886 } 4887 if (len!=1){ 4888 log_err("Unexpected length of sub chars\n"); 4889 } 4890 if (get_subchars[0] != expected_subchars[0]){ 4891 log_err("Unexpected value of sub chars\n"); 4892 } 4893 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4894 if(U_FAILURE(errorCode)) { 4895 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4896 } 4897 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4898 if(U_FAILURE(errorCode)) { 4899 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4900 } 4901 if (len!=1){ 4902 log_err("Unexpected length of sub chars\n"); 4903 } 4904 if (get_subchars[0] != new_subchars[0]){ 4905 log_err("Unexpected value of sub chars\n"); 4906 } 4907 ucnv_close(cnv1); 4908 ucnv_close(cnv2); 4909 4910 } 4911 4912 /* LMBCS to Unicode - offsets */ 4913 { 4914 UErrorCode errorCode=U_ZERO_ERROR; 4915 4916 const char * pSource = (const char *)pszLMBCS; 4917 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4918 4919 UChar Out [sizeof(pszUnicode) + 1]; 4920 UChar * pOut = Out; 4921 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4922 4923 int32_t off [sizeof(offsets)]; 4924 4925 /* last 'offset' in expected results is just the final size. 4926 (Makes other tests easier). Compensate here: */ 4927 4928 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4929 4930 4931 4932 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4933 if(U_FAILURE(errorCode)) { 4934 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4935 return; 4936 } 4937 4938 4939 4940 ucnv_toUnicode (cnv, 4941 &pOut, 4942 OutLimit, 4943 &pSource, 4944 sourceLimit, 4945 off, 4946 TRUE, 4947 &errorCode); 4948 4949 4950 if (memcmp(off,offsets,sizeof(offsets))) 4951 { 4952 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4953 } 4954 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4955 { 4956 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4957 } 4958 ucnv_close(cnv); 4959 } 4960 { 4961 /* LMBCS to Unicode - getNextUChar */ 4962 const char * sourceStart; 4963 const char *source=(const char *)pszLMBCS; 4964 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4965 const UChar32 *results= pszUnicode32; 4966 const int *off = offsets32; 4967 4968 UErrorCode errorCode=U_ZERO_ERROR; 4969 UChar32 uniChar; 4970 4971 cnv=ucnv_open("LMBCS-1", &errorCode); 4972 if(U_FAILURE(errorCode)) { 4973 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4974 return; 4975 } 4976 else 4977 { 4978 4979 while(source<limit) { 4980 sourceStart=source; 4981 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4982 if(U_FAILURE(errorCode)) { 4983 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4984 break; 4985 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4986 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4987 uniChar, (source-sourceStart), *results, *off); 4988 break; 4989 } 4990 results++; 4991 off++; 4992 } 4993 } 4994 ucnv_close(cnv); 4995 } 4996 { /* test locale & optimization group operations: Unicode to LMBCS */ 4997 4998 UErrorCode errorCode=U_ZERO_ERROR; 4999 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 5000 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 5001 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 5002 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 5003 const UChar * pUniOut = uniString; 5004 UChar * pUniIn = uniString; 5005 uint8_t lmbcsString [4]; 5006 const char * pLMBCSOut = (const char *)lmbcsString; 5007 char * pLMBCSIn = (char *)lmbcsString; 5008 5009 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 5010 ucnv_fromUnicode (cnv16he, 5011 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5012 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5013 NULL, 1, &errorCode); 5014 5015 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 5016 { 5017 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 5018 } 5019 5020 pLMBCSIn= (char *)lmbcsString; 5021 pUniOut = uniString; 5022 ucnv_fromUnicode (cnv01us, 5023 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5024 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5025 NULL, 1, &errorCode); 5026 5027 if (lmbcsString[0] != 0x9F) 5028 { 5029 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5030 } 5031 5032 /* single byte char from mbcs char set */ 5033 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5034 pLMBCSOut = (const char *)lmbcsString; 5035 pUniIn = uniString; 5036 ucnv_toUnicode (cnv16jp, 5037 &pUniIn, pUniIn + 1, 5038 &pLMBCSOut, (pLMBCSOut + 1), 5039 NULL, 1, &errorCode); 5040 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5041 { 5042 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5043 } 5044 /* convert to group 1: should be 3 bytes */ 5045 pLMBCSIn = (char *)lmbcsString; 5046 pUniOut = uniString; 5047 ucnv_fromUnicode (cnv01us, 5048 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5049 &pUniOut, pUniOut + 1, 5050 NULL, 1, &errorCode); 5051 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5052 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5053 { 5054 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5055 } 5056 pLMBCSOut = (const char *)lmbcsString; 5057 pUniIn = uniString; 5058 ucnv_toUnicode (cnv01us, 5059 &pUniIn, pUniIn + 1, 5060 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5061 NULL, 1, &errorCode); 5062 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5063 { 5064 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5065 } 5066 pLMBCSIn = (char *)lmbcsString; 5067 pUniOut = uniString; 5068 ucnv_fromUnicode (cnv16jp, 5069 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5070 &pUniOut, pUniOut + 1, 5071 NULL, 1, &errorCode); 5072 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5073 { 5074 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5075 } 5076 ucnv_close(cnv16he); 5077 ucnv_close(cnv16jp); 5078 ucnv_close(cnv01us); 5079 } 5080 { 5081 /* Small source buffer testing, LMBCS -> Unicode */ 5082 5083 UErrorCode errorCode=U_ZERO_ERROR; 5084 5085 const char * pSource = (const char *)pszLMBCS; 5086 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5087 int codepointCount = 0; 5088 5089 UChar Out [sizeof(pszUnicode) + 1]; 5090 UChar * pOut = Out; 5091 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 5092 5093 5094 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5095 if(U_FAILURE(errorCode)) { 5096 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5097 return; 5098 } 5099 5100 5101 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 5102 { 5103 ucnv_toUnicode (cnv, 5104 &pOut, 5105 OutLimit, 5106 &pSource, 5107 (pSource+1), /* claim that this is a 1- byte buffer */ 5108 NULL, 5109 FALSE, /* FALSE means there might be more chars in the next buffer */ 5110 &errorCode); 5111 5112 if (U_SUCCESS (errorCode)) 5113 { 5114 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5115 { 5116 /* we are on to the next code point: check value */ 5117 5118 if (Out[0] != pszUnicode[codepointCount]){ 5119 log_err("LMBCS->Uni result %lx should have been %lx \n", 5120 Out[0], pszUnicode[codepointCount]); 5121 } 5122 5123 pOut = Out; /* reset for accumulating next code point */ 5124 codepointCount++; 5125 } 5126 } 5127 else 5128 { 5129 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5130 } 5131 } 5132 { 5133 /* limits & surrogate error testing */ 5134 char LIn [sizeof(pszLMBCS)]; 5135 const char * pLIn = LIn; 5136 5137 char LOut [sizeof(pszLMBCS)]; 5138 char * pLOut = LOut; 5139 5140 UChar UOut [sizeof(pszUnicode)]; 5141 UChar * pUOut = UOut; 5142 5143 UChar UIn [sizeof(pszUnicode)]; 5144 const UChar * pUIn = UIn; 5145 5146 int32_t off [sizeof(offsets)]; 5147 UChar32 uniChar; 5148 5149 errorCode=U_ZERO_ERROR; 5150 5151 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5152 pUIn++; 5153 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); 5154 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5155 { 5156 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5157 } 5158 pUIn--; 5159 5160 errorCode=U_ZERO_ERROR; 5161 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5162 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5163 { 5164 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5165 } 5166 errorCode=U_ZERO_ERROR; 5167 5168 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); 5169 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5170 { 5171 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5172 } 5173 errorCode=U_ZERO_ERROR; 5174 5175 /* 0 byte source request - no error, no pointer movement */ 5176 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5177 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5178 if(U_FAILURE(errorCode)) { 5179 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5180 } 5181 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5182 { 5183 log_err("Unexpected pointer move in 0 byte source request \n"); 5184 } 5185 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5186 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5187 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5188 { 5189 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5190 } 5191 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5192 { 5193 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5194 } 5195 errorCode = U_ZERO_ERROR; 5196 5197 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5198 5199 pUIn = pszUnicode; 5200 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5201 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5202 { 5203 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5204 } 5205 5206 errorCode = U_ZERO_ERROR; 5207 5208 pLIn = (const char *)pszLMBCS; 5209 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5210 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) 5211 { 5212 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5213 } 5214 5215 /* unpaired or chopped LMBCS surrogates */ 5216 5217 /* OK high surrogate, Low surrogate is chopped */ 5218 LIn [0] = (char)0x14; 5219 LIn [1] = (char)0xD8; 5220 LIn [2] = (char)0x01; 5221 LIn [3] = (char)0x14; 5222 LIn [4] = (char)0xDC; 5223 pLIn = LIn; 5224 errorCode = U_ZERO_ERROR; 5225 pUOut = UOut; 5226 5227 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5228 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5229 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5230 { 5231 log_err("Unexpected results on chopped low surrogate\n"); 5232 } 5233 5234 /* chopped at surrogate boundary */ 5235 LIn [0] = (char)0x14; 5236 LIn [1] = (char)0xD8; 5237 LIn [2] = (char)0x01; 5238 pLIn = LIn; 5239 errorCode = U_ZERO_ERROR; 5240 pUOut = UOut; 5241 5242 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5243 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5244 { 5245 log_err("Unexpected results on chopped at surrogate boundary \n"); 5246 } 5247 5248 /* unpaired surrogate plus valid Unichar */ 5249 LIn [0] = (char)0x14; 5250 LIn [1] = (char)0xD8; 5251 LIn [2] = (char)0x01; 5252 LIn [3] = (char)0x14; 5253 LIn [4] = (char)0xC9; 5254 LIn [5] = (char)0xD0; 5255 pLIn = LIn; 5256 errorCode = U_ZERO_ERROR; 5257 pUOut = UOut; 5258 5259 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5260 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5261 { 5262 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5263 } 5264 5265 /* unpaired surrogate plus chopped Unichar */ 5266 LIn [0] = (char)0x14; 5267 LIn [1] = (char)0xD8; 5268 LIn [2] = (char)0x01; 5269 LIn [3] = (char)0x14; 5270 LIn [4] = (char)0xC9; 5271 5272 pLIn = LIn; 5273 errorCode = U_ZERO_ERROR; 5274 pUOut = UOut; 5275 5276 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5277 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5278 { 5279 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5280 } 5281 5282 /* unpaired surrogate plus valid non-Unichar */ 5283 LIn [0] = (char)0x14; 5284 LIn [1] = (char)0xD8; 5285 LIn [2] = (char)0x01; 5286 LIn [3] = (char)0x0F; 5287 LIn [4] = (char)0x3B; 5288 5289 pLIn = LIn; 5290 errorCode = U_ZERO_ERROR; 5291 pUOut = UOut; 5292 5293 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5294 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5295 { 5296 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5297 } 5298 5299 /* unpaired surrogate plus chopped non-Unichar */ 5300 LIn [0] = (char)0x14; 5301 LIn [1] = (char)0xD8; 5302 LIn [2] = (char)0x01; 5303 LIn [3] = (char)0x0F; 5304 5305 pLIn = LIn; 5306 errorCode = U_ZERO_ERROR; 5307 pUOut = UOut; 5308 5309 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5310 5311 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5312 { 5313 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5314 } 5315 } 5316 } 5317 ucnv_close(cnv); /* final cleanup */ 5318 } 5319 5320 5321 static void TestJitterbug255() 5322 { 5323 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 5324 const char *testBuffer = (const char *)testBytes; 5325 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5326 UErrorCode status = U_ZERO_ERROR; 5327 /*UChar32 result;*/ 5328 UConverter *cnv = 0; 5329 5330 cnv = ucnv_open("shift-jis", &status); 5331 if (U_FAILURE(status) || cnv == 0) { 5332 log_data_err("Failed to open the converter for SJIS.\n"); 5333 return; 5334 } 5335 while (testBuffer != testEnd) 5336 { 5337 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5338 if (U_FAILURE(status)) 5339 { 5340 log_err("Failed to convert the next UChar for SJIS.\n"); 5341 break; 5342 } 5343 } 5344 ucnv_close(cnv); 5345 } 5346 5347 static void TestEBCDICUS4XML() 5348 { 5349 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5350 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5351 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5352 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5353 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5354 UChar *unicodes = unicodes_x; 5355 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5356 char *target = target_x; 5357 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5358 UErrorCode status = U_ZERO_ERROR; 5359 UConverter *cnv = 0; 5360 5361 cnv = ucnv_open("ebcdic-xml-us", &status); 5362 if (U_FAILURE(status) || cnv == 0) { 5363 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5364 return; 5365 } 5366 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5367 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5368 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", 5369 u_errorName(status)); 5370 printUSeqErr(unicodes_x, 3); 5371 printUSeqErr(toUnicodeMaps, 3); 5372 } 5373 status = U_ZERO_ERROR; 5374 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5375 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5376 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5377 u_errorName(status)); 5378 printSeqErr((const unsigned char*)target_x, 3); 5379 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5380 } 5381 ucnv_close(cnv); 5382 } 5383 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5384 5385 #if !UCONFIG_NO_COLLATION 5386 5387 static void TestJitterbug981(){ 5388 const UChar* rules; 5389 int32_t rules_length, target_cap, bytes_needed, buff_size; 5390 UErrorCode status = U_ZERO_ERROR; 5391 UConverter *utf8cnv; 5392 UCollator* myCollator; 5393 char *buff; 5394 int numNeeded=0; 5395 utf8cnv = ucnv_open ("utf8", &status); 5396 if(U_FAILURE(status)){ 5397 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5398 return; 5399 } 5400 myCollator = ucol_open("zh", &status); 5401 if(U_FAILURE(status)){ 5402 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5403 ucnv_close(utf8cnv); 5404 return; 5405 } 5406 5407 rules = ucol_getRules(myCollator, &rules_length); 5408 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5409 buff = malloc(buff_size); 5410 5411 target_cap = 0; 5412 do { 5413 ucnv_reset(utf8cnv); 5414 status = U_ZERO_ERROR; 5415 if(target_cap >= buff_size) { 5416 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5417 break; 5418 } 5419 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5420 rules, rules_length, &status); 5421 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5422 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5423 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5424 break; 5425 } 5426 numNeeded = bytes_needed; 5427 } while (status == U_BUFFER_OVERFLOW_ERROR); 5428 ucol_close(myCollator); 5429 ucnv_close(utf8cnv); 5430 free(buff); 5431 } 5432 5433 #endif 5434 5435 static void TestJitterbug1293(){ 5436 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5437 char target[256]; 5438 UErrorCode status = U_ZERO_ERROR; 5439 UConverter* conv=NULL; 5440 int32_t target_cap, bytes_needed, numNeeded = 0; 5441 conv = ucnv_open("shift-jis",&status); 5442 if(U_FAILURE(status)){ 5443 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5444 return; 5445 } 5446 5447 do{ 5448 target_cap =0; 5449 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5450 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5451 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5452 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5453 } 5454 numNeeded = bytes_needed; 5455 } while (status == U_BUFFER_OVERFLOW_ERROR); 5456 if(U_FAILURE(status)){ 5457 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); 5458 return; 5459 } 5460 ucnv_close(conv); 5461 } 5462 static void TestJB5275_1(){ 5463 5464 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5465 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5466 /* Switch script: */ 5467 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5468 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5469 "\xEF\x40\x3B\xB3\x0A"; 5470 static const UChar expected[] ={ 5471 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5472 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5473 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5474 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5475 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5476 }; 5477 5478 UErrorCode status = U_ZERO_ERROR; 5479 UConverter* conv = ucnv_open("iscii-gur", &status); 5480 UChar dest[100] = {'\0'}; 5481 UChar* target = dest; 5482 UChar* targetLimit = dest+100; 5483 const char* source = data; 5484 const char* sourceLimit = data+strlen(data); 5485 const UChar* exp = expected; 5486 5487 if (U_FAILURE(status)) { 5488 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5489 return; 5490 } 5491 5492 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5493 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5494 if(U_FAILURE(status)){ 5495 log_err("conversion failed: %s \n", u_errorName(status)); 5496 } 5497 targetLimit = target; 5498 target = dest; 5499 printUSeq(target, targetLimit-target); 5500 while(target<targetLimit){ 5501 if(*exp!=*target){ 5502 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5503 } 5504 target++; 5505 exp++; 5506 } 5507 ucnv_close(conv); 5508 } 5509 5510 static void TestJB5275(){ 5511 static const char* data = 5512 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5513 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5514 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5515 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5516 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5517 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5518 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5519 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5520 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5521 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5522 static const UChar expected[] ={ 5523 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5524 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5525 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5526 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5527 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5528 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5529 }; 5530 5531 UErrorCode status = U_ZERO_ERROR; 5532 UConverter* conv = ucnv_open("iscii", &status); 5533 UChar dest[100] = {'\0'}; 5534 UChar* target = dest; 5535 UChar* targetLimit = dest+100; 5536 const char* source = data; 5537 const char* sourceLimit = data+strlen(data); 5538 const UChar* exp = expected; 5539 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5540 if(U_FAILURE(status)){ 5541 log_err("conversion failed: %s \n", u_errorName(status)); 5542 } 5543 targetLimit = target; 5544 target = dest; 5545 5546 printUSeq(target, targetLimit-target); 5547 5548 while(target<targetLimit){ 5549 if(*exp!=*target){ 5550 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5551 } 5552 target++; 5553 exp++; 5554 } 5555 ucnv_close(conv); 5556 } 5557