1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Steven R. Loomis 7/8/1999 Adding input buffer test 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "cstring.h" 17 #include "unicode/uloc.h" 18 #include "unicode/ucnv.h" 19 #include "unicode/ucnv_err.h" 20 #include "unicode/ucnv_cb.h" 21 #include "cintltst.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucol.h" 25 #include "cmemory.h" 26 #include "nucnvtst.h" 27 28 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 29 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 30 #if !UCONFIG_NO_COLLATION 31 static void TestJitterbug981(void); 32 #endif 33 static void TestJitterbug1293(void); 34 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 35 static void TestConverterTypesAndStarters(void); 36 static void TestAmbiguous(void); 37 static void TestSignatureDetection(void); 38 static void TestUTF7(void); 39 static void TestIMAP(void); 40 static void TestUTF8(void); 41 static void TestCESU8(void); 42 static void TestUTF16(void); 43 static void TestUTF16BE(void); 44 static void TestUTF16LE(void); 45 static void TestUTF32(void); 46 static void TestUTF32BE(void); 47 static void TestUTF32LE(void); 48 static void TestLATIN1(void); 49 50 #if !UCONFIG_NO_LEGACY_CONVERSION 51 static void TestSBCS(void); 52 static void TestDBCS(void); 53 static void TestMBCS(void); 54 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 55 static void TestICCRunout(void); 56 #endif 57 58 #ifdef U_ENABLE_GENERIC_ISO_2022 59 static void TestISO_2022(void); 60 #endif 61 62 static void TestISO_2022_JP(void); 63 static void TestISO_2022_JP_1(void); 64 static void TestISO_2022_JP_2(void); 65 static void TestISO_2022_KR(void); 66 static void TestISO_2022_KR_1(void); 67 static void TestISO_2022_CN(void); 68 static void TestISO_2022_CN_EXT(void); 69 static void TestJIS(void); 70 static void TestHZ(void); 71 #endif 72 73 static void TestSCSU(void); 74 75 #if !UCONFIG_NO_LEGACY_CONVERSION 76 static void TestEBCDIC_STATEFUL(void); 77 static void TestGB18030(void); 78 static void TestLMBCS(void); 79 static void TestJitterbug255(void); 80 static void TestEBCDICUS4XML(void); 81 static void TestJitterbug915(void); 82 static void TestISCII(void); 83 84 static void TestCoverageMBCS(void); 85 static void TestJitterbug2346(void); 86 static void TestJitterbug2411(void); 87 static void TestJB5275(void); 88 static void TestJB5275_1(void); 89 static void TestJitterbug6175(void); 90 #endif 91 92 static void TestInBufSizes(void); 93 94 static void TestRoundTrippingAllUTF(void); 95 static void TestConv(const uint16_t in[], 96 int len, 97 const char* conv, 98 const char* lang, 99 char byteArr[], 100 int byteArrLen); 101 102 /* open a converter, using test data if it begins with '@' */ 103 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 104 105 106 #define NEW_MAX_BUFFER 999 107 108 static int32_t gInBufferSize = NEW_MAX_BUFFER; 109 static int32_t gOutBufferSize = NEW_MAX_BUFFER; 110 static char gNuConvTestName[1024]; 111 112 #define nct_min(x,y) ((x<y) ? x : y) 113 114 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 115 { 116 if(cnv && cnv[0] == '@') { 117 return ucnv_openPackage(loadTestData(err), cnv+1, err); 118 } else { 119 return ucnv_open(cnv, err); 120 } 121 } 122 123 static void printSeq(const unsigned char* a, int len) 124 { 125 int i=0; 126 log_verbose("{"); 127 while (i<len) 128 log_verbose("0x%02x ", a[i++]); 129 log_verbose("}\n"); 130 } 131 132 static void printUSeq(const UChar* a, int len) 133 { 134 int i=0; 135 log_verbose("{U+"); 136 while (i<len) log_verbose("0x%04x ", a[i++]); 137 log_verbose("}\n"); 138 } 139 140 static void printSeqErr(const unsigned char* a, int len) 141 { 142 int i=0; 143 fprintf(stderr, "{"); 144 while (i<len) 145 fprintf(stderr, "0x%02x ", a[i++]); 146 fprintf(stderr, "}\n"); 147 } 148 149 static void printUSeqErr(const UChar* a, int len) 150 { 151 int i=0; 152 fprintf(stderr, "{U+"); 153 while (i<len) 154 fprintf(stderr, "0x%04x ", a[i++]); 155 fprintf(stderr,"}\n"); 156 } 157 158 static void 159 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 160 { 161 const char* s0; 162 const char* s=(char*)source; 163 const int32_t *r=results; 164 UErrorCode errorCode=U_ZERO_ERROR; 165 UChar32 c; 166 167 while(s<limit) { 168 s0=s; 169 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 170 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 171 break; /* no more significant input */ 172 } else if(U_FAILURE(errorCode)) { 173 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 174 break; 175 } else if( 176 /* test the expected number of input bytes only if >=0 */ 177 (*r>=0 && (int32_t)(s-s0)!=*r) || 178 c!=*(r+1) 179 ) { 180 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 181 message, c, (s-s0), *(r+1), *r); 182 break; 183 } 184 r+=2; 185 } 186 } 187 188 static void 189 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) 190 { 191 const char* s=(char*)source; 192 UErrorCode errorCode=U_ZERO_ERROR; 193 uint32_t c; 194 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 195 if(errorCode != expected){ 196 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 197 } 198 if(c != 0xFFFD && c != 0xffff){ 199 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 200 } 201 202 } 203 204 static void TestInBufSizes(void) 205 { 206 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 207 #if 1 208 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 209 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 210 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 211 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 212 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 213 TestNewConvertWithBufferSizes(1,1); 214 TestNewConvertWithBufferSizes(2,3); 215 TestNewConvertWithBufferSizes(3,2); 216 #endif 217 } 218 219 static void TestOutBufSizes(void) 220 { 221 #if 1 222 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 223 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 224 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 225 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 226 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 227 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 228 229 #endif 230 } 231 232 233 void addTestNewConvert(TestNode** root) 234 { 235 #if !UCONFIG_NO_FILE_IO 236 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 237 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 238 #endif 239 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 240 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 241 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); 242 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 243 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); 244 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 245 246 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 247 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 248 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 249 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 250 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 251 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 252 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 253 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 254 255 #if !UCONFIG_NO_LEGACY_CONVERSION 256 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 257 #endif 258 259 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 260 261 #if !UCONFIG_NO_LEGACY_CONVERSION 262 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 263 #if !UCONFIG_NO_FILE_IO 264 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 265 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 266 #endif 267 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 268 269 #ifdef U_ENABLE_GENERIC_ISO_2022 270 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 271 #endif 272 273 /* BEGIN android-removed 274 To save space, Android does not build full ISO2022 CJK tables. 275 We turn off the tests here. 276 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 277 END android-removed */ 278 279 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); /* BEGIN android-removed */ 280 281 /* BEGIN android-removed 282 To save space, Android does not build full ISO2022 CJK tables. 283 We turn off the tests here. 284 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 285 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 286 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 287 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 288 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 289 END android-removed */ 290 291 /* 292 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 293 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 294 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 295 */ 296 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 297 #endif 298 299 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 300 301 #if !UCONFIG_NO_LEGACY_CONVERSION 302 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 303 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 304 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 305 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 306 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 307 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 308 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 309 #if !UCONFIG_NO_COLLATION 310 /* BEGIN android-removed 311 To save space, Android does not include the collation tailoring rules. 312 Skip the related tests. 313 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 314 END android-removed */ 315 #endif 316 317 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 318 #endif 319 320 321 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 322 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 323 #endif 324 325 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 326 327 #if !UCONFIG_NO_LEGACY_CONVERSION 328 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 329 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 330 /* BEGIN android-removed 331 To save space, Android does not build full ISO2022 CJK tables. 332 We turn off the tests here. 333 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 334 END android-removed */ 335 #endif 336 337 } 338 339 340 /* Note that this test already makes use of statics, so it's not really 341 multithread safe. 342 This convenience function lets us make the error messages actually useful. 343 */ 344 345 static void setNuConvTestName(const char *codepage, const char *direction) 346 { 347 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 348 codepage, 349 direction, 350 (int)gInBufferSize, 351 (int)gOutBufferSize); 352 } 353 354 typedef enum 355 { 356 TC_OK = 0, /* test was OK */ 357 TC_MISMATCH = 1, /* Match failed - err was printed */ 358 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 359 } ETestConvertResult; 360 361 /* Note: This function uses global variables and it will not do offset 362 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 363 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 364 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 365 { 366 UErrorCode status = U_ZERO_ERROR; 367 UConverter *conv = 0; 368 char junkout[NEW_MAX_BUFFER]; /* FIX */ 369 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 370 char *p; 371 const UChar *src; 372 char *end; 373 char *targ; 374 int32_t *offs; 375 int i; 376 int32_t realBufferSize; 377 char *realBufferEnd; 378 const UChar *realSourceEnd; 379 const UChar *sourceLimit; 380 UBool checkOffsets = TRUE; 381 UBool doFlush; 382 383 for(i=0;i<NEW_MAX_BUFFER;i++) 384 junkout[i] = (char)0xF0; 385 for(i=0;i<NEW_MAX_BUFFER;i++) 386 junokout[i] = 0xFF; 387 388 setNuConvTestName(codepage, "FROM"); 389 390 log_verbose("\n========= %s\n", gNuConvTestName); 391 392 conv = my_ucnv_open(codepage, &status); 393 394 if(U_FAILURE(status)) 395 { 396 log_data_err("Couldn't open converter %s\n",codepage); 397 return TC_FAIL; 398 } 399 if(useFallback){ 400 ucnv_setFallback(conv,useFallback); 401 } 402 403 log_verbose("Converter opened..\n"); 404 405 src = source; 406 targ = junkout; 407 offs = junokout; 408 409 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 410 realBufferEnd = junkout + realBufferSize; 411 realSourceEnd = source + sourceLen; 412 413 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 414 checkOffsets = FALSE; 415 416 do 417 { 418 end = nct_min(targ + gOutBufferSize, realBufferEnd); 419 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 420 421 doFlush = (UBool)(sourceLimit == realSourceEnd); 422 423 if(targ == realBufferEnd) { 424 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 425 return TC_FAIL; 426 } 427 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 428 429 430 status = U_ZERO_ERROR; 431 432 ucnv_fromUnicode (conv, 433 &targ, 434 end, 435 &src, 436 sourceLimit, 437 checkOffsets ? offs : NULL, 438 doFlush, /* flush if we're at the end of the input data */ 439 &status); 440 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 441 442 if(U_FAILURE(status)) { 443 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 444 return TC_FAIL; 445 } 446 447 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 448 sourceLen, targ-junkout); 449 450 if(VERBOSITY) 451 { 452 char junk[9999]; 453 char offset_str[9999]; 454 char *ptr; 455 456 junk[0] = 0; 457 offset_str[0] = 0; 458 for(ptr = junkout;ptr<targ;ptr++) { 459 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 460 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 461 } 462 463 log_verbose(junk); 464 printSeq((const uint8_t *)expect, expectLen); 465 if ( checkOffsets ) { 466 log_verbose("\nOffsets:"); 467 log_verbose(offset_str); 468 } 469 log_verbose("\n"); 470 } 471 ucnv_close(conv); 472 473 if(expectLen != targ-junkout) { 474 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 475 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 476 printf("\nGot:"); 477 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 478 printf("\nExpected:"); 479 printSeqErr((const unsigned char*)expect, expectLen); 480 return TC_MISMATCH; 481 } 482 483 if (checkOffsets && (expectOffsets != 0) ) { 484 log_verbose("comparing %d offsets..\n", targ-junkout); 485 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 486 log_err("did not get the expected offsets. %s\n", gNuConvTestName); 487 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 488 log_err("\n"); 489 log_err("Got : "); 490 for(p=junkout;p<targ;p++) { 491 log_err("%d,", junokout[p-junkout]); 492 } 493 log_err("\n"); 494 log_err("Expected: "); 495 for(i=0; i<(targ-junkout); i++) { 496 log_err("%d,", expectOffsets[i]); 497 } 498 log_err("\n"); 499 } 500 } 501 502 log_verbose("comparing..\n"); 503 if(!memcmp(junkout, expect, expectLen)) { 504 log_verbose("Matches!\n"); 505 return TC_OK; 506 } else { 507 log_err("String does not match u->%s\n", gNuConvTestName); 508 printUSeqErr(source, sourceLen); 509 printf("\nGot:"); 510 printSeqErr((const unsigned char *)junkout, expectLen); 511 printf("\nExpected:"); 512 printSeqErr((const unsigned char *)expect, expectLen); 513 514 return TC_MISMATCH; 515 } 516 } 517 518 /* Note: This function uses global variables and it will not do offset 519 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 520 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 521 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 522 { 523 UErrorCode status = U_ZERO_ERROR; 524 UConverter *conv = 0; 525 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 526 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 527 const char *src; 528 const char *realSourceEnd; 529 const char *srcLimit; 530 UChar *p; 531 UChar *targ; 532 UChar *end; 533 int32_t *offs; 534 int i; 535 UBool checkOffsets = TRUE; 536 537 int32_t realBufferSize; 538 UChar *realBufferEnd; 539 540 541 for(i=0;i<NEW_MAX_BUFFER;i++) 542 junkout[i] = 0xFFFE; 543 544 for(i=0;i<NEW_MAX_BUFFER;i++) 545 junokout[i] = -1; 546 547 setNuConvTestName(codepage, "TO"); 548 549 log_verbose("\n========= %s\n", gNuConvTestName); 550 551 conv = my_ucnv_open(codepage, &status); 552 553 if(U_FAILURE(status)) 554 { 555 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 556 return TC_FAIL; 557 } 558 if(useFallback){ 559 ucnv_setFallback(conv,useFallback); 560 } 561 log_verbose("Converter opened..\n"); 562 563 src = (const char *)source; 564 targ = junkout; 565 offs = junokout; 566 567 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 568 realBufferEnd = junkout + realBufferSize; 569 realSourceEnd = src + sourcelen; 570 571 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 572 checkOffsets = FALSE; 573 574 do 575 { 576 end = nct_min( targ + gOutBufferSize, realBufferEnd); 577 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 578 579 if(targ == realBufferEnd) 580 { 581 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 582 return TC_FAIL; 583 } 584 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 585 586 /* oldTarg = targ; */ 587 588 status = U_ZERO_ERROR; 589 590 ucnv_toUnicode (conv, 591 &targ, 592 end, 593 &src, 594 srcLimit, 595 checkOffsets ? offs : NULL, 596 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 597 &status); 598 599 /* offs += (targ-oldTarg); */ 600 601 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 602 603 if(U_FAILURE(status)) 604 { 605 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 606 return TC_FAIL; 607 } 608 609 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 610 sourcelen, targ-junkout); 611 if(VERBOSITY) 612 { 613 char junk[9999]; 614 char offset_str[9999]; 615 UChar *ptr; 616 617 junk[0] = 0; 618 offset_str[0] = 0; 619 620 for(ptr = junkout;ptr<targ;ptr++) 621 { 622 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 623 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 624 } 625 626 log_verbose(junk); 627 printUSeq(expect, expectlen); 628 if ( checkOffsets ) 629 { 630 log_verbose("\nOffsets:"); 631 log_verbose(offset_str); 632 } 633 log_verbose("\n"); 634 } 635 ucnv_close(conv); 636 637 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 638 639 if (checkOffsets && (expectOffsets != 0)) 640 { 641 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 642 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 643 log_err("Got: "); 644 for(p=junkout;p<targ;p++) { 645 log_err("%d,", junokout[p-junkout]); 646 } 647 log_err("\n"); 648 log_err("Expected: "); 649 for(i=0; i<(targ-junkout); i++) { 650 log_err("%d,", expectOffsets[i]); 651 } 652 log_err("\n"); 653 log_err("output: "); 654 for(i=0; i<(targ-junkout); i++) { 655 log_err("%X,", junkout[i]); 656 } 657 log_err("\n"); 658 log_err("input: "); 659 for(i=0; i<(src-(const char *)source); i++) { 660 log_err("%X,", (unsigned char)source[i]); 661 } 662 log_err("\n"); 663 } 664 } 665 666 if(!memcmp(junkout, expect, expectlen*2)) 667 { 668 log_verbose("Matches!\n"); 669 return TC_OK; 670 } 671 else 672 { 673 log_err("String does not match. %s\n", gNuConvTestName); 674 log_verbose("String does not match. %s\n", gNuConvTestName); 675 printf("\nGot:"); 676 printUSeqErr(junkout, expectlen); 677 printf("\nExpected:"); 678 printUSeqErr(expect, expectlen); 679 return TC_MISMATCH; 680 } 681 } 682 683 684 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 685 { 686 /** test chars #1 */ 687 /* 1 2 3 1Han 2Han 3Han . */ 688 static const UChar sampleText[] = 689 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 690 static const UChar sampleTextRoundTripUnmappable[] = 691 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 692 693 694 static const uint8_t expectedUTF8[] = 695 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 696 static const int32_t toUTF8Offs[] = 697 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 698 static const int32_t fmUTF8Offs[] = 699 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 700 701 #ifdef U_ENABLE_GENERIC_ISO_2022 702 /* Same as UTF8, but with ^[%B preceeding */ 703 static const const uint8_t expectedISO2022[] = 704 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 705 static const int32_t toISO2022Offs[] = 706 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 707 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 708 static const int32_t fmISO2022Offs[] = 709 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 710 #endif 711 712 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ 713 static const uint8_t expectedIBM930[] = 714 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 715 static const int32_t toIBM930Offs[] = 716 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 717 static const int32_t fmIBM930Offs[] = 718 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 719 720 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 721 static const uint8_t expectedIBM943[] = 722 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 723 static const int32_t toIBM943Offs [] = 724 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 725 static const int32_t fmIBM943Offs[] = 726 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 727 728 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 729 static const uint8_t expectedIBM9027[] = 730 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 731 static const int32_t toIBM9027Offs [] = 732 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 733 734 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 735 static const uint8_t expectedIBM920[] = 736 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 737 static const int32_t toIBM920Offs [] = 738 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 739 740 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 741 static const uint8_t expectedISO88593[] = 742 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 743 static const int32_t toISO88593Offs[] = 744 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 745 746 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ 747 static const uint8_t expectedLATIN1[] = 748 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 749 static const int32_t toLATIN1Offs[] = 750 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 751 752 753 /* etc */ 754 static const uint8_t expectedUTF16BE[] = 755 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 756 static const int32_t toUTF16BEOffs[]= 757 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 758 static const int32_t fmUTF16BEOffs[] = 759 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 760 761 static const uint8_t expectedUTF16LE[] = 762 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 763 static const int32_t toUTF16LEOffs[]= 764 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 765 static const int32_t fmUTF16LEOffs[] = 766 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 767 768 static const uint8_t expectedUTF32BE[] = 769 { 0x00, 0x00, 0x00, 0x31, 770 0x00, 0x00, 0x00, 0x32, 771 0x00, 0x00, 0x00, 0x33, 772 0x00, 0x00, 0x00, 0x00, 773 0x00, 0x00, 0x4e, 0x00, 774 0x00, 0x00, 0x4e, 0x8c, 775 0x00, 0x00, 0x4e, 0x09, 776 0x00, 0x00, 0x00, 0x2e, 777 0x00, 0x02, 0x00, 0x21 }; 778 static const int32_t toUTF32BEOffs[]= 779 { 0x00, 0x00, 0x00, 0x00, 780 0x01, 0x01, 0x01, 0x01, 781 0x02, 0x02, 0x02, 0x02, 782 0x03, 0x03, 0x03, 0x03, 783 0x04, 0x04, 0x04, 0x04, 784 0x05, 0x05, 0x05, 0x05, 785 0x06, 0x06, 0x06, 0x06, 786 0x07, 0x07, 0x07, 0x07, 787 0x08, 0x08, 0x08, 0x08, 788 0x08, 0x08, 0x08, 0x08 }; 789 static const int32_t fmUTF32BEOffs[] = 790 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 791 792 static const uint8_t expectedUTF32LE[] = 793 { 0x31, 0x00, 0x00, 0x00, 794 0x32, 0x00, 0x00, 0x00, 795 0x33, 0x00, 0x00, 0x00, 796 0x00, 0x00, 0x00, 0x00, 797 0x00, 0x4e, 0x00, 0x00, 798 0x8c, 0x4e, 0x00, 0x00, 799 0x09, 0x4e, 0x00, 0x00, 800 0x2e, 0x00, 0x00, 0x00, 801 0x21, 0x00, 0x02, 0x00 }; 802 static const int32_t toUTF32LEOffs[]= 803 { 0x00, 0x00, 0x00, 0x00, 804 0x01, 0x01, 0x01, 0x01, 805 0x02, 0x02, 0x02, 0x02, 806 0x03, 0x03, 0x03, 0x03, 807 0x04, 0x04, 0x04, 0x04, 808 0x05, 0x05, 0x05, 0x05, 809 0x06, 0x06, 0x06, 0x06, 810 0x07, 0x07, 0x07, 0x07, 811 0x08, 0x08, 0x08, 0x08, 812 0x08, 0x08, 0x08, 0x08 }; 813 static const int32_t fmUTF32LEOffs[] = 814 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 815 816 817 818 819 /** Test chars #2 **/ 820 821 /* Sahha [health], slashed h's */ 822 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 823 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 824 825 /* LMBCS */ 826 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 827 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 828 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 829 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 830 /*********************************** START OF CODE finally *************/ 831 832 gInBufferSize = insize; 833 gOutBufferSize = outsize; 834 835 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 836 837 838 /*UTF-8*/ 839 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 840 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 841 842 log_verbose("Test surrogate behaviour for UTF8\n"); 843 { 844 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 845 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 846 0xf0, 0x90, 0x90, 0x81, 847 0xef, 0xbf, 0xbd 848 }; 849 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 850 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 851 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 852 853 854 } 855 856 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 857 /*ISO-2022*/ 858 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 859 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 860 #endif 861 862 /*UTF16 LE*/ 863 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 864 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 865 /*UTF16 BE*/ 866 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 867 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 868 /*UTF32 LE*/ 869 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 870 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 871 /*UTF32 BE*/ 872 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 873 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 874 875 /*LATIN_1*/ 876 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 877 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 878 879 #if !UCONFIG_NO_LEGACY_CONVERSION 880 /*EBCDIC_STATEFUL*/ 881 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 882 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 883 884 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 885 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 886 887 /*MBCS*/ 888 889 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 890 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); 891 /*DBCS*/ 892 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 893 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 894 /*SBCS*/ 895 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 896 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 897 /*SBCS*/ 898 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 899 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 900 #endif 901 902 903 /****/ 904 905 /*UTF-8*/ 906 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 907 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 908 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 909 /*ISO-2022*/ 910 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 911 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 912 #endif 913 914 /*UTF16 LE*/ 915 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 916 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 917 /*UTF16 BE*/ 918 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 919 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 920 /*UTF32 LE*/ 921 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 922 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 923 /*UTF32 BE*/ 924 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 925 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 926 927 #if !UCONFIG_NO_LEGACY_CONVERSION 928 /*EBCDIC_STATEFUL*/ 929 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 930 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 931 /*MBCS*/ 932 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 933 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 934 #endif 935 936 /* Try it again to make sure it still works */ 937 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 938 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 939 940 #if !UCONFIG_NO_LEGACY_CONVERSION 941 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 942 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 943 944 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 945 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 946 947 /*LMBCS*/ 948 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 949 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 950 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 951 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 952 #endif 953 954 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 955 { 956 /* encode directly set D and set O */ 957 static const uint8_t utf7[] = { 958 /* 959 Hi Mom -+Jjo--! 960 A+ImIDkQ. 961 +- 962 +ZeVnLIqe 963 */ 964 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 965 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 966 0x2b, 0x2d, 967 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 968 }; 969 static const UChar unicode[] = { 970 /* 971 Hi Mom -<WHITE SMILING FACE>-! 972 A<NOT IDENTICAL TO><ALPHA>. 973 + 974 [Japanese word "nihongo"] 975 */ 976 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 977 0x41, 0x2262, 0x0391, 0x2e, 978 0x2b, 979 0x65e5, 0x672c, 0x8a9e 980 }; 981 static const int32_t toUnicodeOffsets[] = { 982 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 983 15, 17, 19, 23, 984 24, 985 27, 29, 32 986 }; 987 static const int32_t fromUnicodeOffsets[] = { 988 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 989 11, 12, 12, 12, 13, 13, 13, 13, 14, 990 15, 15, 991 16, 16, 16, 17, 17, 17, 18, 18, 18 992 }; 993 994 /* same but escaping set O (the exclamation mark) */ 995 static const uint8_t utf7Restricted[] = { 996 /* 997 Hi Mom -+Jjo--+ACE- 998 A+ImIDkQ. 999 +- 1000 +ZeVnLIqe 1001 */ 1002 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 1003 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 1004 0x2b, 0x2d, 1005 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 1006 }; 1007 static const int32_t toUnicodeOffsetsR[] = { 1008 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1009 19, 21, 23, 27, 1010 28, 1011 31, 33, 36 1012 }; 1013 static const int32_t fromUnicodeOffsetsR[] = { 1014 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1015 11, 12, 12, 12, 13, 13, 13, 13, 14, 1016 15, 15, 1017 16, 16, 16, 17, 17, 17, 18, 18, 18 1018 }; 1019 1020 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1021 1022 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 1023 1024 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1025 1026 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1027 } 1028 1029 /* 1030 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 1031 * modified according to RFC 2060, 1032 * and supplemented with the one example in RFC 2060 itself. 1033 */ 1034 { 1035 static const uint8_t imap[] = { 1036 /* Hi Mom -&Jjo--! 1037 A&ImIDkQ-. 1038 &- 1039 &ZeVnLIqe- 1040 \ 1041 ~peter 1042 /mail 1043 /&ZeVnLIqe- 1044 /&U,BTFw- 1045 */ 1046 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1047 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1048 0x26, 0x2d, 1049 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1050 0x5c, 1051 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1052 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1053 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1054 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1055 }; 1056 static const UChar unicode[] = { 1057 /* Hi Mom -<WHITE SMILING FACE>-! 1058 A<NOT IDENTICAL TO><ALPHA>. 1059 & 1060 [Japanese word "nihongo"] 1061 \ 1062 ~peter 1063 /mail 1064 /<65e5, 672c, 8a9e> 1065 /<53f0, 5317> 1066 */ 1067 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1068 0x41, 0x2262, 0x0391, 0x2e, 1069 0x26, 1070 0x65e5, 0x672c, 0x8a9e, 1071 0x5c, 1072 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1073 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1074 0x2f, 0x65e5, 0x672c, 0x8a9e, 1075 0x2f, 0x53f0, 0x5317 1076 }; 1077 static const int32_t toUnicodeOffsets[] = { 1078 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1079 15, 17, 19, 24, 1080 25, 1081 28, 30, 33, 1082 37, 1083 38, 39, 40, 41, 42, 43, 1084 44, 45, 46, 47, 48, 1085 49, 51, 53, 56, 1086 60, 62, 64 1087 }; 1088 static const int32_t fromUnicodeOffsets[] = { 1089 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1090 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1091 15, 15, 1092 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1093 19, 1094 20, 21, 22, 23, 24, 25, 1095 26, 27, 28, 29, 30, 1096 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1097 35, 36, 36, 36, 37, 37, 37, 37, 37 1098 }; 1099 1100 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1101 1102 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1103 } 1104 1105 /* Test UTF-8 bad data handling*/ 1106 { 1107 static const uint8_t utf8[]={ 1108 0x61, 1109 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1110 0x00, 1111 0x62, 1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1113 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1114 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1115 0xdf, 0xbf, /* 7ff */ 1116 0xbf, /* truncated tail */ 1117 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1118 0x02 1119 }; 1120 1121 static const uint16_t utf8Expected[]={ 1122 0x0061, 1123 0xfffd, 1124 0x0000, 1125 0x0062, 1126 0xfffd, 1127 0xfffd, 1128 0xdbff, 0xdfff, 1129 0x07ff, 1130 0xfffd, 1131 0xfffd, 1132 0x0002 1133 }; 1134 1135 static const int32_t utf8Offsets[]={ 1136 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1137 }; 1138 testConvertToU(utf8, sizeof(utf8), 1139 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1140 1141 } 1142 1143 /* Test UTF-32BE bad data handling*/ 1144 { 1145 static const uint8_t utf32[]={ 1146 0x00, 0x00, 0x00, 0x61, 1147 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1148 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1149 0x00, 0x00, 0x00, 0x62, 1150 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1151 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1152 0x00, 0x00, 0x01, 0x62, 1153 0x00, 0x00, 0x02, 0x62 1154 }; 1155 static const uint16_t utf32Expected[]={ 1156 0x0061, 1157 0xfffd, /* 0x110000 out of range */ 1158 0xDBFF, /* 0x10FFFF in range */ 1159 0xDFFF, 1160 0x0062, 1161 0xfffd, /* 0xffffffff out of range */ 1162 0xfffd, /* 0x7fffffff out of range */ 1163 0x0162, 1164 0x0262 1165 }; 1166 static const int32_t utf32Offsets[]={ 1167 0, 4, 8, 8, 12, 16, 20, 24, 28 1168 }; 1169 static const uint8_t utf32ExpectedBack[]={ 1170 0x00, 0x00, 0x00, 0x61, 1171 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ 1172 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1173 0x00, 0x00, 0x00, 0x62, 1174 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1175 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1176 0x00, 0x00, 0x01, 0x62, 1177 0x00, 0x00, 0x02, 0x62 1178 }; 1179 static const int32_t utf32OffsetsBack[]={ 1180 0,0,0,0, 1181 1,1,1,1, 1182 2,2,2,2, 1183 4,4,4,4, 1184 5,5,5,5, 1185 6,6,6,6, 1186 7,7,7,7, 1187 8,8,8,8 1188 }; 1189 1190 testConvertToU(utf32, sizeof(utf32), 1191 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1192 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1193 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1194 } 1195 1196 /* Test UTF-32LE bad data handling*/ 1197 { 1198 static const uint8_t utf32[]={ 1199 0x61, 0x00, 0x00, 0x00, 1200 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1201 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1202 0x62, 0x00, 0x00, 0x00, 1203 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1204 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1205 0x62, 0x01, 0x00, 0x00, 1206 0x62, 0x02, 0x00, 0x00, 1207 }; 1208 1209 static const uint16_t utf32Expected[]={ 1210 0x0061, 1211 0xfffd, /* 0x110000 out of range */ 1212 0xDBFF, /* 0x10FFFF in range */ 1213 0xDFFF, 1214 0x0062, 1215 0xfffd, /* 0xffffffff out of range */ 1216 0xfffd, /* 0x7fffffff out of range */ 1217 0x0162, 1218 0x0262 1219 }; 1220 static const int32_t utf32Offsets[]={ 1221 0, 4, 8, 8, 12, 16, 20, 24, 28 1222 }; 1223 static const uint8_t utf32ExpectedBack[]={ 1224 0x61, 0x00, 0x00, 0x00, 1225 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1226 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1227 0x62, 0x00, 0x00, 0x00, 1228 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1229 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1230 0x62, 0x01, 0x00, 0x00, 1231 0x62, 0x02, 0x00, 0x00 1232 }; 1233 static const int32_t utf32OffsetsBack[]={ 1234 0,0,0,0, 1235 1,1,1,1, 1236 2,2,2,2, 1237 4,4,4,4, 1238 5,5,5,5, 1239 6,6,6,6, 1240 7,7,7,7, 1241 8,8,8,8 1242 }; 1243 testConvertToU(utf32, sizeof(utf32), 1244 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1245 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1246 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1247 } 1248 } 1249 1250 static void TestCoverageMBCS(){ 1251 #if 0 1252 UErrorCode status = U_ZERO_ERROR; 1253 const char *directory = loadTestData(&status); 1254 char* tdpath = NULL; 1255 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1256 int len = strlen(directory); 1257 char* index=NULL; 1258 1259 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1260 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1261 log_verbose("Retrieved data directory %s \n",saveDirectory); 1262 uprv_strcpy(tdpath,directory); 1263 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1264 1265 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1266 *(index+1)=0; 1267 } 1268 u_setDataDirectory(tdpath); 1269 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1270 #endif 1271 1272 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1273 which is test file for MBCS conversion with single-byte codepage data.*/ 1274 { 1275 1276 /* MBCS with single byte codepage data test1.ucm*/ 1277 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1278 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1279 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1280 1281 /*from Unicode*/ 1282 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1283 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1284 } 1285 1286 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm 1287 which is test file for MBCS conversion with three-byte codepage data.*/ 1288 { 1289 1290 /* MBCS with three byte codepage data test3.ucm*/ 1291 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1292 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1293 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1294 1295 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1296 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1297 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1298 1299 /*from Unicode*/ 1300 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1301 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1302 1303 /*to Unicode*/ 1304 testConvertToU(test3input, sizeof(test3input), 1305 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1306 1307 } 1308 1309 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm 1310 which is test file for MBCS conversion with four-byte codepage data.*/ 1311 { 1312 1313 /* MBCS with three byte codepage data test4.ucm*/ 1314 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1315 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1316 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1317 1318 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1319 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1320 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1321 1322 /*from Unicode*/ 1323 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1324 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1325 1326 /*to Unicode*/ 1327 testConvertToU(test4input, sizeof(test4input), 1328 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1329 1330 } 1331 #if 0 1332 free(tdpath); 1333 /* restore the original data directory */ 1334 log_verbose("Setting the data directory to %s \n", saveDirectory); 1335 u_setDataDirectory(saveDirectory); 1336 free(saveDirectory); 1337 #endif 1338 1339 } 1340 1341 static void TestConverterType(const char *convName, UConverterType convType) { 1342 UConverter* myConverter; 1343 UErrorCode err = U_ZERO_ERROR; 1344 1345 myConverter = my_ucnv_open(convName, &err); 1346 1347 if (U_FAILURE(err)) { 1348 log_data_err("Failed to create an %s converter\n", convName); 1349 return; 1350 } 1351 else 1352 { 1353 if (ucnv_getType(myConverter)!=convType) { 1354 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", 1355 convName, convType); 1356 } 1357 else { 1358 log_verbose("ucnv_getType %s ok\n", convName); 1359 } 1360 } 1361 ucnv_close(myConverter); 1362 } 1363 1364 static void TestConverterTypesAndStarters() 1365 { 1366 #if !UCONFIG_NO_LEGACY_CONVERSION 1367 UConverter* myConverter; 1368 UErrorCode err = U_ZERO_ERROR; 1369 UBool mystarters[256]; 1370 1371 /* const UBool expectedKSCstarters[256] = { 1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1386 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1389 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1392 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1393 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1394 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1395 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1397 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1398 1399 1400 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1401 1402 myConverter = ucnv_open("ksc", &err); 1403 if (U_FAILURE(err)) { 1404 log_data_err("Failed to create an ibm-ksc converter\n"); 1405 return; 1406 } 1407 else 1408 { 1409 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1410 log_err("ucnv_getType Failed for ibm-949\n"); 1411 else 1412 log_verbose("ucnv_getType ibm-949 ok\n"); 1413 1414 if(myConverter!=NULL) 1415 ucnv_getStarters(myConverter, mystarters, &err); 1416 1417 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1418 log_err("Failed ucnv_getStarters for ksc\n"); 1419 else 1420 log_verbose("ucnv_getStarters ok\n");*/ 1421 1422 } 1423 ucnv_close(myConverter); 1424 1425 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1426 TestConverterType("ibm-878", UCNV_SBCS); 1427 #endif 1428 1429 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1430 1431 TestConverterType("ibm-1208", UCNV_UTF8); 1432 1433 TestConverterType("utf-8", UCNV_UTF8); 1434 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1435 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1436 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1437 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1438 1439 #if !UCONFIG_NO_LEGACY_CONVERSION 1440 1441 #if defined(U_ENABLE_GENERIC_ISO_2022) 1442 TestConverterType("iso-2022", UCNV_ISO_2022); 1443 #endif 1444 1445 TestConverterType("hz", UCNV_HZ); 1446 #endif 1447 1448 TestConverterType("scsu", UCNV_SCSU); 1449 1450 #if !UCONFIG_NO_LEGACY_CONVERSION 1451 TestConverterType("x-iscii-de", UCNV_ISCII); 1452 #endif 1453 1454 TestConverterType("ascii", UCNV_US_ASCII); 1455 TestConverterType("utf-7", UCNV_UTF7); 1456 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1457 TestConverterType("bocu-1", UCNV_BOCU1); 1458 } 1459 1460 static void 1461 TestAmbiguousConverter(UConverter *cnv) { 1462 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1463 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1464 1465 const char *s; 1466 UChar *u; 1467 UErrorCode errorCode; 1468 UBool isAmbiguous; 1469 1470 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1471 errorCode=U_ZERO_ERROR; 1472 s=inBytes; 1473 u=outUnicode; 1474 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1475 if(U_FAILURE(errorCode)) { 1476 /* we do not care about general failures in this test; the input may just not be mappable */ 1477 return; 1478 } 1479 1480 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1481 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1482 /* There are some encodings that are partially ASCII based, 1483 like the ISO-7 and GSM series of codepages, which we ignore. */ 1484 return; 1485 } 1486 1487 isAmbiguous=ucnv_isAmbiguous(cnv); 1488 1489 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1490 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1491 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1492 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1493 return; 1494 } 1495 1496 if(outUnicode[2]!=0x5c) { 1497 /* needs fixup, fix it */ 1498 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1499 if(outUnicode[2]!=0x5c) { 1500 /* the fix failed */ 1501 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1502 return; 1503 } 1504 } 1505 } 1506 1507 static void TestAmbiguous() 1508 { 1509 UErrorCode status = U_ZERO_ERROR; 1510 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1511 static const char target[] = { 1512 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1513 0x5c, 0x75, 0x73, 0x72, 1514 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1515 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1516 0x5c, 0x64, 0x61, 0x74, 0x61, 1517 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1518 0 1519 }; 1520 UChar asciiResult[200], sjisResult[200]; 1521 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1522 const char *name; 1523 1524 /* enumerate all converters */ 1525 status=U_ZERO_ERROR; 1526 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1527 cnv=ucnv_open(name, &status); 1528 if(U_SUCCESS(status)) { 1529 /* BEGIN android-changed 1530 To save space, Android does not build full ISO2022 CJK tables. 1531 We skip the tests for ISO-2022. */ 1532 const char* cnvName = ucnv_getName(cnv, &status); 1533 if (strlen(cnvName) < 8 || 1534 strncmp(cnvName, "ISO_2022", 8) != 0) { 1535 TestAmbiguousConverter(cnv); 1536 } 1537 /* END android-changed */ 1538 ucnv_close(cnv); 1539 } else { 1540 log_err("error: unable to open available converter \"%s\"\n", name); 1541 status=U_ZERO_ERROR; 1542 } 1543 } 1544 1545 #if !UCONFIG_NO_LEGACY_CONVERSION 1546 sjis_cnv = ucnv_open("ibm-943", &status); 1547 if (U_FAILURE(status)) 1548 { 1549 log_data_err("Failed to create a SJIS converter\n"); 1550 return; 1551 } 1552 ascii_cnv = ucnv_open("LATIN-1", &status); 1553 if (U_FAILURE(status)) 1554 { 1555 log_data_err("Failed to create a LATIN-1 converter\n"); 1556 ucnv_close(sjis_cnv); 1557 return; 1558 } 1559 /* convert target from SJIS to Unicode */ 1560 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1561 if (U_FAILURE(status)) 1562 { 1563 log_err("Failed to convert the SJIS string.\n"); 1564 ucnv_close(sjis_cnv); 1565 ucnv_close(ascii_cnv); 1566 return; 1567 } 1568 /* convert target from Latin-1 to Unicode */ 1569 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1570 if (U_FAILURE(status)) 1571 { 1572 log_err("Failed to convert the Latin-1 string.\n"); 1573 ucnv_close(sjis_cnv); 1574 ucnv_close(ascii_cnv); 1575 return; 1576 } 1577 if (!ucnv_isAmbiguous(sjis_cnv)) 1578 { 1579 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1580 ucnv_close(sjis_cnv); 1581 ucnv_close(ascii_cnv); 1582 return; 1583 } 1584 if (u_strcmp(sjisResult, asciiResult) == 0) 1585 { 1586 log_err("File separators for SJIS don't need to be fixed.\n"); 1587 } 1588 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1589 if (u_strcmp(sjisResult, asciiResult) != 0) 1590 { 1591 log_err("Fixing file separator for SJIS failed.\n"); 1592 } 1593 ucnv_close(sjis_cnv); 1594 ucnv_close(ascii_cnv); 1595 #endif 1596 } 1597 1598 static void 1599 TestSignatureDetection(){ 1600 /* with null terminated strings */ 1601 { 1602 static const char* data[] = { 1603 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1604 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1605 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1606 "\x0E\xFE\xFF\x00", /* SCSU */ 1607 1608 "\xFE\xFF", /* UTF-16BE */ 1609 "\xFF\xFE", /* UTF-16LE */ 1610 "\xEF\xBB\xBF", /* UTF-8 */ 1611 "\x0E\xFE\xFF", /* SCSU */ 1612 1613 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1614 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1615 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1616 "\x0E\xFE\xFF\x41", /* SCSU */ 1617 1618 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1619 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1620 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1621 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1622 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1623 1624 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1625 }; 1626 static const char* expected[] = { 1627 "UTF-16BE", 1628 "UTF-16LE", 1629 "UTF-8", 1630 "SCSU", 1631 1632 "UTF-16BE", 1633 "UTF-16LE", 1634 "UTF-8", 1635 "SCSU", 1636 1637 "UTF-16BE", 1638 "UTF-16LE", 1639 "UTF-8", 1640 "SCSU", 1641 1642 "UTF-7", 1643 "UTF-7", 1644 "UTF-7", 1645 "UTF-7", 1646 "UTF-7", 1647 "UTF-EBCDIC" 1648 }; 1649 static const int32_t expectedLength[] ={ 1650 2, 1651 2, 1652 3, 1653 3, 1654 1655 2, 1656 2, 1657 3, 1658 3, 1659 1660 2, 1661 2, 1662 3, 1663 3, 1664 1665 5, 1666 4, 1667 4, 1668 4, 1669 4, 1670 4 1671 }; 1672 int i=0; 1673 UErrorCode err; 1674 int32_t signatureLength = -1; 1675 const char* source = NULL; 1676 const char* enc = NULL; 1677 for( ; i<sizeof(data)/sizeof(char*); i++){ 1678 err = U_ZERO_ERROR; 1679 source = data[i]; 1680 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1681 if(U_FAILURE(err)){ 1682 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1683 continue; 1684 } 1685 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1686 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1687 continue; 1688 } 1689 if(signatureLength != expectedLength[i]){ 1690 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1691 } 1692 } 1693 } 1694 { 1695 static const char* data[] = { 1696 "\xFE\xFF\x00", /* UTF-16BE */ 1697 "\xFF\xFE\x00", /* UTF-16LE */ 1698 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1699 "\x0E\xFE\xFF\x00", /* SCSU */ 1700 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1701 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1702 "\xFE\xFF", /* UTF-16BE */ 1703 "\xFF\xFE", /* UTF-16LE */ 1704 "\xEF\xBB\xBF", /* UTF-8 */ 1705 "\x0E\xFE\xFF", /* SCSU */ 1706 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1707 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1708 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1709 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1710 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1711 "\x0E\xFE\xFF\x41", /* SCSU */ 1712 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1713 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1714 "\xFB\xEE\x28", /* BOCU-1 */ 1715 "\xFF\x41\x42" /* NULL */ 1716 }; 1717 static const int len[] = { 1718 3, 1719 3, 1720 4, 1721 4, 1722 4, 1723 4, 1724 2, 1725 2, 1726 3, 1727 3, 1728 4, 1729 4, 1730 4, 1731 4, 1732 4, 1733 4, 1734 5, 1735 5, 1736 3, 1737 3 1738 }; 1739 1740 static const char* expected[] = { 1741 "UTF-16BE", 1742 "UTF-16LE", 1743 "UTF-8", 1744 "SCSU", 1745 "UTF-32BE", 1746 "UTF-32LE", 1747 "UTF-16BE", 1748 "UTF-16LE", 1749 "UTF-8", 1750 "SCSU", 1751 "UTF-32BE", 1752 "UTF-32LE", 1753 "UTF-16BE", 1754 "UTF-16LE", 1755 "UTF-8", 1756 "SCSU", 1757 "UTF-32BE", 1758 "UTF-32LE", 1759 "BOCU-1", 1760 NULL 1761 }; 1762 static const int32_t expectedLength[] ={ 1763 2, 1764 2, 1765 3, 1766 3, 1767 4, 1768 4, 1769 2, 1770 2, 1771 3, 1772 3, 1773 4, 1774 4, 1775 2, 1776 2, 1777 3, 1778 3, 1779 4, 1780 4, 1781 3, 1782 0 1783 }; 1784 int i=0; 1785 UErrorCode err; 1786 int32_t signatureLength = -1; 1787 int32_t sourceLength=-1; 1788 const char* source = NULL; 1789 const char* enc = NULL; 1790 for( ; i<sizeof(data)/sizeof(char*); i++){ 1791 err = U_ZERO_ERROR; 1792 source = data[i]; 1793 sourceLength = len[i]; 1794 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); 1795 if(U_FAILURE(err)){ 1796 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1797 continue; 1798 } 1799 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1800 if(expected[i] !=NULL){ 1801 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1802 continue; 1803 } 1804 } 1805 if(signatureLength != expectedLength[i]){ 1806 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1807 } 1808 } 1809 } 1810 } 1811 1812 static void TestUTF7() { 1813 /* test input */ 1814 static const uint8_t in[]={ 1815 /* H - +Jjo- - ! +- +2AHcAQ */ 1816 0x48, 1817 0x2d, 1818 0x2b, 0x4a, 0x6a, 0x6f, 1819 0x2d, 0x2d, 1820 0x21, 1821 0x2b, 0x2d, 1822 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1823 }; 1824 1825 /* expected test results */ 1826 static const int32_t results[]={ 1827 /* number of bytes read, code point */ 1828 1, 0x48, 1829 1, 0x2d, 1830 4, 0x263a, /* <WHITE SMILING FACE> */ 1831 2, 0x2d, 1832 1, 0x21, 1833 2, 0x2b, 1834 7, 0x10401 1835 }; 1836 1837 const char *cnvName; 1838 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1839 UErrorCode errorCode=U_ZERO_ERROR; 1840 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1841 if(U_FAILURE(errorCode)) { 1842 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1843 return; 1844 } 1845 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1846 /* Test the condition when source >= sourceLimit */ 1847 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1848 cnvName = ucnv_getName(cnv, &errorCode); 1849 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1850 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1851 } 1852 ucnv_close(cnv); 1853 } 1854 1855 static void TestIMAP() { 1856 /* test input */ 1857 static const uint8_t in[]={ 1858 /* H - &Jjo- - ! &- &2AHcAQ- \ */ 1859 0x48, 1860 0x2d, 1861 0x26, 0x4a, 0x6a, 0x6f, 1862 0x2d, 0x2d, 1863 0x21, 1864 0x26, 0x2d, 1865 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1866 }; 1867 1868 /* expected test results */ 1869 static const int32_t results[]={ 1870 /* number of bytes read, code point */ 1871 1, 0x48, 1872 1, 0x2d, 1873 4, 0x263a, /* <WHITE SMILING FACE> */ 1874 2, 0x2d, 1875 1, 0x21, 1876 2, 0x26, 1877 7, 0x10401 1878 }; 1879 1880 const char *cnvName; 1881 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1882 UErrorCode errorCode=U_ZERO_ERROR; 1883 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1884 if(U_FAILURE(errorCode)) { 1885 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1886 return; 1887 } 1888 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1889 /* Test the condition when source >= sourceLimit */ 1890 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1891 cnvName = ucnv_getName(cnv, &errorCode); 1892 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1893 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1894 } 1895 ucnv_close(cnv); 1896 } 1897 1898 static void TestUTF8() { 1899 /* test input */ 1900 static const uint8_t in[]={ 1901 0x61, 1902 0xc2, 0x80, 1903 0xe0, 0xa0, 0x80, 1904 0xf0, 0x90, 0x80, 0x80, 1905 0xf4, 0x84, 0x8c, 0xa1, 1906 0xf0, 0x90, 0x90, 0x81 1907 }; 1908 1909 /* expected test results */ 1910 static const int32_t results[]={ 1911 /* number of bytes read, code point */ 1912 1, 0x61, 1913 2, 0x80, 1914 3, 0x800, 1915 4, 0x10000, 1916 4, 0x104321, 1917 4, 0x10401 1918 }; 1919 1920 /* error test input */ 1921 static const uint8_t in2[]={ 1922 0x61, 1923 0xc0, 0x80, /* illegal non-shortest form */ 1924 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1925 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1926 0xc0, 0xc0, /* illegal trail byte */ 1927 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1928 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1929 0xfe, /* illegal byte altogether */ 1930 0x62 1931 }; 1932 1933 /* expected error test results */ 1934 static const int32_t results2[]={ 1935 /* number of bytes read, code point */ 1936 1, 0x61, 1937 22, 0x62 1938 }; 1939 1940 UConverterToUCallback cb; 1941 const void *p; 1942 1943 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1944 UErrorCode errorCode=U_ZERO_ERROR; 1945 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1946 if(U_FAILURE(errorCode)) { 1947 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1948 return; 1949 } 1950 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1951 /* Test the condition when source >= sourceLimit */ 1952 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1953 1954 /* test error behavior with a skip callback */ 1955 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1956 source=(const char *)in2; 1957 limit=(const char *)(in2+sizeof(in2)); 1958 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1959 1960 ucnv_close(cnv); 1961 } 1962 1963 static void TestCESU8() { 1964 /* test input */ 1965 static const uint8_t in[]={ 1966 0x61, 1967 0xc2, 0x80, 1968 0xe0, 0xa0, 0x80, 1969 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1970 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1971 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1972 0xef, 0xbf, 0xbc 1973 }; 1974 1975 /* expected test results */ 1976 static const int32_t results[]={ 1977 /* number of bytes read, code point */ 1978 1, 0x61, 1979 2, 0x80, 1980 3, 0x800, 1981 6, 0x10000, 1982 3, 0xdc01, 1983 -1,0xd802, /* may read 3 or 6 bytes */ 1984 -1,0x10ffff,/* may read 0 or 3 bytes */ 1985 3, 0xfffc 1986 }; 1987 1988 /* error test input */ 1989 static const uint8_t in2[]={ 1990 0x61, 1991 0xc0, 0x80, /* illegal non-shortest form */ 1992 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1993 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1994 0xc0, 0xc0, /* illegal trail byte */ 1995 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 1996 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 1997 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 1998 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1999 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 2000 0xfe, /* illegal byte altogether */ 2001 0x62 2002 }; 2003 2004 /* expected error test results */ 2005 static const int32_t results2[]={ 2006 /* number of bytes read, code point */ 2007 1, 0x61, 2008 34, 0x62 2009 }; 2010 2011 UConverterToUCallback cb; 2012 const void *p; 2013 2014 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 2015 UErrorCode errorCode=U_ZERO_ERROR; 2016 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2017 if(U_FAILURE(errorCode)) { 2018 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2019 return; 2020 } 2021 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2022 /* Test the condition when source >= sourceLimit */ 2023 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2024 2025 /* test error behavior with a skip callback */ 2026 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2027 source=(const char *)in2; 2028 limit=(const char *)(in2+sizeof(in2)); 2029 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2030 2031 ucnv_close(cnv); 2032 } 2033 2034 static void TestUTF16() { 2035 /* test input */ 2036 static const uint8_t in1[]={ 2037 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2038 }; 2039 static const uint8_t in2[]={ 2040 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2041 }; 2042 static const uint8_t in3[]={ 2043 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2044 }; 2045 2046 /* expected test results */ 2047 static const int32_t results1[]={ 2048 /* number of bytes read, code point */ 2049 4, 0x4e00, 2050 2, 0xfeff 2051 }; 2052 static const int32_t results2[]={ 2053 /* number of bytes read, code point */ 2054 4, 0x004e, 2055 2, 0xfffe 2056 }; 2057 static const int32_t results3[]={ 2058 /* number of bytes read, code point */ 2059 2, 0xfefe, 2060 2, 0x4e00, 2061 2, 0xfeff, 2062 4, 0x20001 2063 }; 2064 2065 const char *source, *limit; 2066 2067 UErrorCode errorCode=U_ZERO_ERROR; 2068 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2069 if(U_FAILURE(errorCode)) { 2070 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2071 return; 2072 } 2073 2074 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2075 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2076 2077 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2078 ucnv_resetToUnicode(cnv); 2079 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2080 2081 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2082 ucnv_resetToUnicode(cnv); 2083 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2084 2085 /* Test the condition when source >= sourceLimit */ 2086 ucnv_resetToUnicode(cnv); 2087 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2088 2089 ucnv_close(cnv); 2090 } 2091 2092 static void TestUTF16BE() { 2093 /* test input */ 2094 static const uint8_t in[]={ 2095 0x00, 0x61, 2096 0x00, 0xc0, 2097 0x00, 0x31, 2098 0x00, 0xf4, 2099 0xce, 0xfe, 2100 0xd8, 0x01, 0xdc, 0x01 2101 }; 2102 2103 /* expected test results */ 2104 static const int32_t results[]={ 2105 /* number of bytes read, code point */ 2106 2, 0x61, 2107 2, 0xc0, 2108 2, 0x31, 2109 2, 0xf4, 2110 2, 0xcefe, 2111 4, 0x10401 2112 }; 2113 2114 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2115 UErrorCode errorCode=U_ZERO_ERROR; 2116 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2117 if(U_FAILURE(errorCode)) { 2118 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); 2119 return; 2120 } 2121 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2122 /* Test the condition when source >= sourceLimit */ 2123 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2124 /*Test for the condition where there is an invalid character*/ 2125 { 2126 static const uint8_t source2[]={0x61}; 2127 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2128 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2129 } 2130 #if 0 2131 /* 2132 * Test disabled because currently the UTF-16BE/LE converters are supposed 2133 * to not set errors for unpaired surrogates. 2134 * This may change with 2135 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2136 */ 2137 2138 /*Test for the condition where there is a surrogate pair*/ 2139 { 2140 const uint8_t source2[]={0xd8, 0x01}; 2141 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2142 } 2143 #endif 2144 ucnv_close(cnv); 2145 } 2146 2147 static void 2148 TestUTF16LE() { 2149 /* test input */ 2150 static const uint8_t in[]={ 2151 0x61, 0x00, 2152 0x31, 0x00, 2153 0x4e, 0x2e, 2154 0x4e, 0x00, 2155 0x01, 0xd8, 0x01, 0xdc 2156 }; 2157 2158 /* expected test results */ 2159 static const int32_t results[]={ 2160 /* number of bytes read, code point */ 2161 2, 0x61, 2162 2, 0x31, 2163 2, 0x2e4e, 2164 2, 0x4e, 2165 4, 0x10401 2166 }; 2167 2168 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2169 UErrorCode errorCode=U_ZERO_ERROR; 2170 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2171 if(U_FAILURE(errorCode)) { 2172 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2173 return; 2174 } 2175 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2176 /* Test the condition when source >= sourceLimit */ 2177 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2178 /*Test for the condition where there is an invalid character*/ 2179 { 2180 static const uint8_t source2[]={0x61}; 2181 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2182 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2183 } 2184 #if 0 2185 /* 2186 * Test disabled because currently the UTF-16BE/LE converters are supposed 2187 * to not set errors for unpaired surrogates. 2188 * This may change with 2189 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2190 */ 2191 2192 /*Test for the condition where there is a surrogate character*/ 2193 { 2194 static const uint8_t source2[]={0x01, 0xd8}; 2195 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2196 } 2197 #endif 2198 2199 ucnv_close(cnv); 2200 } 2201 2202 static void TestUTF32() { 2203 /* test input */ 2204 static const uint8_t in1[]={ 2205 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2206 }; 2207 static const uint8_t in2[]={ 2208 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2209 }; 2210 static const uint8_t in3[]={ 2211 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2212 }; 2213 2214 /* expected test results */ 2215 static const int32_t results1[]={ 2216 /* number of bytes read, code point */ 2217 8, 0x100f00, 2218 4, 0xfeff 2219 }; 2220 static const int32_t results2[]={ 2221 /* number of bytes read, code point */ 2222 8, 0x0f1000, 2223 4, 0xfffe 2224 }; 2225 static const int32_t results3[]={ 2226 /* number of bytes read, code point */ 2227 4, 0xfefe, 2228 4, 0x100f00, 2229 4, 0xfffd, /* unmatched surrogate */ 2230 4, 0xfffd /* unmatched surrogate */ 2231 }; 2232 2233 const char *source, *limit; 2234 2235 UErrorCode errorCode=U_ZERO_ERROR; 2236 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2237 if(U_FAILURE(errorCode)) { 2238 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2239 return; 2240 } 2241 2242 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2243 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2244 2245 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2246 ucnv_resetToUnicode(cnv); 2247 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2248 2249 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2250 ucnv_resetToUnicode(cnv); 2251 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2252 2253 /* Test the condition when source >= sourceLimit */ 2254 ucnv_resetToUnicode(cnv); 2255 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2256 2257 ucnv_close(cnv); 2258 } 2259 2260 static void 2261 TestUTF32BE() { 2262 /* test input */ 2263 static const uint8_t in[]={ 2264 0x00, 0x00, 0x00, 0x61, 2265 0x00, 0x00, 0x30, 0x61, 2266 0x00, 0x00, 0xdc, 0x00, 2267 0x00, 0x00, 0xd8, 0x00, 2268 0x00, 0x00, 0xdf, 0xff, 2269 0x00, 0x00, 0xff, 0xfe, 2270 0x00, 0x10, 0xab, 0xcd, 2271 0x00, 0x10, 0xff, 0xff 2272 }; 2273 2274 /* expected test results */ 2275 static const int32_t results[]={ 2276 /* number of bytes read, code point */ 2277 4, 0x61, 2278 4, 0x3061, 2279 4, 0xfffd, 2280 4, 0xfffd, 2281 4, 0xfffd, 2282 4, 0xfffe, 2283 4, 0x10abcd, 2284 4, 0x10ffff 2285 }; 2286 2287 /* error test input */ 2288 static const uint8_t in2[]={ 2289 0x00, 0x00, 0x00, 0x61, 2290 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2291 0x00, 0x00, 0x00, 0x62, 2292 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2293 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2294 0x00, 0x00, 0x01, 0x62, 2295 0x00, 0x00, 0x02, 0x62 2296 }; 2297 2298 /* expected error test results */ 2299 static const int32_t results2[]={ 2300 /* number of bytes read, code point */ 2301 4, 0x61, 2302 8, 0x62, 2303 12, 0x162, 2304 4, 0x262 2305 }; 2306 2307 UConverterToUCallback cb; 2308 const void *p; 2309 2310 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2311 UErrorCode errorCode=U_ZERO_ERROR; 2312 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2313 if(U_FAILURE(errorCode)) { 2314 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2315 return; 2316 } 2317 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2318 2319 /* Test the condition when source >= sourceLimit */ 2320 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2321 2322 /* test error behavior with a skip callback */ 2323 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2324 source=(const char *)in2; 2325 limit=(const char *)(in2+sizeof(in2)); 2326 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2327 2328 ucnv_close(cnv); 2329 } 2330 2331 static void 2332 TestUTF32LE() { 2333 /* test input */ 2334 static const uint8_t in[]={ 2335 0x61, 0x00, 0x00, 0x00, 2336 0x61, 0x30, 0x00, 0x00, 2337 0x00, 0xdc, 0x00, 0x00, 2338 0x00, 0xd8, 0x00, 0x00, 2339 0xff, 0xdf, 0x00, 0x00, 2340 0xfe, 0xff, 0x00, 0x00, 2341 0xcd, 0xab, 0x10, 0x00, 2342 0xff, 0xff, 0x10, 0x00 2343 }; 2344 2345 /* expected test results */ 2346 static const int32_t results[]={ 2347 /* number of bytes read, code point */ 2348 4, 0x61, 2349 4, 0x3061, 2350 4, 0xfffd, 2351 4, 0xfffd, 2352 4, 0xfffd, 2353 4, 0xfffe, 2354 4, 0x10abcd, 2355 4, 0x10ffff 2356 }; 2357 2358 /* error test input */ 2359 static const uint8_t in2[]={ 2360 0x61, 0x00, 0x00, 0x00, 2361 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2362 0x62, 0x00, 0x00, 0x00, 2363 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2364 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2365 0x62, 0x01, 0x00, 0x00, 2366 0x62, 0x02, 0x00, 0x00, 2367 }; 2368 2369 /* expected error test results */ 2370 static const int32_t results2[]={ 2371 /* number of bytes read, code point */ 2372 4, 0x61, 2373 8, 0x62, 2374 12, 0x162, 2375 4, 0x262, 2376 }; 2377 2378 UConverterToUCallback cb; 2379 const void *p; 2380 2381 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2382 UErrorCode errorCode=U_ZERO_ERROR; 2383 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2384 if(U_FAILURE(errorCode)) { 2385 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2386 return; 2387 } 2388 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2389 2390 /* Test the condition when source >= sourceLimit */ 2391 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2392 2393 /* test error behavior with a skip callback */ 2394 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2395 source=(const char *)in2; 2396 limit=(const char *)(in2+sizeof(in2)); 2397 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2398 2399 ucnv_close(cnv); 2400 } 2401 2402 static void 2403 TestLATIN1() { 2404 /* test input */ 2405 static const uint8_t in[]={ 2406 0x61, 2407 0x31, 2408 0x32, 2409 0xc0, 2410 0xf0, 2411 0xf4, 2412 }; 2413 2414 /* expected test results */ 2415 static const int32_t results[]={ 2416 /* number of bytes read, code point */ 2417 1, 0x61, 2418 1, 0x31, 2419 1, 0x32, 2420 1, 0xc0, 2421 1, 0xf0, 2422 1, 0xf4, 2423 }; 2424 static const uint16_t in1[] = { 2425 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2426 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2427 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2428 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2429 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2430 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2431 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2432 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2433 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2434 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2435 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2436 0xcb, 0x82 2437 }; 2438 static const uint8_t out1[] = { 2439 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2440 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2441 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2442 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2443 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2444 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2445 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2446 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2447 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2448 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2449 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2450 0xcb, 0x82 2451 }; 2452 static const uint16_t in2[]={ 2453 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2454 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2455 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2456 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2457 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2458 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2459 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2460 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2461 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2462 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2463 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2464 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2465 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2466 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2467 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2468 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2469 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2470 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2471 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2472 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2473 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2474 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2475 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2476 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2477 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2478 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2479 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2480 0x37, 0x20, 0x2A, 0x2F, 2481 }; 2482 static const unsigned char out2[]={ 2483 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2484 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2485 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2486 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2487 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2488 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2489 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2490 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2491 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2492 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2493 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2494 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2495 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2496 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2497 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2498 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2499 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2500 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2501 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2502 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2503 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2504 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2505 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2506 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2507 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2508 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2509 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2510 0x37, 0x20, 0x2A, 0x2F, 2511 }; 2512 const char *source=(const char *)in; 2513 const char *limit=(const char *)in+sizeof(in); 2514 2515 UErrorCode errorCode=U_ZERO_ERROR; 2516 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2517 if(U_FAILURE(errorCode)) { 2518 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2519 return; 2520 } 2521 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2522 /* Test the condition when source >= sourceLimit */ 2523 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2524 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2525 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2526 2527 ucnv_close(cnv); 2528 } 2529 2530 static void 2531 TestSBCS() { 2532 /* test input */ 2533 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2534 /* expected test results */ 2535 static const int32_t results[]={ 2536 /* number of bytes read, code point */ 2537 1, 0x61, 2538 1, 0xbf, 2539 1, 0xc4, 2540 1, 0x2021, 2541 1, 0xf8ff, 2542 1, 0x00d9 2543 }; 2544 2545 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2546 UErrorCode errorCode=U_ZERO_ERROR; 2547 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2548 if(U_FAILURE(errorCode)) { 2549 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2550 return; 2551 } 2552 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2553 /* Test the condition when source >= sourceLimit */ 2554 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2555 /*Test for Illegal character */ /* 2556 { 2557 static const uint8_t input1[]={ 0xA1 }; 2558 const char* illegalsource=(const char*)input1; 2559 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2560 } 2561 */ 2562 ucnv_close(cnv); 2563 } 2564 2565 static void 2566 TestDBCS() { 2567 /* test input */ 2568 static const uint8_t in[]={ 2569 0x44, 0x6a, 2570 0xc4, 0x9c, 2571 0x7a, 0x74, 2572 0x46, 0xab, 2573 0x42, 0x5b, 2574 2575 }; 2576 2577 /* expected test results */ 2578 static const int32_t results[]={ 2579 /* number of bytes read, code point */ 2580 2, 0x00a7, 2581 2, 0xe1d2, 2582 2, 0x6962, 2583 2, 0xf842, 2584 2, 0xffe5, 2585 }; 2586 2587 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2588 UErrorCode errorCode=U_ZERO_ERROR; 2589 2590 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2591 if(U_FAILURE(errorCode)) { 2592 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2593 return; 2594 } 2595 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2596 /* Test the condition when source >= sourceLimit */ 2597 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2598 /*Test for the condition where there is an invalid character*/ 2599 { 2600 static const uint8_t source2[]={0x1a, 0x1b}; 2601 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2602 } 2603 /*Test for the condition where we have a truncated char*/ 2604 { 2605 static const uint8_t source1[]={0xc4}; 2606 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2607 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2608 } 2609 ucnv_close(cnv); 2610 } 2611 2612 static void 2613 TestMBCS() { 2614 /* test input */ 2615 static const uint8_t in[]={ 2616 0x01, 2617 0xa6, 0xa3, 2618 0x00, 2619 0xa6, 0xa1, 2620 0x08, 2621 0xc2, 0x76, 2622 0xc2, 0x78, 2623 2624 }; 2625 2626 /* expected test results */ 2627 static const int32_t results[]={ 2628 /* number of bytes read, code point */ 2629 1, 0x0001, 2630 2, 0x250c, 2631 1, 0x0000, 2632 2, 0x2500, 2633 1, 0x0008, 2634 2, 0xd60c, 2635 2, 0xd60e, 2636 }; 2637 2638 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2639 UErrorCode errorCode=U_ZERO_ERROR; 2640 2641 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2642 if(U_FAILURE(errorCode)) { 2643 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2644 return; 2645 } 2646 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2647 /* Test the condition when source >= sourceLimit */ 2648 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2649 /*Test for the condition where there is an invalid character*/ 2650 { 2651 static const uint8_t source2[]={0xa1, 0x80}; 2652 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2653 } 2654 /*Test for the condition where we have a truncated char*/ 2655 { 2656 static const uint8_t source1[]={0xc4}; 2657 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2658 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2659 } 2660 ucnv_close(cnv); 2661 2662 } 2663 2664 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2665 static void 2666 TestICCRunout() { 2667 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2668 2669 const char *cnvName = "ibm-1363"; 2670 UErrorCode status = U_ZERO_ERROR; 2671 const uint8_t sourceData[] = { 0xa2, 0xae, 0xa2 }; 2672 UChar expectUData[] = { 0x00a1, 0x001a }; 2673 const uint8_t *source = sourceData; 2674 const uint8_t *sourceLim = sourceData+sizeof(sourceData); 2675 UChar targetBuf[256]; 2676 UChar *target = targetBuf; 2677 UChar *targetLim = target+256; 2678 UChar c1, c2, c3; 2679 UConverter *cnv=ucnv_open(cnvName, &status); 2680 if(U_FAILURE(status)) { 2681 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2682 return; 2683 } 2684 2685 #if 0 2686 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2687 2688 log_info("After convert: target@%d, source@%d, status%s\n", 2689 target-targetBuf, source-sourceData, u_errorName(status)); 2690 2691 if(U_FAILURE(status)) { 2692 log_err("Failed to convert: %s\n", u_errorName(status)); 2693 } else { 2694 2695 } 2696 #endif 2697 2698 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2699 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); 2700 2701 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2702 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); 2703 2704 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2705 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); 2706 2707 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { 2708 log_verbose("OK\n"); 2709 } else { 2710 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); 2711 } 2712 2713 ucnv_close(cnv); 2714 2715 } 2716 #endif 2717 2718 #ifdef U_ENABLE_GENERIC_ISO_2022 2719 2720 static void 2721 TestISO_2022() { 2722 /* test input */ 2723 static const uint8_t in[]={ 2724 0x1b, 0x25, 0x42, 2725 0x31, 2726 0x32, 2727 0x61, 2728 0xc2, 0x80, 2729 0xe0, 0xa0, 0x80, 2730 0xf0, 0x90, 0x80, 0x80 2731 }; 2732 2733 2734 2735 /* expected test results */ 2736 static const int32_t results[]={ 2737 /* number of bytes read, code point */ 2738 4, 0x0031, /* 4 bytes including the escape sequence */ 2739 1, 0x0032, 2740 1, 0x61, 2741 2, 0x80, 2742 3, 0x800, 2743 4, 0x10000 2744 }; 2745 2746 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2747 UErrorCode errorCode=U_ZERO_ERROR; 2748 UConverter *cnv; 2749 2750 cnv=ucnv_open("ISO_2022", &errorCode); 2751 if(U_FAILURE(errorCode)) { 2752 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2753 return; 2754 } 2755 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2756 2757 /* Test the condition when source >= sourceLimit */ 2758 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2759 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2760 /*Test for the condition where we have a truncated char*/ 2761 { 2762 static const uint8_t source1[]={0xc4}; 2763 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2764 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2765 } 2766 /*Test for the condition where there is an invalid character*/ 2767 { 2768 static const uint8_t source2[]={0xa1, 0x01}; 2769 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2770 } 2771 ucnv_close(cnv); 2772 } 2773 2774 #endif 2775 2776 static void 2777 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2778 const UChar* uSource; 2779 const UChar* uSourceLimit; 2780 const char* cSource; 2781 const char* cSourceLimit; 2782 UChar *uTargetLimit =NULL; 2783 UChar *uTarget; 2784 char *cTarget; 2785 const char *cTargetLimit; 2786 char *cBuf; 2787 UChar *uBuf; /*,*test;*/ 2788 int32_t uBufSize = 120; 2789 int len=0; 2790 int i=2; 2791 UErrorCode errorCode=U_ZERO_ERROR; 2792 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2793 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2794 ucnv_reset(cnv); 2795 for(;--i>0; ){ 2796 uSource = (UChar*) source; 2797 uSourceLimit=(const UChar*)sourceLimit; 2798 cTarget = cBuf; 2799 uTarget = uBuf; 2800 cSource = cBuf; 2801 cTargetLimit = cBuf; 2802 uTargetLimit = uBuf; 2803 2804 do{ 2805 2806 cTargetLimit = cTargetLimit+ i; 2807 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2808 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2809 errorCode=U_ZERO_ERROR; 2810 continue; 2811 } 2812 2813 if(U_FAILURE(errorCode)){ 2814 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2815 return; 2816 } 2817 2818 }while (uSource<uSourceLimit); 2819 2820 cSourceLimit =cTarget; 2821 do{ 2822 uTargetLimit=uTargetLimit+i; 2823 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2824 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2825 errorCode=U_ZERO_ERROR; 2826 continue; 2827 } 2828 if(U_FAILURE(errorCode)){ 2829 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2830 return; 2831 } 2832 }while(cSource<cSourceLimit); 2833 2834 uSource = source; 2835 /*test =uBuf;*/ 2836 for(len=0;len<(int)(source - sourceLimit);len++){ 2837 if(uBuf[len]!=uSource[len]){ 2838 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2839 } 2840 } 2841 } 2842 free(uBuf); 2843 free(cBuf); 2844 } 2845 /* Test for Jitterbug 778 */ 2846 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2847 const UChar* uSource; 2848 const UChar* uSourceLimit; 2849 const char* cSource; 2850 UChar *uTargetLimit =NULL; 2851 UChar *uTarget; 2852 char *cTarget; 2853 const char *cTargetLimit; 2854 char *cBuf; 2855 UChar *uBuf,*test; 2856 int32_t uBufSize = 120; 2857 int numCharsInTarget=0; 2858 UErrorCode errorCode=U_ZERO_ERROR; 2859 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2860 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2861 uSource = source; 2862 uSourceLimit=sourceLimit; 2863 cTarget = cBuf; 2864 cTargetLimit = cBuf +uBufSize*5; 2865 uTarget = uBuf; 2866 uTargetLimit = uBuf+ uBufSize*5; 2867 ucnv_reset(cnv); 2868 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2869 if(U_FAILURE(errorCode)){ 2870 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2871 return; 2872 } 2873 cSource = cBuf; 2874 test =uBuf; 2875 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2876 if(U_FAILURE(errorCode)){ 2877 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2878 return; 2879 } 2880 uSource = source; 2881 while(uSource<uSourceLimit){ 2882 if(*test!=*uSource){ 2883 2884 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 2885 } 2886 uSource++; 2887 test++; 2888 } 2889 free(uBuf); 2890 free(cBuf); 2891 } 2892 2893 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2894 const UChar* uSource; 2895 const UChar* uSourceLimit; 2896 const char* cSource; 2897 const char* cSourceLimit; 2898 UChar *uTargetLimit =NULL; 2899 UChar *uTarget; 2900 char *cTarget; 2901 const char *cTargetLimit; 2902 char *cBuf; 2903 UChar *uBuf; /*,*test;*/ 2904 int32_t uBufSize = 120; 2905 int len=0; 2906 int i=2; 2907 const UChar *temp = sourceLimit; 2908 UErrorCode errorCode=U_ZERO_ERROR; 2909 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2910 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2911 2912 ucnv_reset(cnv); 2913 for(;--i>0;){ 2914 uSource = (UChar*) source; 2915 cTarget = cBuf; 2916 uTarget = uBuf; 2917 cSource = cBuf; 2918 cTargetLimit = cBuf; 2919 uTargetLimit = uBuf+uBufSize*5; 2920 cTargetLimit = cTargetLimit+uBufSize*10; 2921 uSourceLimit=uSource; 2922 do{ 2923 2924 if (uSourceLimit < sourceLimit) { 2925 uSourceLimit = uSourceLimit+1; 2926 } 2927 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2928 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2929 errorCode=U_ZERO_ERROR; 2930 continue; 2931 } 2932 2933 if(U_FAILURE(errorCode)){ 2934 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2935 return; 2936 } 2937 2938 }while (uSource<temp); 2939 2940 cSourceLimit =cBuf; 2941 do{ 2942 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2943 cSourceLimit = cSourceLimit+1; 2944 } 2945 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2946 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2947 errorCode=U_ZERO_ERROR; 2948 continue; 2949 } 2950 if(U_FAILURE(errorCode)){ 2951 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2952 return; 2953 } 2954 }while(cSource<cTarget); 2955 2956 uSource = source; 2957 /*test =uBuf;*/ 2958 for(;len<(int)(source - sourceLimit);len++){ 2959 if(uBuf[len]!=uSource[len]){ 2960 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2961 } 2962 } 2963 } 2964 free(uBuf); 2965 free(cBuf); 2966 } 2967 static void 2968 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2969 const uint16_t results[], const char* message){ 2970 /* const char* s0; */ 2971 const char* s=(char*)source; 2972 const uint16_t *r=results; 2973 UErrorCode errorCode=U_ZERO_ERROR; 2974 uint32_t c,exC; 2975 ucnv_reset(cnv); 2976 while(s<limit) { 2977 /* s0=s; */ 2978 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2979 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2980 break; /* no more significant input */ 2981 } else if(U_FAILURE(errorCode)) { 2982 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2983 break; 2984 } else { 2985 if(UTF_IS_FIRST_SURROGATE(*r)){ 2986 int i =0, len = 2; 2987 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE); 2988 r++; 2989 }else{ 2990 exC = *r; 2991 } 2992 if(c!=(uint32_t)(exC)) 2993 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 2994 } 2995 r++; 2996 } 2997 } 2998 2999 static int TestJitterbug930(const char* enc){ 3000 UErrorCode err = U_ZERO_ERROR; 3001 UConverter*converter; 3002 char out[80]; 3003 char*target = out; 3004 UChar in[4]; 3005 const UChar*source = in; 3006 int32_t off[80]; 3007 int32_t* offsets = off; 3008 int numOffWritten=0; 3009 UBool flush = 0; 3010 converter = my_ucnv_open(enc, &err); 3011 3012 in[0] = 0x41; /* 0x4E00;*/ 3013 in[1] = 0x4E01; 3014 in[2] = 0x4E02; 3015 in[3] = 0x4E03; 3016 3017 memset(off, '*', sizeof(off)); 3018 3019 ucnv_fromUnicode (converter, 3020 &target, 3021 target+2, 3022 &source, 3023 source+3, 3024 offsets, 3025 flush, 3026 &err); 3027 3028 /* writes three bytes into the output buffer: 41 1B 24 3029 * but offsets contains 0 1 1 3030 */ 3031 while(*offsets< off[10]){ 3032 numOffWritten++; 3033 offsets++; 3034 } 3035 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3036 if(numOffWritten!= (int)(target-out)){ 3037 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3038 } 3039 3040 err = U_ZERO_ERROR; 3041 3042 memset(off,'*' , sizeof(off)); 3043 3044 flush = 1; 3045 offsets=off; 3046 ucnv_fromUnicode (converter, 3047 &target, 3048 target+4, 3049 &source, 3050 source, 3051 offsets, 3052 flush, 3053 &err); 3054 numOffWritten=0; 3055 while(*offsets< off[10]){ 3056 numOffWritten++; 3057 if(*offsets!= -1){ 3058 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3059 } 3060 offsets++; 3061 } 3062 3063 /* writes 42 43 7A into output buffer, 3064 * offsets contains -1 -1 -1 3065 */ 3066 ucnv_close(converter); 3067 return 0; 3068 } 3069 3070 static void 3071 TestHZ() { 3072 /* test input */ 3073 static const uint16_t in[]={ 3074 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3075 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3076 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3077 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3078 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3079 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3080 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3081 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3082 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3083 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 3084 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3085 0x005A, 0x005B, 0x005C, 0x000A 3086 }; 3087 const UChar* uSource; 3088 const UChar* uSourceLimit; 3089 const char* cSource; 3090 const char* cSourceLimit; 3091 UChar *uTargetLimit =NULL; 3092 UChar *uTarget; 3093 char *cTarget; 3094 const char *cTargetLimit; 3095 char *cBuf; 3096 UChar *uBuf,*test; 3097 int32_t uBufSize = 120; 3098 UErrorCode errorCode=U_ZERO_ERROR; 3099 UConverter *cnv; 3100 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3101 int32_t* myOff= offsets; 3102 cnv=ucnv_open("HZ", &errorCode); 3103 if(U_FAILURE(errorCode)) { 3104 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3105 return; 3106 } 3107 3108 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3109 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3110 uSource = (const UChar*)in; 3111 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3112 cTarget = cBuf; 3113 cTargetLimit = cBuf +uBufSize*5; 3114 uTarget = uBuf; 3115 uTargetLimit = uBuf+ uBufSize*5; 3116 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3117 if(U_FAILURE(errorCode)){ 3118 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3119 return; 3120 } 3121 cSource = cBuf; 3122 cSourceLimit =cTarget; 3123 test =uBuf; 3124 myOff=offsets; 3125 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3126 if(U_FAILURE(errorCode)){ 3127 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3128 return; 3129 } 3130 uSource = (const UChar*)in; 3131 while(uSource<uSourceLimit){ 3132 if(*test!=*uSource){ 3133 3134 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3135 } 3136 uSource++; 3137 test++; 3138 } 3139 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3140 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3141 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3142 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3143 TestJitterbug930("csISO2022JP"); 3144 ucnv_close(cnv); 3145 free(offsets); 3146 free(uBuf); 3147 free(cBuf); 3148 } 3149 3150 static void 3151 TestISCII(){ 3152 /* test input */ 3153 static const uint16_t in[]={ 3154 /* test full range of Devanagari */ 3155 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3156 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3157 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3158 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3159 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3160 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3161 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3162 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3163 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3164 0x096D,0x096E,0x096F, 3165 /* test Soft halant*/ 3166 0x0915,0x094d, 0x200D, 3167 /* test explicit halant */ 3168 0x0915,0x094d, 0x200c, 3169 /* test double danda */ 3170 0x965, 3171 /* test ASCII */ 3172 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3173 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3174 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3175 /* tests from Lotus */ 3176 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3177 0x0930,0x094D,0x200D, 3178 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3179 0x0915,0x0921,0x002B,0x095F, 3180 /* tamil range */ 3181 0x0B86, 0xB87, 0xB88, 3182 /* telugu range */ 3183 0x0C05, 0x0C02, 0x0C03,0x0c31, 3184 /* kannada range */ 3185 0x0C85, 0xC82, 0x0C83, 3186 /* test Abbr sign and Anudatta */ 3187 0x0970, 0x952, 3188 /* 0x0958, 3189 0x0959, 3190 0x095A, 3191 0x095B, 3192 0x095C, 3193 0x095D, 3194 0x095E, 3195 0x095F,*/ 3196 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3197 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3198 0x090C , 3199 0x0962, 3200 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3201 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3202 0x0950 /* OM Symbol 0xa1, 0xE9,*/, 3203 0x093D /* Avagraha 0xEA, 0xE9*/, 3204 0x0958, 3205 0x0959, 3206 0x095A, 3207 0x095B, 3208 0x095C, 3209 0x095D, 3210 0x095E, 3211 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3212 }; 3213 static const unsigned char byteArr[]={ 3214 3215 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3216 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3217 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3218 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3219 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3220 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3221 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3222 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3223 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3224 0xf8,0xf9,0xfa, 3225 /* test soft halant */ 3226 0xb3, 0xE8, 0xE9, 3227 /* test explicit halant */ 3228 0xb3, 0xE8, 0xE8, 3229 /* test double danda */ 3230 0xea, 0xea, 3231 /* test ASCII */ 3232 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3233 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3234 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3235 /* test ATR code */ 3236 3237 /* tests from Lotus */ 3238 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3239 0xEF,0x42,0xCF,0xE8,0xD9, 3240 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3241 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3242 /* tamil range */ 3243 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3244 /* telugu range */ 3245 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3246 /* kannada range */ 3247 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3248 /* anudatta and abbreviation sign */ 3249 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3250 3251 3252 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3253 3254 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3255 3256 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3257 3258 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3259 3260 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3261 3262 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3263 3264 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3265 3266 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ 3267 3268 0xB3, 0xE9, /* Ka + NUKTA */ 3269 3270 0xB4, 0xE9, /* Kha + NUKTA */ 3271 3272 0xB5, 0xE9, /* Ga + NUKTA */ 3273 3274 0xBA, 0xE9, 3275 3276 0xBF, 0xE9, 3277 3278 0xC0, 0xE9, 3279 3280 0xC9, 0xE9, 3281 /* INV halant RA */ 3282 0xD9, 0xE8, 0xCF, 3283 0x00, 0x00A0, 3284 /* just consume unhandled codepoints */ 3285 0xEF, 0x30, 3286 3287 }; 3288 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3289 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3290 3291 } 3292 3293 static void 3294 TestISO_2022_JP() { 3295 /* test input */ 3296 static const uint16_t in[]={ 3297 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3298 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3299 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3300 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3301 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3302 0x201D, 0x3014, 0x000D, 0x000A, 3303 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3304 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3305 }; 3306 const UChar* uSource; 3307 const UChar* uSourceLimit; 3308 const char* cSource; 3309 const char* cSourceLimit; 3310 UChar *uTargetLimit =NULL; 3311 UChar *uTarget; 3312 char *cTarget; 3313 const char *cTargetLimit; 3314 char *cBuf; 3315 UChar *uBuf,*test; 3316 int32_t uBufSize = 120; 3317 UErrorCode errorCode=U_ZERO_ERROR; 3318 UConverter *cnv; 3319 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3320 int32_t* myOff= offsets; 3321 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3322 if(U_FAILURE(errorCode)) { 3323 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3324 return; 3325 } 3326 3327 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3328 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3329 uSource = (const UChar*)in; 3330 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3331 cTarget = cBuf; 3332 cTargetLimit = cBuf +uBufSize*5; 3333 uTarget = uBuf; 3334 uTargetLimit = uBuf+ uBufSize*5; 3335 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3336 if(U_FAILURE(errorCode)){ 3337 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3338 return; 3339 } 3340 cSource = cBuf; 3341 cSourceLimit =cTarget; 3342 test =uBuf; 3343 myOff=offsets; 3344 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3345 if(U_FAILURE(errorCode)){ 3346 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3347 return; 3348 } 3349 3350 uSource = (const UChar*)in; 3351 while(uSource<uSourceLimit){ 3352 if(*test!=*uSource){ 3353 3354 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3355 } 3356 uSource++; 3357 test++; 3358 } 3359 3360 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3361 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3362 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3363 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3364 TestJitterbug930("csISO2022JP"); 3365 ucnv_close(cnv); 3366 free(uBuf); 3367 free(cBuf); 3368 free(offsets); 3369 } 3370 3371 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3372 const UChar* uSource; 3373 const UChar* uSourceLimit; 3374 const char* cSource; 3375 const char* cSourceLimit; 3376 UChar *uTargetLimit =NULL; 3377 UChar *uTarget; 3378 char *cTarget; 3379 const char *cTargetLimit; 3380 char *cBuf; 3381 UChar *uBuf,*test; 3382 int32_t uBufSize = 120*10; 3383 UErrorCode errorCode=U_ZERO_ERROR; 3384 UConverter *cnv; 3385 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3386 int32_t* myOff= offsets; 3387 cnv=my_ucnv_open(conv, &errorCode); 3388 if(U_FAILURE(errorCode)) { 3389 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3390 return; 3391 } 3392 3393 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3394 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3395 uSource = (const UChar*)in; 3396 uSourceLimit=uSource+len; 3397 cTarget = cBuf; 3398 cTargetLimit = cBuf +uBufSize; 3399 uTarget = uBuf; 3400 uTargetLimit = uBuf+ uBufSize; 3401 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3402 if(U_FAILURE(errorCode)){ 3403 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3404 return; 3405 } 3406 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3407 cSource = cBuf; 3408 cSourceLimit =cTarget; 3409 test =uBuf; 3410 myOff=offsets; 3411 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3412 if(U_FAILURE(errorCode)){ 3413 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3414 return; 3415 } 3416 3417 uSource = (const UChar*)in; 3418 while(uSource<uSourceLimit){ 3419 if(*test!=*uSource){ 3420 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3421 } 3422 uSource++; 3423 test++; 3424 } 3425 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3426 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3427 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3428 if(byteArr && byteArrLen!=0){ 3429 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3430 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3431 { 3432 cSource = byteArr; 3433 cSourceLimit = cSource+byteArrLen; 3434 test=uBuf; 3435 myOff = offsets; 3436 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3437 if(U_FAILURE(errorCode)){ 3438 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3439 return; 3440 } 3441 3442 uSource = (const UChar*)in; 3443 while(uSource<uSourceLimit){ 3444 if(*test!=*uSource){ 3445 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3446 } 3447 uSource++; 3448 test++; 3449 } 3450 } 3451 } 3452 3453 ucnv_close(cnv); 3454 free(uBuf); 3455 free(cBuf); 3456 free(offsets); 3457 } 3458 static UChar U_CALLCONV 3459 _charAt(int32_t offset, void *context) { 3460 return ((char*)context)[offset]; 3461 } 3462 3463 static int32_t 3464 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3465 int32_t srcIndex=0; 3466 int32_t dstIndex=0; 3467 if(U_FAILURE(*status)){ 3468 return 0; 3469 } 3470 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3471 *status = U_ILLEGAL_ARGUMENT_ERROR; 3472 return 0; 3473 } 3474 if(srcLen==-1){ 3475 srcLen = (int32_t)uprv_strlen(src); 3476 } 3477 3478 for (; srcIndex<srcLen; ) { 3479 UChar32 c = src[srcIndex++]; 3480 if (c == 0x005C /*'\\'*/) { 3481 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3482 if (c == (UChar32)0xFFFFFFFF) { 3483 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3484 break; /* invalid escape sequence */ 3485 } 3486 } 3487 if(dstIndex < dstLen){ 3488 if(c>0xFFFF){ 3489 dst[dstIndex++] = UTF16_LEAD(c); 3490 if(dstIndex<dstLen){ 3491 dst[dstIndex]=UTF16_TRAIL(c); 3492 }else{ 3493 *status=U_BUFFER_OVERFLOW_ERROR; 3494 } 3495 }else{ 3496 dst[dstIndex]=(UChar)c; 3497 } 3498 3499 }else{ 3500 *status = U_BUFFER_OVERFLOW_ERROR; 3501 } 3502 dstIndex++; /* for preflighting */ 3503 } 3504 return dstIndex; 3505 } 3506 3507 static void 3508 TestFullRoundtrip(const char* cp){ 3509 UChar usource[10] ={0}; 3510 UChar nsrc[10] = {0}; 3511 uint32_t i=1; 3512 int len=0, ulen; 3513 nsrc[0]=0x0061; 3514 /* Test codepoint 0 */ 3515 TestConv(usource,1,cp,"",NULL,0); 3516 TestConv(usource,2,cp,"",NULL,0); 3517 nsrc[2]=0x5555; 3518 TestConv(nsrc,3,cp,"",NULL,0); 3519 3520 for(;i<=0x10FFFF;i++){ 3521 if(i==0xD800){ 3522 i=0xDFFF; 3523 continue; 3524 } 3525 if(i<=0xFFFF){ 3526 usource[0] =(UChar) i; 3527 len=1; 3528 }else{ 3529 usource[0]=UTF16_LEAD(i); 3530 usource[1]=UTF16_TRAIL(i); 3531 len=2; 3532 } 3533 ulen=len; 3534 if(i==0x80) { 3535 usource[2]=0; 3536 } 3537 /* Test only single code points */ 3538 TestConv(usource,ulen,cp,"",NULL,0); 3539 /* Test codepoint repeated twice */ 3540 usource[ulen]=usource[0]; 3541 usource[ulen+1]=usource[1]; 3542 ulen+=len; 3543 TestConv(usource,ulen,cp,"",NULL,0); 3544 /* Test codepoint repeated 3 times */ 3545 usource[ulen]=usource[0]; 3546 usource[ulen+1]=usource[1]; 3547 ulen+=len; 3548 TestConv(usource,ulen,cp,"",NULL,0); 3549 /* Test codepoint in between 2 codepoints */ 3550 nsrc[1]=usource[0]; 3551 nsrc[2]=usource[1]; 3552 nsrc[len+1]=0x5555; 3553 TestConv(nsrc,len+2,cp,"",NULL,0); 3554 uprv_memset(usource,0,sizeof(UChar)*10); 3555 } 3556 } 3557 3558 static void 3559 TestRoundTrippingAllUTF(void){ 3560 if(!QUICK){ 3561 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3562 TestFullRoundtrip("BOCU-1"); 3563 log_verbose("Running exhaustive round trip test for SCSU\n"); 3564 TestFullRoundtrip("SCSU"); 3565 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3566 TestFullRoundtrip("UTF-8"); 3567 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3568 TestFullRoundtrip("CESU-8"); 3569 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3570 TestFullRoundtrip("UTF-16BE"); 3571 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3572 TestFullRoundtrip("UTF-16LE"); 3573 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3574 TestFullRoundtrip("UTF-16"); 3575 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3576 TestFullRoundtrip("UTF-32BE"); 3577 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3578 TestFullRoundtrip("UTF-32LE"); 3579 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3580 TestFullRoundtrip("UTF-32"); 3581 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3582 TestFullRoundtrip("UTF-7"); 3583 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3584 TestFullRoundtrip("UTF-7,version=1"); 3585 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3586 TestFullRoundtrip("IMAP-mailbox-name"); 3587 log_verbose("Running exhaustive round trip test for GB18030\n"); 3588 TestFullRoundtrip("GB18030"); 3589 } 3590 } 3591 3592 static void 3593 TestSCSU() { 3594 3595 static const uint16_t germanUTF16[]={ 3596 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3597 }; 3598 3599 static const uint8_t germanSCSU[]={ 3600 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3601 }; 3602 3603 static const uint16_t russianUTF16[]={ 3604 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3605 }; 3606 3607 static const uint8_t russianSCSU[]={ 3608 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3609 }; 3610 3611 static const uint16_t japaneseUTF16[]={ 3612 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3613 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3614 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3615 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3616 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3617 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 3618 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3619 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3620 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3621 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3622 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3623 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3624 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3625 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3626 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3627 }; 3628 3629 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3630 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3631 static const uint8_t japaneseSCSU[]={ 3632 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3633 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3634 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3635 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3636 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3637 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3638 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3639 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3640 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3641 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3642 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3643 0xcb, 0x82 3644 }; 3645 3646 static const uint16_t allFeaturesUTF16[]={ 3647 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3648 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3649 0x01df, 0xf000, 0xdbff, 0xdfff 3650 }; 3651 3652 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3653 * result here (34B vs. 35B) 3654 */ 3655 static const uint8_t allFeaturesSCSU[]={ 3656 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3657 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3658 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3659 0xdf, 0x14, 0x80, 0x15, 0xff 3660 }; 3661 static const uint16_t monkeyIn[]={ 3662 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3663 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3664 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3665 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3666 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3667 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3668 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3669 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3670 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3671 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3672 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3673 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3674 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3675 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3676 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3677 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3678 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3679 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3680 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3681 /* test non-BMP code points */ 3682 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3683 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3684 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3685 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3686 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3687 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3688 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3689 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3690 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3691 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3692 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3693 3694 3695 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3696 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3697 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3698 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3699 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3700 }; 3701 static const char *fTestCases [] = { 3702 "\\ud800\\udc00", /* smallest surrogate*/ 3703 "\\ud8ff\\udcff", 3704 "\\udBff\\udFff", /* largest surrogate pair*/ 3705 "\\ud834\\udc00", 3706 "\\U0010FFFF", 3707 "Hello \\u9292 \\u9192 World!", 3708 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3709 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3710 3711 "\\u0648\\u06c8", /* catch missing reset*/ 3712 "\\u0648\\u06c8", 3713 3714 "\\u4444\\uE001", /* lowest quotable*/ 3715 "\\u4444\\uf2FF", /* highest quotable*/ 3716 "\\u4444\\uf188\\u4444", 3717 "\\u4444\\uf188\\uf288", 3718 "\\u4444\\uf188abc\\u0429\\uf288", 3719 "\\u9292\\u2222", 3720 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3721 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3722 "Hello World!123456", 3723 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3724 3725 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3726 "abc\\u4411d", /* uses SQU*/ 3727 "abc\\u4411\\u4412d",/* uses SCU*/ 3728 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3729 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3730 "\\u9292\\u2222", 3731 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3732 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3733 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3734 3735 "", /* empty input*/ 3736 "\\u0000", /* smallest BMP character*/ 3737 "\\uFFFF", /* largest BMP character*/ 3738 3739 /* regression tests*/ 3740 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3741 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3742 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3743 "\\u0041\\u00df\\u0401\\u015f", 3744 "\\u9066\\u2123abc", 3745 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3746 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3747 }; 3748 int i=0; 3749 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3750 const char* cSrc = fTestCases[i]; 3751 UErrorCode status = U_ZERO_ERROR; 3752 int32_t cSrcLen,srcLen; 3753 UChar* src; 3754 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3755 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3756 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3757 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3758 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3759 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3760 free(src); 3761 } 3762 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3763 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3764 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3765 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3766 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3767 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3768 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3769 } 3770 3771 #if !UCONFIG_NO_LEGACY_CONVERSION 3772 static void TestJitterbug2346(){ 3773 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3774 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3775 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3776 3777 UChar uTarget[500]={'\0'}; 3778 UChar* utarget=uTarget; 3779 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3780 3781 char cTarget[500]={'\0'}; 3782 char* ctarget=cTarget; 3783 char* ctargetLimit=cTarget+sizeof(cTarget); 3784 const char* csource=source; 3785 UChar* temp = expected; 3786 UErrorCode err=U_ZERO_ERROR; 3787 3788 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3789 if(U_FAILURE(err)) { 3790 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3791 return; 3792 } 3793 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3794 if(U_FAILURE(err)) { 3795 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3796 return; 3797 } 3798 utargetLimit=utarget; 3799 utarget = uTarget; 3800 while(utarget<utargetLimit){ 3801 if(*temp!=*utarget){ 3802 3803 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3804 } 3805 utarget++; 3806 temp++; 3807 } 3808 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3809 if(U_FAILURE(err)) { 3810 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3811 return; 3812 } 3813 ctargetLimit=ctarget; 3814 ctarget =cTarget; 3815 ucnv_close(conv); 3816 3817 3818 } 3819 3820 static void 3821 TestISO_2022_JP_1() { 3822 /* test input */ 3823 static const uint16_t in[]={ 3824 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3825 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3826 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3827 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3828 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3829 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3830 0x201D, 0x000D, 0x000A, 3831 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3832 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3833 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3834 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3835 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3836 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3837 }; 3838 const UChar* uSource; 3839 const UChar* uSourceLimit; 3840 const char* cSource; 3841 const char* cSourceLimit; 3842 UChar *uTargetLimit =NULL; 3843 UChar *uTarget; 3844 char *cTarget; 3845 const char *cTargetLimit; 3846 char *cBuf; 3847 UChar *uBuf,*test; 3848 int32_t uBufSize = 120; 3849 UErrorCode errorCode=U_ZERO_ERROR; 3850 UConverter *cnv; 3851 3852 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3853 if(U_FAILURE(errorCode)) { 3854 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3855 return; 3856 } 3857 3858 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3859 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3860 uSource = (const UChar*)in; 3861 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3862 cTarget = cBuf; 3863 cTargetLimit = cBuf +uBufSize*5; 3864 uTarget = uBuf; 3865 uTargetLimit = uBuf+ uBufSize*5; 3866 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3867 if(U_FAILURE(errorCode)){ 3868 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3869 return; 3870 } 3871 cSource = cBuf; 3872 cSourceLimit =cTarget; 3873 test =uBuf; 3874 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3875 if(U_FAILURE(errorCode)){ 3876 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3877 return; 3878 } 3879 uSource = (const UChar*)in; 3880 while(uSource<uSourceLimit){ 3881 if(*test!=*uSource){ 3882 3883 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3884 } 3885 uSource++; 3886 test++; 3887 } 3888 /*ucnv_close(cnv); 3889 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3890 /*Test for the condition where there is an invalid character*/ 3891 ucnv_reset(cnv); 3892 { 3893 static const uint8_t source2[]={0x0e,0x24,0x053}; 3894 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3895 } 3896 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3897 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3898 ucnv_close(cnv); 3899 free(uBuf); 3900 free(cBuf); 3901 } 3902 3903 static void 3904 TestISO_2022_JP_2() { 3905 /* test input */ 3906 static const uint16_t in[]={ 3907 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3908 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3909 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3910 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3911 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3912 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3913 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3914 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3915 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3916 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3917 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3918 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3919 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3920 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3921 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3922 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3923 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3924 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3925 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3926 }; 3927 const UChar* uSource; 3928 const UChar* uSourceLimit; 3929 const char* cSource; 3930 const char* cSourceLimit; 3931 UChar *uTargetLimit =NULL; 3932 UChar *uTarget; 3933 char *cTarget; 3934 const char *cTargetLimit; 3935 char *cBuf; 3936 UChar *uBuf,*test; 3937 int32_t uBufSize = 120; 3938 UErrorCode errorCode=U_ZERO_ERROR; 3939 UConverter *cnv; 3940 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3941 int32_t* myOff= offsets; 3942 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3943 if(U_FAILURE(errorCode)) { 3944 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3945 return; 3946 } 3947 3948 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3949 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3950 uSource = (const UChar*)in; 3951 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3952 cTarget = cBuf; 3953 cTargetLimit = cBuf +uBufSize*5; 3954 uTarget = uBuf; 3955 uTargetLimit = uBuf+ uBufSize*5; 3956 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3957 if(U_FAILURE(errorCode)){ 3958 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3959 return; 3960 } 3961 cSource = cBuf; 3962 cSourceLimit =cTarget; 3963 test =uBuf; 3964 myOff=offsets; 3965 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3966 if(U_FAILURE(errorCode)){ 3967 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3968 return; 3969 } 3970 uSource = (const UChar*)in; 3971 while(uSource<uSourceLimit){ 3972 if(*test!=*uSource){ 3973 3974 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3975 } 3976 uSource++; 3977 test++; 3978 } 3979 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3980 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3981 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3982 /*Test for the condition where there is an invalid character*/ 3983 ucnv_reset(cnv); 3984 { 3985 static const uint8_t source2[]={0x0e,0x24,0x053}; 3986 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 3987 } 3988 ucnv_close(cnv); 3989 free(uBuf); 3990 free(cBuf); 3991 free(offsets); 3992 } 3993 3994 static void 3995 TestISO_2022_KR() { 3996 /* test input */ 3997 static const uint16_t in[]={ 3998 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 3999 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4000 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4001 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4002 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4003 ,0x53E3,0x53E4,0x000A,0x000D}; 4004 const UChar* uSource; 4005 const UChar* uSourceLimit; 4006 const char* cSource; 4007 const char* cSourceLimit; 4008 UChar *uTargetLimit =NULL; 4009 UChar *uTarget; 4010 char *cTarget; 4011 const char *cTargetLimit; 4012 char *cBuf; 4013 UChar *uBuf,*test; 4014 int32_t uBufSize = 120; 4015 UErrorCode errorCode=U_ZERO_ERROR; 4016 UConverter *cnv; 4017 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4018 int32_t* myOff= offsets; 4019 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 4020 if(U_FAILURE(errorCode)) { 4021 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4022 return; 4023 } 4024 4025 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4026 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4027 uSource = (const UChar*)in; 4028 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4029 cTarget = cBuf; 4030 cTargetLimit = cBuf +uBufSize*5; 4031 uTarget = uBuf; 4032 uTargetLimit = uBuf+ uBufSize*5; 4033 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4034 if(U_FAILURE(errorCode)){ 4035 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4036 return; 4037 } 4038 cSource = cBuf; 4039 cSourceLimit =cTarget; 4040 test =uBuf; 4041 myOff=offsets; 4042 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4043 if(U_FAILURE(errorCode)){ 4044 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4045 return; 4046 } 4047 uSource = (const UChar*)in; 4048 while(uSource<uSourceLimit){ 4049 if(*test!=*uSource){ 4050 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4051 } 4052 uSource++; 4053 test++; 4054 } 4055 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4056 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4057 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4058 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4059 TestJitterbug930("csISO2022KR"); 4060 /*Test for the condition where there is an invalid character*/ 4061 ucnv_reset(cnv); 4062 { 4063 static const uint8_t source2[]={0x1b,0x24,0x053}; 4064 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4065 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4066 } 4067 ucnv_close(cnv); 4068 free(uBuf); 4069 free(cBuf); 4070 free(offsets); 4071 } 4072 4073 static void 4074 TestISO_2022_KR_1() { 4075 /* test input */ 4076 static const uint16_t in[]={ 4077 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4078 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4079 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4080 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4081 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4082 ,0x53E3,0x53E4,0x000A,0x000D}; 4083 const UChar* uSource; 4084 const UChar* uSourceLimit; 4085 const char* cSource; 4086 const char* cSourceLimit; 4087 UChar *uTargetLimit =NULL; 4088 UChar *uTarget; 4089 char *cTarget; 4090 const char *cTargetLimit; 4091 char *cBuf; 4092 UChar *uBuf,*test; 4093 int32_t uBufSize = 120; 4094 UErrorCode errorCode=U_ZERO_ERROR; 4095 UConverter *cnv; 4096 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4097 int32_t* myOff= offsets; 4098 cnv=ucnv_open("ibm-25546", &errorCode); 4099 if(U_FAILURE(errorCode)) { 4100 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4101 return; 4102 } 4103 4104 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4105 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4106 uSource = (const UChar*)in; 4107 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4108 cTarget = cBuf; 4109 cTargetLimit = cBuf +uBufSize*5; 4110 uTarget = uBuf; 4111 uTargetLimit = uBuf+ uBufSize*5; 4112 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4113 if(U_FAILURE(errorCode)){ 4114 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4115 return; 4116 } 4117 cSource = cBuf; 4118 cSourceLimit =cTarget; 4119 test =uBuf; 4120 myOff=offsets; 4121 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4122 if(U_FAILURE(errorCode)){ 4123 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4124 return; 4125 } 4126 uSource = (const UChar*)in; 4127 while(uSource<uSourceLimit){ 4128 if(*test!=*uSource){ 4129 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4130 } 4131 uSource++; 4132 test++; 4133 } 4134 ucnv_reset(cnv); 4135 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4136 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4137 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4138 ucnv_reset(cnv); 4139 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4140 /*Test for the condition where there is an invalid character*/ 4141 ucnv_reset(cnv); 4142 { 4143 static const uint8_t source2[]={0x1b,0x24,0x053}; 4144 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4145 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4146 } 4147 ucnv_close(cnv); 4148 free(uBuf); 4149 free(cBuf); 4150 free(offsets); 4151 } 4152 4153 static void TestJitterbug2411(){ 4154 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4155 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4156 UConverter* kr=NULL, *kr1=NULL; 4157 UErrorCode errorCode = U_ZERO_ERROR; 4158 UChar tgt[100]={'\0'}; 4159 UChar* target = tgt; 4160 UChar* targetLimit = target+100; 4161 kr=ucnv_open("iso-2022-kr", &errorCode); 4162 if(U_FAILURE(errorCode)) { 4163 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4164 return; 4165 } 4166 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4167 if(U_FAILURE(errorCode)) { 4168 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4169 return; 4170 } 4171 kr1 = ucnv_open("ibm-25546", &errorCode); 4172 if(U_FAILURE(errorCode)) { 4173 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4174 return; 4175 } 4176 target = tgt; 4177 targetLimit = target+100; 4178 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4179 4180 if(U_FAILURE(errorCode)) { 4181 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4182 return; 4183 } 4184 4185 ucnv_close(kr); 4186 ucnv_close(kr1); 4187 4188 } 4189 4190 static void 4191 TestJIS(){ 4192 /* From Unicode moved to testdata/conversion.txt */ 4193 /*To Unicode*/ 4194 { 4195 static const uint8_t sampleTextJIS[] = { 4196 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4197 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4198 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4199 }; 4200 static const uint16_t expectedISO2022JIS[] = { 4201 0x0041, 0x0042, 4202 0xFF81, 0xFF82, 4203 0x3000 4204 }; 4205 static const int32_t toISO2022JISOffs[]={ 4206 3,4, 4207 8,9, 4208 16 4209 }; 4210 4211 static const uint8_t sampleTextJIS7[] = { 4212 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4213 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4214 0x1b,0x24,0x42,0x21,0x21, 4215 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4216 0x21,0x22, 4217 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4218 }; 4219 static const uint16_t expectedISO2022JIS7[] = { 4220 0x0041, 0x0042, 4221 0xFF81, 0xFF82, 4222 0x3000, 4223 0xFF81, 0xFF82, 4224 0x3001, 4225 0x3000 4226 }; 4227 static const int32_t toISO2022JIS7Offs[]={ 4228 3,4, 4229 8,9, 4230 13,16, 4231 17, 4232 19,27 4233 }; 4234 static const uint8_t sampleTextJIS8[] = { 4235 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4236 0xa1,0xc8,0xd9,/*Katakana Set*/ 4237 0x1b,0x28,0x42, 4238 0x41,0x42, 4239 0xb1,0xc3, /*Katakana Set*/ 4240 0x1b,0x24,0x42,0x21,0x21 4241 }; 4242 static const uint16_t expectedISO2022JIS8[] = { 4243 0x0041, 0x0042, 4244 0xff61, 0xff88, 0xff99, 4245 0x0041, 0x0042, 4246 0xff71, 0xff83, 4247 0x3000 4248 }; 4249 static const int32_t toISO2022JIS8Offs[]={ 4250 3, 4, 5, 6, 4251 7, 11, 12, 13, 4252 14, 18, 4253 }; 4254 4255 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4256 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4257 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4258 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4259 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4260 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4261 } 4262 4263 } 4264 4265 static void TestJitterbug915(){ 4266 /* tests for roundtripping of the below sequence 4267 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4268 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4269 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4270 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4271 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4272 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4273 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4274 */ 4275 static const char cSource[]={ 4276 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4277 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4278 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4279 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4280 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 4281 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4282 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4283 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4284 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4285 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4286 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4287 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4288 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4289 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4290 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4291 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4292 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4293 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4294 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4295 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4296 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4297 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4298 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4299 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4300 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4301 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4302 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4303 0x37, 0x20, 0x2A, 0x2F 4304 }; 4305 UChar uTarget[500]={'\0'}; 4306 UChar* utarget=uTarget; 4307 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4308 4309 char cTarget[500]={'\0'}; 4310 char* ctarget=cTarget; 4311 char* ctargetLimit=cTarget+sizeof(cTarget); 4312 const char* csource=cSource; 4313 const char* tempSrc = cSource; 4314 UErrorCode err=U_ZERO_ERROR; 4315 4316 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4317 if(U_FAILURE(err)) { 4318 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4319 return; 4320 } 4321 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4322 if(U_FAILURE(err)) { 4323 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4324 return; 4325 } 4326 utargetLimit=utarget; 4327 utarget = uTarget; 4328 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4329 if(U_FAILURE(err)) { 4330 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4331 return; 4332 } 4333 ctargetLimit=ctarget; 4334 ctarget =cTarget; 4335 while(ctarget<ctargetLimit){ 4336 if(*ctarget != *tempSrc){ 4337 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4338 } 4339 ++ctarget; 4340 ++tempSrc; 4341 } 4342 4343 ucnv_close(conv); 4344 } 4345 4346 static void 4347 TestISO_2022_CN_EXT() { 4348 /* test input */ 4349 static const uint16_t in[]={ 4350 /* test Non-BMP code points */ 4351 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4352 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4353 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4354 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4355 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4356 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4357 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4358 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4359 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4360 0xD869, 0xDED5, 4361 4362 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4363 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4364 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 4365 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4366 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4367 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4368 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4369 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4370 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4371 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4372 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4373 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4374 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4375 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4376 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4377 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4378 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4379 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4380 4381 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4382 4383 }; 4384 4385 const UChar* uSource; 4386 const UChar* uSourceLimit; 4387 const char* cSource; 4388 const char* cSourceLimit; 4389 UChar *uTargetLimit =NULL; 4390 UChar *uTarget; 4391 char *cTarget; 4392 const char *cTargetLimit; 4393 char *cBuf; 4394 UChar *uBuf,*test; 4395 int32_t uBufSize = 180; 4396 UErrorCode errorCode=U_ZERO_ERROR; 4397 UConverter *cnv; 4398 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4399 int32_t* myOff= offsets; 4400 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4401 if(U_FAILURE(errorCode)) { 4402 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4403 return; 4404 } 4405 4406 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4407 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4408 uSource = (const UChar*)in; 4409 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4410 cTarget = cBuf; 4411 cTargetLimit = cBuf +uBufSize*5; 4412 uTarget = uBuf; 4413 uTargetLimit = uBuf+ uBufSize*5; 4414 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4415 if(U_FAILURE(errorCode)){ 4416 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4417 return; 4418 } 4419 cSource = cBuf; 4420 cSourceLimit =cTarget; 4421 test =uBuf; 4422 myOff=offsets; 4423 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4424 if(U_FAILURE(errorCode)){ 4425 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4426 return; 4427 } 4428 uSource = (const UChar*)in; 4429 while(uSource<uSourceLimit){ 4430 if(*test!=*uSource){ 4431 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4432 } 4433 else{ 4434 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4435 } 4436 uSource++; 4437 test++; 4438 } 4439 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4440 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4441 /*Test for the condition where there is an invalid character*/ 4442 ucnv_reset(cnv); 4443 { 4444 static const uint8_t source2[]={0x0e,0x24,0x053}; 4445 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4446 } 4447 ucnv_close(cnv); 4448 free(uBuf); 4449 free(cBuf); 4450 free(offsets); 4451 } 4452 4453 static void 4454 TestISO_2022_CN() { 4455 /* test input */ 4456 static const uint16_t in[]={ 4457 /* jitterbug 951 */ 4458 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4459 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4460 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4461 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4462 0x0020, 0x0045, 0x004e, 0x0044, 4463 /**/ 4464 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4465 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4466 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4467 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4468 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4469 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4470 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4471 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4472 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4473 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4474 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4475 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4476 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4477 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4478 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4479 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4480 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4481 4482 }; 4483 const UChar* uSource; 4484 const UChar* uSourceLimit; 4485 const char* cSource; 4486 const char* cSourceLimit; 4487 UChar *uTargetLimit =NULL; 4488 UChar *uTarget; 4489 char *cTarget; 4490 const char *cTargetLimit; 4491 char *cBuf; 4492 UChar *uBuf,*test; 4493 int32_t uBufSize = 180; 4494 UErrorCode errorCode=U_ZERO_ERROR; 4495 UConverter *cnv; 4496 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4497 int32_t* myOff= offsets; 4498 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4499 if(U_FAILURE(errorCode)) { 4500 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4501 return; 4502 } 4503 4504 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4505 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4506 uSource = (const UChar*)in; 4507 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4508 cTarget = cBuf; 4509 cTargetLimit = cBuf +uBufSize*5; 4510 uTarget = uBuf; 4511 uTargetLimit = uBuf+ uBufSize*5; 4512 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4513 if(U_FAILURE(errorCode)){ 4514 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4515 return; 4516 } 4517 cSource = cBuf; 4518 cSourceLimit =cTarget; 4519 test =uBuf; 4520 myOff=offsets; 4521 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4522 if(U_FAILURE(errorCode)){ 4523 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4524 return; 4525 } 4526 uSource = (const UChar*)in; 4527 while(uSource<uSourceLimit){ 4528 if(*test!=*uSource){ 4529 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4530 } 4531 else{ 4532 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4533 } 4534 uSource++; 4535 test++; 4536 } 4537 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4538 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4539 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4540 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4541 TestJitterbug930("csISO2022CN"); 4542 /*Test for the condition where there is an invalid character*/ 4543 ucnv_reset(cnv); 4544 { 4545 static const uint8_t source2[]={0x0e,0x24,0x053}; 4546 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4547 } 4548 4549 ucnv_close(cnv); 4550 free(uBuf); 4551 free(cBuf); 4552 free(offsets); 4553 } 4554 4555 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4556 typedef struct { 4557 const char * converterName; 4558 const char * inputText; 4559 int inputTextLength; 4560 } EmptySegmentTest; 4561 4562 /* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4563 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4564 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4565 if (reason > UCNV_IRREGULAR) { 4566 return; 4567 } 4568 if (reason != UCNV_IRREGULAR) { 4569 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4570 } 4571 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4572 *err = U_ZERO_ERROR; 4573 ucnv_cbToUWriteSub(toArgs,0,err); 4574 } 4575 4576 enum { kEmptySegmentToUCharsMax = 64 }; 4577 static void TestJitterbug6175(void) { 4578 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4579 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4580 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4581 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4582 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4583 static const EmptySegmentTest emptySegmentTests[] = { 4584 /* converterName inputText inputTextLength */ 4585 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, 4586 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4587 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4588 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4589 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, 4590 /* terminator: */ 4591 { NULL, NULL, 0, } 4592 }; 4593 const EmptySegmentTest * testPtr; 4594 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4595 UErrorCode err = U_ZERO_ERROR; 4596 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4597 if (U_FAILURE(err)) { 4598 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4599 return; 4600 } 4601 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4602 if (U_FAILURE(err)) { 4603 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4604 ucnv_close(cnv); 4605 return; 4606 } 4607 { 4608 UChar toUChars[kEmptySegmentToUCharsMax]; 4609 UChar * toUCharsPtr = toUChars; 4610 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4611 const char * inCharsPtr = testPtr->inputText; 4612 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4613 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4614 } 4615 ucnv_close(cnv); 4616 } 4617 } 4618 4619 static void 4620 TestEBCDIC_STATEFUL() { 4621 /* test input */ 4622 static const uint8_t in[]={ 4623 0x61, 4624 0x1a, 4625 0x0f, 0x4b, 4626 0x42, 4627 0x40, 4628 0x36, 4629 }; 4630 4631 /* expected test results */ 4632 static const int32_t results[]={ 4633 /* number of bytes read, code point */ 4634 1, 0x002f, 4635 1, 0x0092, 4636 2, 0x002e, 4637 1, 0xff62, 4638 1, 0x0020, 4639 1, 0x0096, 4640 4641 }; 4642 static const uint8_t in2[]={ 4643 0x0f, 4644 0xa1, 4645 0x01 4646 }; 4647 4648 /* expected test results */ 4649 static const int32_t results2[]={ 4650 /* number of bytes read, code point */ 4651 2, 0x203E, 4652 1, 0x0001, 4653 }; 4654 4655 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4656 UErrorCode errorCode=U_ZERO_ERROR; 4657 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4658 if(U_FAILURE(errorCode)) { 4659 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4660 return; 4661 } 4662 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4663 ucnv_reset(cnv); 4664 /* Test the condition when source >= sourceLimit */ 4665 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4666 ucnv_reset(cnv); 4667 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4668 { 4669 static const uint8_t source1[]={0x0f}; 4670 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4671 } 4672 /*Test for the condition where there is an invalid character*/ 4673 ucnv_reset(cnv); 4674 { 4675 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4676 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4677 } 4678 ucnv_reset(cnv); 4679 source=(const char*)in2; 4680 limit=(const char*)in2+sizeof(in2); 4681 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4682 ucnv_close(cnv); 4683 4684 } 4685 4686 static void 4687 TestGB18030() { 4688 /* test input */ 4689 static const uint8_t in[]={ 4690 0x24, 4691 0x7f, 4692 0x81, 0x30, 0x81, 0x30, 4693 0xa8, 0xbf, 4694 0xa2, 0xe3, 4695 0xd2, 0xbb, 4696 0x82, 0x35, 0x8f, 0x33, 4697 0x84, 0x31, 0xa4, 0x39, 4698 0x90, 0x30, 0x81, 0x30, 4699 0xe3, 0x32, 0x9a, 0x35 4700 #if 0 4701 /* 4702 * Feature removed markus 2000-oct-26 4703 * Only some codepages must match surrogate pairs into supplementary code points - 4704 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . 4705 * GB 18030 provides direct encodings for supplementary code points, therefore 4706 * it must not combine two single-encoded surrogates into one code point. 4707 */ 4708 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4709 #endif 4710 }; 4711 4712 /* expected test results */ 4713 static const int32_t results[]={ 4714 /* number of bytes read, code point */ 4715 1, 0x24, 4716 1, 0x7f, 4717 4, 0x80, 4718 2, 0x1f9, 4719 2, 0x20ac, 4720 2, 0x4e00, 4721 4, 0x9fa6, 4722 4, 0xffff, 4723 4, 0x10000, 4724 4, 0x10ffff 4725 #if 0 4726 /* Feature removed. See comment above. */ 4727 8, 0x10000 4728 #endif 4729 }; 4730 4731 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4732 UErrorCode errorCode=U_ZERO_ERROR; 4733 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4734 if(U_FAILURE(errorCode)) { 4735 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4736 return; 4737 } 4738 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4739 ucnv_close(cnv); 4740 } 4741 4742 static void 4743 TestLMBCS() { 4744 /* LMBCS-1 string */ 4745 static const uint8_t pszLMBCS[]={ 4746 0x61, 4747 0x01, 0x29, 4748 0x81, 4749 0xA0, 4750 0x0F, 0x27, 4751 0x0F, 0x91, 4752 0x14, 0x0a, 0x74, 4753 0x14, 0xF6, 0x02, 4754 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4755 0x10, 0x88, 0xA0, 4756 }; 4757 4758 /* Unicode UChar32 equivalents */ 4759 static const UChar32 pszUnicode32[]={ 4760 /* code point */ 4761 0x00000061, 4762 0x00002013, 4763 0x000000FC, 4764 0x000000E1, 4765 0x00000007, 4766 0x00000091, 4767 0x00000a74, 4768 0x00000200, 4769 0x00023456, /* code point for surrogate pair */ 4770 0x00005516 4771 }; 4772 4773 /* Unicode UChar equivalents */ 4774 static const UChar pszUnicode[]={ 4775 /* code point */ 4776 0x0061, 4777 0x2013, 4778 0x00FC, 4779 0x00E1, 4780 0x0007, 4781 0x0091, 4782 0x0a74, 4783 0x0200, 4784 0xD84D, /* low surrogate */ 4785 0xDC56, /* high surrogate */ 4786 0x5516 4787 }; 4788 4789 /* expected test results */ 4790 static const int offsets32[]={ 4791 /* number of bytes read, code point */ 4792 0, 4793 1, 4794 3, 4795 4, 4796 5, 4797 7, 4798 9, 4799 12, 4800 15, 4801 21, 4802 24 4803 }; 4804 4805 /* expected test results */ 4806 static const int offsets[]={ 4807 /* number of bytes read, code point */ 4808 0, 4809 1, 4810 3, 4811 4, 4812 5, 4813 7, 4814 9, 4815 12, 4816 15, 4817 18, 4818 21, 4819 24 4820 }; 4821 4822 4823 UConverter *cnv; 4824 4825 #define NAME_LMBCS_1 "LMBCS-1" 4826 #define NAME_LMBCS_2 "LMBCS-2" 4827 4828 4829 /* Some basic open/close/property tests on some LMBCS converters */ 4830 { 4831 4832 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4833 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4834 char get_subchars [1]; 4835 const char * get_name; 4836 UConverter *cnv1; 4837 UConverter *cnv2; 4838 4839 int8_t len = sizeof(get_subchars); 4840 4841 UErrorCode errorCode=U_ZERO_ERROR; 4842 4843 /* Open */ 4844 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4845 if(U_FAILURE(errorCode)) { 4846 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4847 return; 4848 } 4849 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4850 if(U_FAILURE(errorCode)) { 4851 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4852 return; 4853 } 4854 4855 /* Name */ 4856 get_name = ucnv_getName (cnv1, &errorCode); 4857 if (strcmp(NAME_LMBCS_1,get_name)){ 4858 log_err("Unexpected converter name: %s\n", get_name); 4859 } 4860 get_name = ucnv_getName (cnv2, &errorCode); 4861 if (strcmp(NAME_LMBCS_2,get_name)){ 4862 log_err("Unexpected converter name: %s\n", get_name); 4863 } 4864 4865 /* substitution chars */ 4866 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4867 if(U_FAILURE(errorCode)) { 4868 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4869 } 4870 if (len!=1){ 4871 log_err("Unexpected length of sub chars\n"); 4872 } 4873 if (get_subchars[0] != expected_subchars[0]){ 4874 log_err("Unexpected value of sub chars\n"); 4875 } 4876 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4877 if(U_FAILURE(errorCode)) { 4878 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4879 } 4880 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4881 if(U_FAILURE(errorCode)) { 4882 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4883 } 4884 if (len!=1){ 4885 log_err("Unexpected length of sub chars\n"); 4886 } 4887 if (get_subchars[0] != new_subchars[0]){ 4888 log_err("Unexpected value of sub chars\n"); 4889 } 4890 ucnv_close(cnv1); 4891 ucnv_close(cnv2); 4892 4893 } 4894 4895 /* LMBCS to Unicode - offsets */ 4896 { 4897 UErrorCode errorCode=U_ZERO_ERROR; 4898 4899 const char * pSource = (const char *)pszLMBCS; 4900 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4901 4902 UChar Out [sizeof(pszUnicode) + 1]; 4903 UChar * pOut = Out; 4904 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4905 4906 int32_t off [sizeof(offsets)]; 4907 4908 /* last 'offset' in expected results is just the final size. 4909 (Makes other tests easier). Compensate here: */ 4910 4911 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4912 4913 4914 4915 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4916 if(U_FAILURE(errorCode)) { 4917 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4918 return; 4919 } 4920 4921 4922 4923 ucnv_toUnicode (cnv, 4924 &pOut, 4925 OutLimit, 4926 &pSource, 4927 sourceLimit, 4928 off, 4929 TRUE, 4930 &errorCode); 4931 4932 4933 if (memcmp(off,offsets,sizeof(offsets))) 4934 { 4935 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4936 } 4937 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4938 { 4939 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4940 } 4941 ucnv_close(cnv); 4942 } 4943 { 4944 /* LMBCS to Unicode - getNextUChar */ 4945 const char * sourceStart; 4946 const char *source=(const char *)pszLMBCS; 4947 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4948 const UChar32 *results= pszUnicode32; 4949 const int *off = offsets32; 4950 4951 UErrorCode errorCode=U_ZERO_ERROR; 4952 UChar32 uniChar; 4953 4954 cnv=ucnv_open("LMBCS-1", &errorCode); 4955 if(U_FAILURE(errorCode)) { 4956 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4957 return; 4958 } 4959 else 4960 { 4961 4962 while(source<limit) { 4963 sourceStart=source; 4964 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4965 if(U_FAILURE(errorCode)) { 4966 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4967 break; 4968 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4969 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4970 uniChar, (source-sourceStart), *results, *off); 4971 break; 4972 } 4973 results++; 4974 off++; 4975 } 4976 } 4977 ucnv_close(cnv); 4978 } 4979 { /* test locale & optimization group operations: Unicode to LMBCS */ 4980 4981 UErrorCode errorCode=U_ZERO_ERROR; 4982 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 4983 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 4984 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 4985 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 4986 const UChar * pUniOut = uniString; 4987 UChar * pUniIn = uniString; 4988 uint8_t lmbcsString [4]; 4989 const char * pLMBCSOut = (const char *)lmbcsString; 4990 char * pLMBCSIn = (char *)lmbcsString; 4991 4992 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 4993 ucnv_fromUnicode (cnv16he, 4994 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 4995 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 4996 NULL, 1, &errorCode); 4997 4998 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 4999 { 5000 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 5001 } 5002 5003 pLMBCSIn= (char *)lmbcsString; 5004 pUniOut = uniString; 5005 ucnv_fromUnicode (cnv01us, 5006 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5007 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5008 NULL, 1, &errorCode); 5009 5010 if (lmbcsString[0] != 0x9F) 5011 { 5012 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5013 } 5014 5015 /* single byte char from mbcs char set */ 5016 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5017 pLMBCSOut = (const char *)lmbcsString; 5018 pUniIn = uniString; 5019 ucnv_toUnicode (cnv16jp, 5020 &pUniIn, pUniIn + 1, 5021 &pLMBCSOut, (pLMBCSOut + 1), 5022 NULL, 1, &errorCode); 5023 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5024 { 5025 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5026 } 5027 /* convert to group 1: should be 3 bytes */ 5028 pLMBCSIn = (char *)lmbcsString; 5029 pUniOut = uniString; 5030 ucnv_fromUnicode (cnv01us, 5031 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5032 &pUniOut, pUniOut + 1, 5033 NULL, 1, &errorCode); 5034 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5035 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5036 { 5037 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5038 } 5039 pLMBCSOut = (const char *)lmbcsString; 5040 pUniIn = uniString; 5041 ucnv_toUnicode (cnv01us, 5042 &pUniIn, pUniIn + 1, 5043 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5044 NULL, 1, &errorCode); 5045 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5046 { 5047 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5048 } 5049 pLMBCSIn = (char *)lmbcsString; 5050 pUniOut = uniString; 5051 ucnv_fromUnicode (cnv16jp, 5052 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5053 &pUniOut, pUniOut + 1, 5054 NULL, 1, &errorCode); 5055 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5056 { 5057 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5058 } 5059 ucnv_close(cnv16he); 5060 ucnv_close(cnv16jp); 5061 ucnv_close(cnv01us); 5062 } 5063 { 5064 /* Small source buffer testing, LMBCS -> Unicode */ 5065 5066 UErrorCode errorCode=U_ZERO_ERROR; 5067 5068 const char * pSource = (const char *)pszLMBCS; 5069 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5070 int codepointCount = 0; 5071 5072 UChar Out [sizeof(pszUnicode) + 1]; 5073 UChar * pOut = Out; 5074 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 5075 5076 5077 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5078 if(U_FAILURE(errorCode)) { 5079 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5080 return; 5081 } 5082 5083 5084 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 5085 { 5086 ucnv_toUnicode (cnv, 5087 &pOut, 5088 OutLimit, 5089 &pSource, 5090 (pSource+1), /* claim that this is a 1- byte buffer */ 5091 NULL, 5092 FALSE, /* FALSE means there might be more chars in the next buffer */ 5093 &errorCode); 5094 5095 if (U_SUCCESS (errorCode)) 5096 { 5097 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5098 { 5099 /* we are on to the next code point: check value */ 5100 5101 if (Out[0] != pszUnicode[codepointCount]){ 5102 log_err("LMBCS->Uni result %lx should have been %lx \n", 5103 Out[0], pszUnicode[codepointCount]); 5104 } 5105 5106 pOut = Out; /* reset for accumulating next code point */ 5107 codepointCount++; 5108 } 5109 } 5110 else 5111 { 5112 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5113 } 5114 } 5115 { 5116 /* limits & surrogate error testing */ 5117 char LIn [sizeof(pszLMBCS)]; 5118 const char * pLIn = LIn; 5119 5120 char LOut [sizeof(pszLMBCS)]; 5121 char * pLOut = LOut; 5122 5123 UChar UOut [sizeof(pszUnicode)]; 5124 UChar * pUOut = UOut; 5125 5126 UChar UIn [sizeof(pszUnicode)]; 5127 const UChar * pUIn = UIn; 5128 5129 int32_t off [sizeof(offsets)]; 5130 UChar32 uniChar; 5131 5132 errorCode=U_ZERO_ERROR; 5133 5134 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5135 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode); 5136 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5137 { 5138 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5139 } 5140 errorCode=U_ZERO_ERROR; 5141 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5142 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5143 { 5144 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5145 } 5146 errorCode=U_ZERO_ERROR; 5147 5148 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); 5149 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5150 { 5151 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5152 } 5153 errorCode=U_ZERO_ERROR; 5154 5155 /* 0 byte source request - no error, no pointer movement */ 5156 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5157 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5158 if(U_FAILURE(errorCode)) { 5159 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5160 } 5161 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5162 { 5163 log_err("Unexpected pointer move in 0 byte source request \n"); 5164 } 5165 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5166 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5167 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5168 { 5169 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5170 } 5171 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5172 { 5173 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5174 } 5175 errorCode = U_ZERO_ERROR; 5176 5177 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5178 5179 pUIn = pszUnicode; 5180 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5181 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5182 { 5183 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5184 } 5185 5186 errorCode = U_ZERO_ERROR; 5187 5188 pLIn = (const char *)pszLMBCS; 5189 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5190 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) 5191 { 5192 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5193 } 5194 5195 /* unpaired or chopped LMBCS surrogates */ 5196 5197 /* OK high surrogate, Low surrogate is chopped */ 5198 LIn [0] = (char)0x14; 5199 LIn [1] = (char)0xD8; 5200 LIn [2] = (char)0x01; 5201 LIn [3] = (char)0x14; 5202 LIn [4] = (char)0xDC; 5203 pLIn = LIn; 5204 errorCode = U_ZERO_ERROR; 5205 pUOut = UOut; 5206 5207 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5208 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5209 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5210 { 5211 log_err("Unexpected results on chopped low surrogate\n"); 5212 } 5213 5214 /* chopped at surrogate boundary */ 5215 LIn [0] = (char)0x14; 5216 LIn [1] = (char)0xD8; 5217 LIn [2] = (char)0x01; 5218 pLIn = LIn; 5219 errorCode = U_ZERO_ERROR; 5220 pUOut = UOut; 5221 5222 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5223 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5224 { 5225 log_err("Unexpected results on chopped at surrogate boundary \n"); 5226 } 5227 5228 /* unpaired surrogate plus valid Unichar */ 5229 LIn [0] = (char)0x14; 5230 LIn [1] = (char)0xD8; 5231 LIn [2] = (char)0x01; 5232 LIn [3] = (char)0x14; 5233 LIn [4] = (char)0xC9; 5234 LIn [5] = (char)0xD0; 5235 pLIn = LIn; 5236 errorCode = U_ZERO_ERROR; 5237 pUOut = UOut; 5238 5239 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5240 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5241 { 5242 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5243 } 5244 5245 /* unpaired surrogate plus chopped Unichar */ 5246 LIn [0] = (char)0x14; 5247 LIn [1] = (char)0xD8; 5248 LIn [2] = (char)0x01; 5249 LIn [3] = (char)0x14; 5250 LIn [4] = (char)0xC9; 5251 5252 pLIn = LIn; 5253 errorCode = U_ZERO_ERROR; 5254 pUOut = UOut; 5255 5256 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5257 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5258 { 5259 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5260 } 5261 5262 /* unpaired surrogate plus valid non-Unichar */ 5263 LIn [0] = (char)0x14; 5264 LIn [1] = (char)0xD8; 5265 LIn [2] = (char)0x01; 5266 LIn [3] = (char)0x0F; 5267 LIn [4] = (char)0x3B; 5268 5269 pLIn = LIn; 5270 errorCode = U_ZERO_ERROR; 5271 pUOut = UOut; 5272 5273 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5274 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5275 { 5276 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5277 } 5278 5279 /* unpaired surrogate plus chopped non-Unichar */ 5280 LIn [0] = (char)0x14; 5281 LIn [1] = (char)0xD8; 5282 LIn [2] = (char)0x01; 5283 LIn [3] = (char)0x0F; 5284 5285 pLIn = LIn; 5286 errorCode = U_ZERO_ERROR; 5287 pUOut = UOut; 5288 5289 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5290 5291 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5292 { 5293 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5294 } 5295 } 5296 } 5297 ucnv_close(cnv); /* final cleanup */ 5298 } 5299 5300 5301 static void TestJitterbug255() 5302 { 5303 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 5304 const char *testBuffer = (const char *)testBytes; 5305 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5306 UErrorCode status = U_ZERO_ERROR; 5307 /*UChar32 result;*/ 5308 UConverter *cnv = 0; 5309 5310 cnv = ucnv_open("shift-jis", &status); 5311 if (U_FAILURE(status) || cnv == 0) { 5312 log_data_err("Failed to open the converter for SJIS.\n"); 5313 return; 5314 } 5315 while (testBuffer != testEnd) 5316 { 5317 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5318 if (U_FAILURE(status)) 5319 { 5320 log_err("Failed to convert the next UChar for SJIS.\n"); 5321 break; 5322 } 5323 } 5324 ucnv_close(cnv); 5325 } 5326 5327 static void TestEBCDICUS4XML() 5328 { 5329 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5330 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5331 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5332 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5333 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5334 UChar *unicodes = unicodes_x; 5335 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5336 char *target = target_x; 5337 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5338 UErrorCode status = U_ZERO_ERROR; 5339 UConverter *cnv = 0; 5340 5341 cnv = ucnv_open("ebcdic-xml-us", &status); 5342 if (U_FAILURE(status) || cnv == 0) { 5343 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5344 return; 5345 } 5346 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5347 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5348 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", 5349 u_errorName(status)); 5350 printUSeqErr(unicodes_x, 3); 5351 printUSeqErr(toUnicodeMaps, 3); 5352 } 5353 status = U_ZERO_ERROR; 5354 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5355 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5356 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5357 u_errorName(status)); 5358 printSeqErr((const unsigned char*)target_x, 3); 5359 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5360 } 5361 ucnv_close(cnv); 5362 } 5363 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5364 5365 #if !UCONFIG_NO_COLLATION 5366 5367 static void TestJitterbug981(){ 5368 const UChar* rules; 5369 int32_t rules_length, target_cap, bytes_needed, buff_size; 5370 UErrorCode status = U_ZERO_ERROR; 5371 UConverter *utf8cnv; 5372 UCollator* myCollator; 5373 char *buff; 5374 int numNeeded=0; 5375 utf8cnv = ucnv_open ("utf8", &status); 5376 if(U_FAILURE(status)){ 5377 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5378 return; 5379 } 5380 myCollator = ucol_open("zh", &status); 5381 if(U_FAILURE(status)){ 5382 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5383 ucnv_close(utf8cnv); 5384 return; 5385 } 5386 5387 rules = ucol_getRules(myCollator, &rules_length); 5388 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5389 buff = malloc(buff_size); 5390 5391 target_cap = 0; 5392 do { 5393 ucnv_reset(utf8cnv); 5394 status = U_ZERO_ERROR; 5395 if(target_cap >= buff_size) { 5396 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5397 break; 5398 } 5399 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5400 rules, rules_length, &status); 5401 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5402 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5403 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5404 break; 5405 } 5406 numNeeded = bytes_needed; 5407 } while (status == U_BUFFER_OVERFLOW_ERROR); 5408 ucol_close(myCollator); 5409 ucnv_close(utf8cnv); 5410 free(buff); 5411 } 5412 5413 #endif 5414 5415 static void TestJitterbug1293(){ 5416 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5417 char target[256]; 5418 UErrorCode status = U_ZERO_ERROR; 5419 UConverter* conv=NULL; 5420 int32_t target_cap, bytes_needed, numNeeded = 0; 5421 conv = ucnv_open("shift-jis",&status); 5422 if(U_FAILURE(status)){ 5423 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5424 return; 5425 } 5426 5427 do{ 5428 target_cap =0; 5429 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5430 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5431 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5432 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5433 } 5434 numNeeded = bytes_needed; 5435 } while (status == U_BUFFER_OVERFLOW_ERROR); 5436 if(U_FAILURE(status)){ 5437 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); 5438 return; 5439 } 5440 ucnv_close(conv); 5441 } 5442 static void TestJB5275_1(){ 5443 5444 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5445 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5446 /* Switch script: */ 5447 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5448 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5449 "\xEF\x40\x3B\xB3\x0A"; 5450 static const UChar expected[] ={ 5451 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5452 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5453 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5454 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5455 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5456 }; 5457 5458 UErrorCode status = U_ZERO_ERROR; 5459 UConverter* conv = ucnv_open("iscii-gur", &status); 5460 UChar dest[100] = {'\0'}; 5461 UChar* target = dest; 5462 UChar* targetLimit = dest+100; 5463 const char* source = data; 5464 const char* sourceLimit = data+strlen(data); 5465 const UChar* exp = expected; 5466 5467 if (U_FAILURE(status)) { 5468 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5469 return; 5470 } 5471 5472 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5473 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5474 if(U_FAILURE(status)){ 5475 log_err("conversion failed: %s \n", u_errorName(status)); 5476 } 5477 targetLimit = target; 5478 target = dest; 5479 printUSeq(target, targetLimit-target); 5480 while(target<targetLimit){ 5481 if(*exp!=*target){ 5482 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5483 } 5484 target++; 5485 exp++; 5486 } 5487 ucnv_close(conv); 5488 } 5489 5490 static void TestJB5275(){ 5491 static const char* data = 5492 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5493 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5494 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5495 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5496 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5497 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5498 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5499 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5500 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5501 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5502 static const UChar expected[] ={ 5503 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5504 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5505 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5506 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5507 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5508 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5509 }; 5510 5511 UErrorCode status = U_ZERO_ERROR; 5512 UConverter* conv = ucnv_open("iscii", &status); 5513 UChar dest[100] = {'\0'}; 5514 UChar* target = dest; 5515 UChar* targetLimit = dest+100; 5516 const char* source = data; 5517 const char* sourceLimit = data+strlen(data); 5518 const UChar* exp = expected; 5519 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5520 if(U_FAILURE(status)){ 5521 log_err("conversion failed: %s \n", u_errorName(status)); 5522 } 5523 targetLimit = target; 5524 target = dest; 5525 5526 printUSeq(target, targetLimit-target); 5527 5528 while(target<targetLimit){ 5529 if(*exp!=*target){ 5530 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5531 } 5532 target++; 5533 exp++; 5534 } 5535 ucnv_close(conv); 5536 } 5537