1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Steven R. Loomis 7/8/1999 Adding input buffer test 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "cstring.h" 17 #include "unicode/uloc.h" 18 #include "unicode/ucnv.h" 19 #include "unicode/ucnv_err.h" 20 #include "unicode/ucnv_cb.h" 21 #include "cintltst.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucol.h" 25 #include "cmemory.h" 26 #include "nucnvtst.h" 27 28 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 29 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 30 #if !UCONFIG_NO_COLLATION 31 static void TestJitterbug981(void); 32 #endif 33 static void TestJitterbug1293(void); 34 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 35 static void TestConverterTypesAndStarters(void); 36 static void TestAmbiguous(void); 37 static void TestSignatureDetection(void); 38 static void TestUTF7(void); 39 static void TestIMAP(void); 40 static void TestUTF8(void); 41 static void TestCESU8(void); 42 static void TestUTF16(void); 43 static void TestUTF16BE(void); 44 static void TestUTF16LE(void); 45 static void TestUTF32(void); 46 static void TestUTF32BE(void); 47 static void TestUTF32LE(void); 48 static void TestLATIN1(void); 49 50 #if !UCONFIG_NO_LEGACY_CONVERSION 51 static void TestSBCS(void); 52 static void TestDBCS(void); 53 static void TestMBCS(void); 54 
#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 55 static void TestICCRunout(void); 56 #endif 57 58 #ifdef U_ENABLE_GENERIC_ISO_2022 59 static void TestISO_2022(void); 60 #endif 61 62 static void TestISO_2022_JP(void); 63 static void TestISO_2022_JP_1(void); 64 static void TestISO_2022_JP_2(void); 65 static void TestISO_2022_KR(void); 66 static void TestISO_2022_KR_1(void); 67 static void TestISO_2022_CN(void); 68 #if 0 69 /* 70 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 71 */ 72 static void TestISO_2022_CN_EXT(void); 73 #endif 74 static void TestJIS(void); 75 static void TestHZ(void); 76 #endif 77 78 static void TestSCSU(void); 79 80 #if !UCONFIG_NO_LEGACY_CONVERSION 81 static void TestEBCDIC_STATEFUL(void); 82 static void TestGB18030(void); 83 static void TestLMBCS(void); 84 static void TestJitterbug255(void); 85 static void TestEBCDICUS4XML(void); 86 #if 0 87 /* 88 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 89 */ 90 static void TestJitterbug915(void); 91 #endif 92 static void TestISCII(void); 93 94 static void TestCoverageMBCS(void); 95 static void TestJitterbug2346(void); 96 static void TestJitterbug2411(void); 97 static void TestJB5275(void); 98 static void TestJB5275_1(void); 99 static void TestJitterbug6175(void); 100 #endif 101 102 static void TestInBufSizes(void); 103 104 static void TestRoundTrippingAllUTF(void); 105 static void TestConv(const uint16_t in[], 106 int len, 107 const char* conv, 108 const char* lang, 109 char byteArr[], 110 int byteArrLen); 111 112 /* open a converter, using test data if it begins with '@' */ 113 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 114 115 116 #define NEW_MAX_BUFFER 999 117 118 static int32_t gInBufferSize = NEW_MAX_BUFFER; 119 static int32_t gOutBufferSize = NEW_MAX_BUFFER; 120 static char gNuConvTestName[1024]; 121 122 #define nct_min(x,y) ((x<y) ? 
x : y) 123 124 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 125 { 126 if(cnv && cnv[0] == '@') { 127 return ucnv_openPackage(loadTestData(err), cnv+1, err); 128 } else { 129 return ucnv_open(cnv, err); 130 } 131 } 132 133 static void printSeq(const unsigned char* a, int len) 134 { 135 int i=0; 136 log_verbose("{"); 137 while (i<len) 138 log_verbose("0x%02x ", a[i++]); 139 log_verbose("}\n"); 140 } 141 142 static void printUSeq(const UChar* a, int len) 143 { 144 int i=0; 145 log_verbose("{U+"); 146 while (i<len) log_verbose("0x%04x ", a[i++]); 147 log_verbose("}\n"); 148 } 149 150 static void printSeqErr(const unsigned char* a, int len) 151 { 152 int i=0; 153 fprintf(stderr, "{"); 154 while (i<len) 155 fprintf(stderr, "0x%02x ", a[i++]); 156 fprintf(stderr, "}\n"); 157 } 158 159 static void printUSeqErr(const UChar* a, int len) 160 { 161 int i=0; 162 fprintf(stderr, "{U+"); 163 while (i<len) 164 fprintf(stderr, "0x%04x ", a[i++]); 165 fprintf(stderr,"}\n"); 166 } 167 168 static void 169 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 170 { 171 const char* s0; 172 const char* s=(char*)source; 173 const int32_t *r=results; 174 UErrorCode errorCode=U_ZERO_ERROR; 175 UChar32 c; 176 177 while(s<limit) { 178 s0=s; 179 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 180 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 181 break; /* no more significant input */ 182 } else if(U_FAILURE(errorCode)) { 183 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 184 break; 185 } else if( 186 /* test the expected number of input bytes only if >=0 */ 187 (*r>=0 && (int32_t)(s-s0)!=*r) || 188 c!=*(r+1) 189 ) { 190 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 191 message, c, (s-s0), *(r+1), *r); 192 break; 193 } 194 r+=2; 195 } 196 } 197 198 static void 199 TestNextUCharError(UConverter* cnv, const char* source, const char* 
limit, UErrorCode expected, const char* message) 200 { 201 const char* s=(char*)source; 202 UErrorCode errorCode=U_ZERO_ERROR; 203 uint32_t c; 204 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 205 if(errorCode != expected){ 206 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 207 } 208 if(c != 0xFFFD && c != 0xffff){ 209 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 210 } 211 212 } 213 214 static void TestInBufSizes(void) 215 { 216 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 217 #if 1 218 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 219 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 220 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 222 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 223 TestNewConvertWithBufferSizes(1,1); 224 TestNewConvertWithBufferSizes(2,3); 225 TestNewConvertWithBufferSizes(3,2); 226 #endif 227 } 228 229 static void TestOutBufSizes(void) 230 { 231 #if 1 232 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 233 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 234 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 235 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 236 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 237 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 238 239 #endif 240 } 241 242 243 void addTestNewConvert(TestNode** root) 244 { 245 #if !UCONFIG_NO_FILE_IO 246 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 247 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 248 #endif 249 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 250 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 251 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); 252 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 253 addTest(root, &TestIMAP, 
"tsconv/nucnvtst/TestIMAP"); 254 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 255 256 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 257 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 258 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 259 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 260 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 261 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 262 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 263 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 264 265 #if !UCONFIG_NO_LEGACY_CONVERSION 266 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 267 #endif 268 269 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 270 271 #if !UCONFIG_NO_LEGACY_CONVERSION 272 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 273 #if !UCONFIG_NO_FILE_IO 274 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 275 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 276 #endif 277 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 278 279 #ifdef U_ENABLE_GENERIC_ISO_2022 280 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 281 #endif 282 283 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 284 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); 285 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 286 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 287 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 288 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 289 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 290 /* 291 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 292 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 293 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 294 */ 
295 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 296 #endif 297 298 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 299 300 #if !UCONFIG_NO_LEGACY_CONVERSION 301 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 302 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 303 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 304 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 305 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 306 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 307 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 308 #if !UCONFIG_NO_COLLATION 309 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 310 #endif 311 312 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 313 #endif 314 315 316 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 317 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 318 #endif 319 320 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 321 322 #if !UCONFIG_NO_LEGACY_CONVERSION 323 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 324 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 325 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 326 #endif 327 328 } 329 330 331 /* Note that this test already makes use of statics, so it's not really 332 multithread safe. 333 This convenience function lets us make the error messages actually useful. 
334 */ 335 336 static void setNuConvTestName(const char *codepage, const char *direction) 337 { 338 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 339 codepage, 340 direction, 341 (int)gInBufferSize, 342 (int)gOutBufferSize); 343 } 344 345 typedef enum 346 { 347 TC_OK = 0, /* test was OK */ 348 TC_MISMATCH = 1, /* Match failed - err was printed */ 349 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 350 } ETestConvertResult; 351 352 /* Note: This function uses global variables and it will not do offset 353 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 354 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 355 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 356 { 357 UErrorCode status = U_ZERO_ERROR; 358 UConverter *conv = 0; 359 char junkout[NEW_MAX_BUFFER]; /* FIX */ 360 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 361 char *p; 362 const UChar *src; 363 char *end; 364 char *targ; 365 int32_t *offs; 366 int i; 367 int32_t realBufferSize; 368 char *realBufferEnd; 369 const UChar *realSourceEnd; 370 const UChar *sourceLimit; 371 UBool checkOffsets = TRUE; 372 UBool doFlush; 373 374 for(i=0;i<NEW_MAX_BUFFER;i++) 375 junkout[i] = (char)0xF0; 376 for(i=0;i<NEW_MAX_BUFFER;i++) 377 junokout[i] = 0xFF; 378 379 setNuConvTestName(codepage, "FROM"); 380 381 log_verbose("\n========= %s\n", gNuConvTestName); 382 383 conv = my_ucnv_open(codepage, &status); 384 385 if(U_FAILURE(status)) 386 { 387 log_data_err("Couldn't open converter %s\n",codepage); 388 return TC_FAIL; 389 } 390 if(useFallback){ 391 ucnv_setFallback(conv,useFallback); 392 } 393 394 log_verbose("Converter opened..\n"); 395 396 src = source; 397 targ = junkout; 398 offs = junokout; 399 400 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 401 realBufferEnd = junkout + realBufferSize; 402 realSourceEnd = source + sourceLen; 
403 404 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 405 checkOffsets = FALSE; 406 407 do 408 { 409 end = nct_min(targ + gOutBufferSize, realBufferEnd); 410 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 411 412 doFlush = (UBool)(sourceLimit == realSourceEnd); 413 414 if(targ == realBufferEnd) { 415 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 416 return TC_FAIL; 417 } 418 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 419 420 421 status = U_ZERO_ERROR; 422 423 ucnv_fromUnicode (conv, 424 &targ, 425 end, 426 &src, 427 sourceLimit, 428 checkOffsets ? offs : NULL, 429 doFlush, /* flush if we're at the end of the input data */ 430 &status); 431 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 432 433 if(U_FAILURE(status)) { 434 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 435 return TC_FAIL; 436 } 437 438 log_verbose("\nConversion done [%d uchars in -> %d chars out]. 
\nResult :", 439 sourceLen, targ-junkout); 440 441 if(getTestOption(VERBOSITY_OPTION)) 442 { 443 char junk[9999]; 444 char offset_str[9999]; 445 char *ptr; 446 447 junk[0] = 0; 448 offset_str[0] = 0; 449 for(ptr = junkout;ptr<targ;ptr++) { 450 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 451 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 452 } 453 454 log_verbose(junk); 455 printSeq((const uint8_t *)expect, expectLen); 456 if ( checkOffsets ) { 457 log_verbose("\nOffsets:"); 458 log_verbose(offset_str); 459 } 460 log_verbose("\n"); 461 } 462 ucnv_close(conv); 463 464 if(expectLen != targ-junkout) { 465 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 466 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 467 printf("\nGot:"); 468 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 469 printf("\nExpected:"); 470 printSeqErr((const unsigned char*)expect, expectLen); 471 return TC_MISMATCH; 472 } 473 474 if (checkOffsets && (expectOffsets != 0) ) { 475 log_verbose("comparing %d offsets..\n", targ-junkout); 476 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 477 log_err("did not get the expected offsets. 
%s\n", gNuConvTestName); 478 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 479 log_err("\n"); 480 log_err("Got : "); 481 for(p=junkout;p<targ;p++) { 482 log_err("%d,", junokout[p-junkout]); 483 } 484 log_err("\n"); 485 log_err("Expected: "); 486 for(i=0; i<(targ-junkout); i++) { 487 log_err("%d,", expectOffsets[i]); 488 } 489 log_err("\n"); 490 } 491 } 492 493 log_verbose("comparing..\n"); 494 if(!memcmp(junkout, expect, expectLen)) { 495 log_verbose("Matches!\n"); 496 return TC_OK; 497 } else { 498 log_err("String does not match u->%s\n", gNuConvTestName); 499 printUSeqErr(source, sourceLen); 500 printf("\nGot:"); 501 printSeqErr((const unsigned char *)junkout, expectLen); 502 printf("\nExpected:"); 503 printSeqErr((const unsigned char *)expect, expectLen); 504 505 return TC_MISMATCH; 506 } 507 } 508 509 /* Note: This function uses global variables and it will not do offset 510 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 511 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 512 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 513 { 514 UErrorCode status = U_ZERO_ERROR; 515 UConverter *conv = 0; 516 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 517 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 518 const char *src; 519 const char *realSourceEnd; 520 const char *srcLimit; 521 UChar *p; 522 UChar *targ; 523 UChar *end; 524 int32_t *offs; 525 int i; 526 UBool checkOffsets = TRUE; 527 528 int32_t realBufferSize; 529 UChar *realBufferEnd; 530 531 532 for(i=0;i<NEW_MAX_BUFFER;i++) 533 junkout[i] = 0xFFFE; 534 535 for(i=0;i<NEW_MAX_BUFFER;i++) 536 junokout[i] = -1; 537 538 setNuConvTestName(codepage, "TO"); 539 540 log_verbose("\n========= %s\n", gNuConvTestName); 541 542 conv = my_ucnv_open(codepage, &status); 543 544 if(U_FAILURE(status)) 545 { 546 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 547 return TC_FAIL; 548 } 549 
if(useFallback){ 550 ucnv_setFallback(conv,useFallback); 551 } 552 log_verbose("Converter opened..\n"); 553 554 src = (const char *)source; 555 targ = junkout; 556 offs = junokout; 557 558 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 559 realBufferEnd = junkout + realBufferSize; 560 realSourceEnd = src + sourcelen; 561 562 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 563 checkOffsets = FALSE; 564 565 do 566 { 567 end = nct_min( targ + gOutBufferSize, realBufferEnd); 568 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 569 570 if(targ == realBufferEnd) 571 { 572 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 573 return TC_FAIL; 574 } 575 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 576 577 /* oldTarg = targ; */ 578 579 status = U_ZERO_ERROR; 580 581 ucnv_toUnicode (conv, 582 &targ, 583 end, 584 &src, 585 srcLimit, 586 checkOffsets ? offs : NULL, 587 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 588 &status); 589 590 /* offs += (targ-oldTarg); */ 591 592 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 593 594 if(U_FAILURE(status)) 595 { 596 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 597 return TC_FAIL; 598 } 599 600 log_verbose("\nConversion done. 
%d bytes -> %d chars.\nResult :", 601 sourcelen, targ-junkout); 602 if(getTestOption(VERBOSITY_OPTION)) 603 { 604 char junk[9999]; 605 char offset_str[9999]; 606 UChar *ptr; 607 608 junk[0] = 0; 609 offset_str[0] = 0; 610 611 for(ptr = junkout;ptr<targ;ptr++) 612 { 613 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 614 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 615 } 616 617 log_verbose(junk); 618 printUSeq(expect, expectlen); 619 if ( checkOffsets ) 620 { 621 log_verbose("\nOffsets:"); 622 log_verbose(offset_str); 623 } 624 log_verbose("\n"); 625 } 626 ucnv_close(conv); 627 628 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 629 630 if (checkOffsets && (expectOffsets != 0)) 631 { 632 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 633 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 634 log_err("Got: "); 635 for(p=junkout;p<targ;p++) { 636 log_err("%d,", junokout[p-junkout]); 637 } 638 log_err("\n"); 639 log_err("Expected: "); 640 for(i=0; i<(targ-junkout); i++) { 641 log_err("%d,", expectOffsets[i]); 642 } 643 log_err("\n"); 644 log_err("output: "); 645 for(i=0; i<(targ-junkout); i++) { 646 log_err("%X,", junkout[i]); 647 } 648 log_err("\n"); 649 log_err("input: "); 650 for(i=0; i<(src-(const char *)source); i++) { 651 log_err("%X,", (unsigned char)source[i]); 652 } 653 log_err("\n"); 654 } 655 } 656 657 if(!memcmp(junkout, expect, expectlen*2)) 658 { 659 log_verbose("Matches!\n"); 660 return TC_OK; 661 } 662 else 663 { 664 log_err("String does not match. %s\n", gNuConvTestName); 665 log_verbose("String does not match. 
%s\n", gNuConvTestName); 666 printf("\nGot:"); 667 printUSeqErr(junkout, expectlen); 668 printf("\nExpected:"); 669 printUSeqErr(expect, expectlen); 670 return TC_MISMATCH; 671 } 672 } 673 674 675 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 676 { 677 /** test chars #1 */ 678 /* 1 2 3 1Han 2Han 3Han . */ 679 static const UChar sampleText[] = 680 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 681 static const UChar sampleTextRoundTripUnmappable[] = 682 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 683 684 685 static const uint8_t expectedUTF8[] = 686 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 687 static const int32_t toUTF8Offs[] = 688 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 689 static const int32_t fmUTF8Offs[] = 690 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 691 692 #ifdef U_ENABLE_GENERIC_ISO_2022 693 /* Same as UTF8, but with ^[%B preceeding */ 694 static const const uint8_t expectedISO2022[] = 695 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 696 static const int32_t toISO2022Offs[] = 697 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 698 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 699 static const int32_t fmISO2022Offs[] = 700 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 701 #endif 702 703 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . 
EBCDIC_STATEFUL */ 704 static const uint8_t expectedIBM930[] = 705 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 706 static const int32_t toIBM930Offs[] = 707 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 708 static const int32_t fmIBM930Offs[] = 709 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 710 711 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 712 static const uint8_t expectedIBM943[] = 713 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 714 static const int32_t toIBM943Offs [] = 715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 716 static const int32_t fmIBM943Offs[] = 717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 718 719 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 720 static const uint8_t expectedIBM9027[] = 721 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 722 static const int32_t toIBM9027Offs [] = 723 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 724 725 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 726 static const uint8_t expectedIBM920[] = 727 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 728 static const int32_t toIBM920Offs [] = 729 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 730 731 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 732 static const uint8_t expectedISO88593[] = 733 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 734 static const int32_t toISO88593Offs[] = 735 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 736 737 /* 1 2 3 0 <?> <?> <?> . 
<?> LATIN_1*/ 738 static const uint8_t expectedLATIN1[] = 739 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 740 static const int32_t toLATIN1Offs[] = 741 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 742 743 744 /* etc */ 745 static const uint8_t expectedUTF16BE[] = 746 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 747 static const int32_t toUTF16BEOffs[]= 748 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 749 static const int32_t fmUTF16BEOffs[] = 750 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 751 752 static const uint8_t expectedUTF16LE[] = 753 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 754 static const int32_t toUTF16LEOffs[]= 755 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 756 static const int32_t fmUTF16LEOffs[] = 757 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 758 759 static const uint8_t expectedUTF32BE[] = 760 { 0x00, 0x00, 0x00, 0x31, 761 0x00, 0x00, 0x00, 0x32, 762 0x00, 0x00, 0x00, 0x33, 763 0x00, 0x00, 0x00, 0x00, 764 0x00, 0x00, 0x4e, 0x00, 765 0x00, 0x00, 0x4e, 0x8c, 766 0x00, 0x00, 0x4e, 0x09, 767 0x00, 0x00, 0x00, 0x2e, 768 0x00, 0x02, 0x00, 0x21 }; 769 static const int32_t toUTF32BEOffs[]= 770 { 0x00, 0x00, 0x00, 0x00, 771 0x01, 0x01, 0x01, 0x01, 772 0x02, 0x02, 0x02, 0x02, 773 0x03, 0x03, 0x03, 0x03, 774 0x04, 0x04, 0x04, 0x04, 775 0x05, 0x05, 0x05, 0x05, 776 0x06, 0x06, 0x06, 0x06, 777 0x07, 0x07, 0x07, 0x07, 778 0x08, 0x08, 0x08, 0x08, 779 0x08, 0x08, 0x08, 0x08 }; 780 static const int32_t fmUTF32BEOffs[] = 781 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 782 783 static const uint8_t 
expectedUTF32LE[] = 784 { 0x31, 0x00, 0x00, 0x00, 785 0x32, 0x00, 0x00, 0x00, 786 0x33, 0x00, 0x00, 0x00, 787 0x00, 0x00, 0x00, 0x00, 788 0x00, 0x4e, 0x00, 0x00, 789 0x8c, 0x4e, 0x00, 0x00, 790 0x09, 0x4e, 0x00, 0x00, 791 0x2e, 0x00, 0x00, 0x00, 792 0x21, 0x00, 0x02, 0x00 }; 793 static const int32_t toUTF32LEOffs[]= 794 { 0x00, 0x00, 0x00, 0x00, 795 0x01, 0x01, 0x01, 0x01, 796 0x02, 0x02, 0x02, 0x02, 797 0x03, 0x03, 0x03, 0x03, 798 0x04, 0x04, 0x04, 0x04, 799 0x05, 0x05, 0x05, 0x05, 800 0x06, 0x06, 0x06, 0x06, 801 0x07, 0x07, 0x07, 0x07, 802 0x08, 0x08, 0x08, 0x08, 803 0x08, 0x08, 0x08, 0x08 }; 804 static const int32_t fmUTF32LEOffs[] = 805 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 806 807 808 809 810 /** Test chars #2 **/ 811 812 /* Sahha [health], slashed h's */ 813 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 814 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 815 816 /* LMBCS */ 817 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 818 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 819 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 820 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 821 /*********************************** START OF CODE finally *************/ 822 823 gInBufferSize = insize; 824 gOutBufferSize = outsize; 825 826 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 827 828 829 /*UTF-8*/ 830 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 831 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 832 833 log_verbose("Test surrogate behaviour for UTF8\n"); 834 { 835 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 836 static 
const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 837 0xf0, 0x90, 0x90, 0x81, 838 0xef, 0xbf, 0xbd 839 }; 840 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 841 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 842 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 843 844 845 } 846 847 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 848 /*ISO-2022*/ 849 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 850 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 851 #endif 852 853 /*UTF16 LE*/ 854 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 855 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 856 /*UTF16 BE*/ 857 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 858 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 859 /*UTF32 LE*/ 860 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 861 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 862 /*UTF32 BE*/ 863 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 864 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 865 866 /*LATIN_1*/ 867 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 868 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 869 870 #if !UCONFIG_NO_LEGACY_CONVERSION 871 /*EBCDIC_STATEFUL*/ 872 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 873 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 874 875 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 876 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 877 878 /*MBCS*/ 879 880 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 881 expectedIBM943, sizeof(expectedIBM943), 
"ibm-943", toIBM943Offs,FALSE ); 882 /*DBCS*/ 883 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 884 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 885 /*SBCS*/ 886 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 887 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 888 /*SBCS*/ 889 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 890 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 891 #endif 892 893 894 /****/ 895 896 /*UTF-8*/ 897 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 898 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 899 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 900 /*ISO-2022*/ 901 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 902 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 903 #endif 904 905 /*UTF16 LE*/ 906 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 907 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 908 /*UTF16 BE*/ 909 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 910 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 911 /*UTF32 LE*/ 912 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 913 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 914 /*UTF32 BE*/ 915 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 916 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 917 918 #if !UCONFIG_NO_LEGACY_CONVERSION 919 /*EBCDIC_STATEFUL*/ 920 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 921 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 922 /*MBCS*/ 923 testConvertToU(expectedIBM943, 
sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 924 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 925 #endif 926 927 /* Try it again to make sure it still works */ 928 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 929 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 930 931 #if !UCONFIG_NO_LEGACY_CONVERSION 932 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 933 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 934 935 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 936 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 937 938 /*LMBCS*/ 939 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 940 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 941 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 942 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 943 #endif 944 945 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 946 { 947 /* encode directly set D and set O */ 948 static const uint8_t utf7[] = { 949 /* 950 Hi Mom -+Jjo--! 951 A+ImIDkQ. 952 +- 953 +ZeVnLIqe 954 */ 955 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 956 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 957 0x2b, 0x2d, 958 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 959 }; 960 static const UChar unicode[] = { 961 /* 962 Hi Mom -<WHITE SMILING FACE>-! 963 A<NOT IDENTICAL TO><ALPHA>. 
964 + 965 [Japanese word "nihongo"] 966 */ 967 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 968 0x41, 0x2262, 0x0391, 0x2e, 969 0x2b, 970 0x65e5, 0x672c, 0x8a9e 971 }; 972 static const int32_t toUnicodeOffsets[] = { 973 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 974 15, 17, 19, 23, 975 24, 976 27, 29, 32 977 }; 978 static const int32_t fromUnicodeOffsets[] = { 979 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 980 11, 12, 12, 12, 13, 13, 13, 13, 14, 981 15, 15, 982 16, 16, 16, 17, 17, 17, 18, 18, 18 983 }; 984 985 /* same but escaping set O (the exclamation mark) */ 986 static const uint8_t utf7Restricted[] = { 987 /* 988 Hi Mom -+Jjo--+ACE- 989 A+ImIDkQ. 990 +- 991 +ZeVnLIqe 992 */ 993 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 994 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 995 0x2b, 0x2d, 996 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 997 }; 998 static const int32_t toUnicodeOffsetsR[] = { 999 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1000 19, 21, 23, 27, 1001 28, 1002 31, 33, 36 1003 }; 1004 static const int32_t fromUnicodeOffsetsR[] = { 1005 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1006 11, 12, 12, 12, 13, 13, 13, 13, 14, 1007 15, 15, 1008 16, 16, 16, 17, 17, 17, 18, 18, 18 1009 }; 1010 1011 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1012 1013 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 1014 1015 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1016 1017 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1018 } 1019 1020 /* 1021 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 1022 * modified according 
to RFC 2060, 1023 * and supplemented with the one example in RFC 2060 itself. 1024 */ 1025 { 1026 static const uint8_t imap[] = { 1027 /* Hi Mom -&Jjo--! 1028 A&ImIDkQ-. 1029 &- 1030 &ZeVnLIqe- 1031 \ 1032 ~peter 1033 /mail 1034 /&ZeVnLIqe- 1035 /&U,BTFw- 1036 */ 1037 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1038 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1039 0x26, 0x2d, 1040 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1041 0x5c, 1042 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1043 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1044 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1045 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1046 }; 1047 static const UChar unicode[] = { 1048 /* Hi Mom -<WHITE SMILING FACE>-! 1049 A<NOT IDENTICAL TO><ALPHA>. 1050 & 1051 [Japanese word "nihongo"] 1052 \ 1053 ~peter 1054 /mail 1055 /<65e5, 672c, 8a9e> 1056 /<53f0, 5317> 1057 */ 1058 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1059 0x41, 0x2262, 0x0391, 0x2e, 1060 0x26, 1061 0x65e5, 0x672c, 0x8a9e, 1062 0x5c, 1063 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1064 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1065 0x2f, 0x65e5, 0x672c, 0x8a9e, 1066 0x2f, 0x53f0, 0x5317 1067 }; 1068 static const int32_t toUnicodeOffsets[] = { 1069 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1070 15, 17, 19, 24, 1071 25, 1072 28, 30, 33, 1073 37, 1074 38, 39, 40, 41, 42, 43, 1075 44, 45, 46, 47, 48, 1076 49, 51, 53, 56, 1077 60, 62, 64 1078 }; 1079 static const int32_t fromUnicodeOffsets[] = { 1080 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1081 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1082 15, 15, 1083 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1084 19, 1085 20, 21, 22, 23, 24, 25, 1086 26, 27, 28, 29, 30, 1087 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1088 35, 36, 36, 36, 37, 37, 37, 37, 37 1089 }; 1090 1091 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", 
fromUnicodeOffsets,FALSE); 1092 1093 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1094 } 1095 1096 /* Test UTF-8 bad data handling*/ 1097 { 1098 static const uint8_t utf8[]={ 1099 0x61, 1100 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1101 0x00, 1102 0x62, 1103 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1104 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1105 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1106 0xdf, 0xbf, /* 7ff */ 1107 0xbf, /* truncated tail */ 1108 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1109 0x02 1110 }; 1111 1112 static const uint16_t utf8Expected[]={ 1113 0x0061, 1114 0xfffd, 1115 0x0000, 1116 0x0062, 1117 0xfffd, 1118 0xfffd, 1119 0xdbff, 0xdfff, 1120 0x07ff, 1121 0xfffd, 1122 0xfffd, 1123 0x0002 1124 }; 1125 1126 static const int32_t utf8Offsets[]={ 1127 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1128 }; 1129 testConvertToU(utf8, sizeof(utf8), 1130 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1131 1132 } 1133 1134 /* Test UTF-32BE bad data handling*/ 1135 { 1136 static const uint8_t utf32[]={ 1137 0x00, 0x00, 0x00, 0x61, 1138 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1139 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1140 0x00, 0x00, 0x00, 0x62, 1141 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1142 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1143 0x00, 0x00, 0x01, 0x62, 1144 0x00, 0x00, 0x02, 0x62 1145 }; 1146 static const uint16_t utf32Expected[]={ 1147 0x0061, 1148 0xfffd, /* 0x110000 out of range */ 1149 0xDBFF, /* 0x10FFFF in range */ 1150 0xDFFF, 1151 0x0062, 1152 0xfffd, /* 0xffffffff out of range */ 1153 0xfffd, /* 0x7fffffff out of range */ 1154 0x0162, 1155 0x0262 1156 }; 1157 static const int32_t utf32Offsets[]={ 1158 0, 4, 8, 8, 12, 16, 20, 24, 28 1159 }; 1160 static const uint8_t utf32ExpectedBack[]={ 1161 0x00, 0x00, 0x00, 0x61, 1162 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ 1163 0x00, 
0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1164 0x00, 0x00, 0x00, 0x62, 1165 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1166 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1167 0x00, 0x00, 0x01, 0x62, 1168 0x00, 0x00, 0x02, 0x62 1169 }; 1170 static const int32_t utf32OffsetsBack[]={ 1171 0,0,0,0, 1172 1,1,1,1, 1173 2,2,2,2, 1174 4,4,4,4, 1175 5,5,5,5, 1176 6,6,6,6, 1177 7,7,7,7, 1178 8,8,8,8 1179 }; 1180 1181 testConvertToU(utf32, sizeof(utf32), 1182 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1183 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1184 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1185 } 1186 1187 /* Test UTF-32LE bad data handling*/ 1188 { 1189 static const uint8_t utf32[]={ 1190 0x61, 0x00, 0x00, 0x00, 1191 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1192 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1193 0x62, 0x00, 0x00, 0x00, 1194 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1195 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1196 0x62, 0x01, 0x00, 0x00, 1197 0x62, 0x02, 0x00, 0x00, 1198 }; 1199 1200 static const uint16_t utf32Expected[]={ 1201 0x0061, 1202 0xfffd, /* 0x110000 out of range */ 1203 0xDBFF, /* 0x10FFFF in range */ 1204 0xDFFF, 1205 0x0062, 1206 0xfffd, /* 0xffffffff out of range */ 1207 0xfffd, /* 0x7fffffff out of range */ 1208 0x0162, 1209 0x0262 1210 }; 1211 static const int32_t utf32Offsets[]={ 1212 0, 4, 8, 8, 12, 16, 20, 24, 28 1213 }; 1214 static const uint8_t utf32ExpectedBack[]={ 1215 0x61, 0x00, 0x00, 0x00, 1216 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1217 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1218 0x62, 0x00, 0x00, 0x00, 1219 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1220 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1221 0x62, 0x01, 0x00, 0x00, 1222 0x62, 0x02, 0x00, 0x00 1223 }; 1224 static const int32_t 
utf32OffsetsBack[]={ 1225 0,0,0,0, 1226 1,1,1,1, 1227 2,2,2,2, 1228 4,4,4,4, 1229 5,5,5,5, 1230 6,6,6,6, 1231 7,7,7,7, 1232 8,8,8,8 1233 }; 1234 testConvertToU(utf32, sizeof(utf32), 1235 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1236 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1237 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1238 } 1239 } 1240 1241 static void TestCoverageMBCS(){ 1242 #if 0 1243 UErrorCode status = U_ZERO_ERROR; 1244 const char *directory = loadTestData(&status); 1245 char* tdpath = NULL; 1246 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1247 int len = strlen(directory); 1248 char* index=NULL; 1249 1250 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1251 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1252 log_verbose("Retrieved data directory %s \n",saveDirectory); 1253 uprv_strcpy(tdpath,directory); 1254 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1255 1256 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1257 *(index+1)=0; 1258 } 1259 u_setDataDirectory(tdpath); 1260 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1261 #endif 1262 1263 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1264 which is test file for MBCS conversion with single-byte codepage data.*/ 1265 { 1266 1267 /* MBCS with single byte codepage data test1.ucm*/ 1268 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1269 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1270 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1271 1272 /*from Unicode*/ 1273 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1274 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1275 } 1276 1277 /*some more test to increase the code coverage in MBCS. 
Create an test converter from test3.ucm 1278 which is test file for MBCS conversion with three-byte codepage data.*/ 1279 { 1280 1281 /* MBCS with three byte codepage data test3.ucm*/ 1282 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1283 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1284 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1285 1286 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1287 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1288 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1289 1290 /*from Unicode*/ 1291 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1292 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1293 1294 /*to Unicode*/ 1295 testConvertToU(test3input, sizeof(test3input), 1296 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1297 1298 } 1299 1300 /*some more test to increase the code coverage in MBCS. 
Create an test converter from test4.ucm 1301 which is test file for MBCS conversion with four-byte codepage data.*/ 1302 { 1303 1304 /* MBCS with three byte codepage data test4.ucm*/ 1305 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1306 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1307 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1308 1309 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1310 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1311 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1312 1313 /*from Unicode*/ 1314 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1315 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1316 1317 /*to Unicode*/ 1318 testConvertToU(test4input, sizeof(test4input), 1319 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1320 1321 } 1322 #if 0 1323 free(tdpath); 1324 /* restore the original data directory */ 1325 log_verbose("Setting the data directory to %s \n", saveDirectory); 1326 u_setDataDirectory(saveDirectory); 1327 free(saveDirectory); 1328 #endif 1329 1330 } 1331 1332 static void TestConverterType(const char *convName, UConverterType convType) { 1333 UConverter* myConverter; 1334 UErrorCode err = U_ZERO_ERROR; 1335 1336 myConverter = my_ucnv_open(convName, &err); 1337 1338 if (U_FAILURE(err)) { 1339 log_data_err("Failed to create an %s converter\n", convName); 1340 return; 1341 } 1342 else 1343 { 1344 if (ucnv_getType(myConverter)!=convType) { 1345 log_err("ucnv_getType Failed for %s. 
Got enum value 0x%X\n", 1346 convName, convType); 1347 } 1348 else { 1349 log_verbose("ucnv_getType %s ok\n", convName); 1350 } 1351 } 1352 ucnv_close(myConverter); 1353 } 1354 1355 static void TestConverterTypesAndStarters() 1356 { 1357 #if !UCONFIG_NO_LEGACY_CONVERSION 1358 UConverter* myConverter; 1359 UErrorCode err = U_ZERO_ERROR; 1360 UBool mystarters[256]; 1361 1362 /* const UBool expectedKSCstarters[256] = { 1363 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1364 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1365 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1366 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1367 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1368 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1369 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1377 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1378 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1379 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1380 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1381 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1382 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1383 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1384 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1385 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1386 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1389 1390 1391 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1392 1393 myConverter = ucnv_open("ksc", &err); 1394 if (U_FAILURE(err)) { 1395 log_data_err("Failed to create an ibm-ksc converter\n"); 1396 return; 1397 } 1398 else 1399 { 1400 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1401 log_err("ucnv_getType Failed for ibm-949\n"); 1402 else 1403 log_verbose("ucnv_getType ibm-949 ok\n"); 1404 1405 if(myConverter!=NULL) 1406 ucnv_getStarters(myConverter, mystarters, &err); 1407 1408 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1409 log_err("Failed ucnv_getStarters for ksc\n"); 1410 else 1411 log_verbose("ucnv_getStarters ok\n");*/ 1412 1413 } 1414 ucnv_close(myConverter); 1415 1416 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1417 TestConverterType("ibm-878", UCNV_SBCS); 1418 #endif 1419 1420 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1421 1422 TestConverterType("ibm-1208", UCNV_UTF8); 1423 1424 TestConverterType("utf-8", UCNV_UTF8); 1425 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1426 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1427 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1428 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1429 1430 #if !UCONFIG_NO_LEGACY_CONVERSION 1431 1432 #if defined(U_ENABLE_GENERIC_ISO_2022) 1433 TestConverterType("iso-2022", UCNV_ISO_2022); 1434 #endif 1435 1436 TestConverterType("hz", UCNV_HZ); 1437 #endif 1438 1439 TestConverterType("scsu", UCNV_SCSU); 1440 1441 #if !UCONFIG_NO_LEGACY_CONVERSION 1442 TestConverterType("x-iscii-de", UCNV_ISCII); 1443 #endif 1444 1445 TestConverterType("ascii", UCNV_US_ASCII); 1446 TestConverterType("utf-7", UCNV_UTF7); 1447 
TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1448 TestConverterType("bocu-1", UCNV_BOCU1); 1449 } 1450 1451 static void 1452 TestAmbiguousConverter(UConverter *cnv) { 1453 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1454 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1455 1456 const char *s; 1457 UChar *u; 1458 UErrorCode errorCode; 1459 UBool isAmbiguous; 1460 1461 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1462 errorCode=U_ZERO_ERROR; 1463 s=inBytes; 1464 u=outUnicode; 1465 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1466 if(U_FAILURE(errorCode)) { 1467 /* we do not care about general failures in this test; the input may just not be mappable */ 1468 return; 1469 } 1470 1471 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1472 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1473 /* There are some encodings that are partially ASCII based, 1474 like the ISO-7 and GSM series of codepages, which we ignore. 
*/ 1475 return; 1476 } 1477 1478 isAmbiguous=ucnv_isAmbiguous(cnv); 1479 1480 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1481 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1482 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1483 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1484 return; 1485 } 1486 1487 if(outUnicode[2]!=0x5c) { 1488 /* needs fixup, fix it */ 1489 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1490 if(outUnicode[2]!=0x5c) { 1491 /* the fix failed */ 1492 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1493 return; 1494 } 1495 } 1496 } 1497 1498 static void TestAmbiguous() 1499 { 1500 UErrorCode status = U_ZERO_ERROR; 1501 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1502 static const char target[] = { 1503 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1504 0x5c, 0x75, 0x73, 0x72, 1505 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1506 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1507 0x5c, 0x64, 0x61, 0x74, 0x61, 1508 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1509 0 1510 }; 1511 UChar asciiResult[200], sjisResult[200]; 1512 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1513 const char *name; 1514 1515 /* enumerate all converters */ 1516 status=U_ZERO_ERROR; 1517 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1518 cnv=ucnv_open(name, &status); 1519 if(U_SUCCESS(status)) { 1520 TestAmbiguousConverter(cnv); 1521 ucnv_close(cnv); 1522 } else { 1523 log_err("error: unable to open available converter \"%s\"\n", name); 1524 status=U_ZERO_ERROR; 1525 } 1526 } 1527 1528 #if !UCONFIG_NO_LEGACY_CONVERSION 1529 sjis_cnv = ucnv_open("ibm-943", &status); 1530 if (U_FAILURE(status)) 1531 { 1532 log_data_err("Failed to create a SJIS converter\n"); 1533 return; 1534 } 1535 ascii_cnv = ucnv_open("LATIN-1", &status); 1536 if (U_FAILURE(status)) 1537 { 1538 log_data_err("Failed to create a LATIN-1 
converter\n"); 1539 ucnv_close(sjis_cnv); 1540 return; 1541 } 1542 /* convert target from SJIS to Unicode */ 1543 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1544 if (U_FAILURE(status)) 1545 { 1546 log_err("Failed to convert the SJIS string.\n"); 1547 ucnv_close(sjis_cnv); 1548 ucnv_close(ascii_cnv); 1549 return; 1550 } 1551 /* convert target from Latin-1 to Unicode */ 1552 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1553 if (U_FAILURE(status)) 1554 { 1555 log_err("Failed to convert the Latin-1 string.\n"); 1556 ucnv_close(sjis_cnv); 1557 ucnv_close(ascii_cnv); 1558 return; 1559 } 1560 if (!ucnv_isAmbiguous(sjis_cnv)) 1561 { 1562 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1563 ucnv_close(sjis_cnv); 1564 ucnv_close(ascii_cnv); 1565 return; 1566 } 1567 if (u_strcmp(sjisResult, asciiResult) == 0) 1568 { 1569 log_err("File separators for SJIS don't need to be fixed.\n"); 1570 } 1571 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1572 if (u_strcmp(sjisResult, asciiResult) != 0) 1573 { 1574 log_err("Fixing file separator for SJIS failed.\n"); 1575 } 1576 ucnv_close(sjis_cnv); 1577 ucnv_close(ascii_cnv); 1578 #endif 1579 } 1580 1581 static void 1582 TestSignatureDetection(){ 1583 /* with null terminated strings */ 1584 { 1585 static const char* data[] = { 1586 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1587 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1588 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1589 "\x0E\xFE\xFF\x00", /* SCSU */ 1590 1591 "\xFE\xFF", /* UTF-16BE */ 1592 "\xFF\xFE", /* UTF-16LE */ 1593 "\xEF\xBB\xBF", /* UTF-8 */ 1594 "\x0E\xFE\xFF", /* SCSU */ 1595 1596 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1597 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1598 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1599 "\x0E\xFE\xFF\x41", /* SCSU */ 1600 1601 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1602 
"\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1603 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1604 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1605 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1606 1607 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1608 }; 1609 static const char* expected[] = { 1610 "UTF-16BE", 1611 "UTF-16LE", 1612 "UTF-8", 1613 "SCSU", 1614 1615 "UTF-16BE", 1616 "UTF-16LE", 1617 "UTF-8", 1618 "SCSU", 1619 1620 "UTF-16BE", 1621 "UTF-16LE", 1622 "UTF-8", 1623 "SCSU", 1624 1625 "UTF-7", 1626 "UTF-7", 1627 "UTF-7", 1628 "UTF-7", 1629 "UTF-7", 1630 "UTF-EBCDIC" 1631 }; 1632 static const int32_t expectedLength[] ={ 1633 2, 1634 2, 1635 3, 1636 3, 1637 1638 2, 1639 2, 1640 3, 1641 3, 1642 1643 2, 1644 2, 1645 3, 1646 3, 1647 1648 5, 1649 4, 1650 4, 1651 4, 1652 4, 1653 4 1654 }; 1655 int i=0; 1656 UErrorCode err; 1657 int32_t signatureLength = -1; 1658 const char* source = NULL; 1659 const char* enc = NULL; 1660 for( ; i<sizeof(data)/sizeof(char*); i++){ 1661 err = U_ZERO_ERROR; 1662 source = data[i]; 1663 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1664 if(U_FAILURE(err)){ 1665 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1666 continue; 1667 } 1668 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1669 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1670 continue; 1671 } 1672 if(signatureLength != expectedLength[i]){ 1673 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. 
Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1674 } 1675 } 1676 } 1677 { 1678 static const char* data[] = { 1679 "\xFE\xFF\x00", /* UTF-16BE */ 1680 "\xFF\xFE\x00", /* UTF-16LE */ 1681 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1682 "\x0E\xFE\xFF\x00", /* SCSU */ 1683 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1684 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1685 "\xFE\xFF", /* UTF-16BE */ 1686 "\xFF\xFE", /* UTF-16LE */ 1687 "\xEF\xBB\xBF", /* UTF-8 */ 1688 "\x0E\xFE\xFF", /* SCSU */ 1689 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1690 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1691 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1692 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1693 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1694 "\x0E\xFE\xFF\x41", /* SCSU */ 1695 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1696 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1697 "\xFB\xEE\x28", /* BOCU-1 */ 1698 "\xFF\x41\x42" /* NULL */ 1699 }; 1700 static const int len[] = { 1701 3, 1702 3, 1703 4, 1704 4, 1705 4, 1706 4, 1707 2, 1708 2, 1709 3, 1710 3, 1711 4, 1712 4, 1713 4, 1714 4, 1715 4, 1716 4, 1717 5, 1718 5, 1719 3, 1720 3 1721 }; 1722 1723 static const char* expected[] = { 1724 "UTF-16BE", 1725 "UTF-16LE", 1726 "UTF-8", 1727 "SCSU", 1728 "UTF-32BE", 1729 "UTF-32LE", 1730 "UTF-16BE", 1731 "UTF-16LE", 1732 "UTF-8", 1733 "SCSU", 1734 "UTF-32BE", 1735 "UTF-32LE", 1736 "UTF-16BE", 1737 "UTF-16LE", 1738 "UTF-8", 1739 "SCSU", 1740 "UTF-32BE", 1741 "UTF-32LE", 1742 "BOCU-1", 1743 NULL 1744 }; 1745 static const int32_t expectedLength[] ={ 1746 2, 1747 2, 1748 3, 1749 3, 1750 4, 1751 4, 1752 2, 1753 2, 1754 3, 1755 3, 1756 4, 1757 4, 1758 2, 1759 2, 1760 3, 1761 3, 1762 4, 1763 4, 1764 3, 1765 0 1766 }; 1767 int i=0; 1768 UErrorCode err; 1769 int32_t signatureLength = -1; 1770 int32_t sourceLength=-1; 1771 const char* source = NULL; 1772 const char* enc = NULL; 1773 for( ; i<sizeof(data)/sizeof(char*); i++){ 1774 err = U_ZERO_ERROR; 1775 source = data[i]; 1776 sourceLength = len[i]; 1777 enc = ucnv_detectUnicodeSignature(source, sourceLength , 
&signatureLength, &err); 1778 if(U_FAILURE(err)){ 1779 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1780 continue; 1781 } 1782 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1783 if(expected[i] !=NULL){ 1784 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1785 continue; 1786 } 1787 } 1788 if(signatureLength != expectedLength[i]){ 1789 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1790 } 1791 } 1792 } 1793 } 1794 1795 static void TestUTF7() { 1796 /* test input */ 1797 static const uint8_t in[]={ 1798 /* H - +Jjo- - ! +- +2AHcAQ */ 1799 0x48, 1800 0x2d, 1801 0x2b, 0x4a, 0x6a, 0x6f, 1802 0x2d, 0x2d, 1803 0x21, 1804 0x2b, 0x2d, 1805 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1806 }; 1807 1808 /* expected test results */ 1809 static const int32_t results[]={ 1810 /* number of bytes read, code point */ 1811 1, 0x48, 1812 1, 0x2d, 1813 4, 0x263a, /* <WHITE SMILING FACE> */ 1814 2, 0x2d, 1815 1, 0x21, 1816 2, 0x2b, 1817 7, 0x10401 1818 }; 1819 1820 const char *cnvName; 1821 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1822 UErrorCode errorCode=U_ZERO_ERROR; 1823 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1824 if(U_FAILURE(errorCode)) { 1825 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1826 return; 1827 } 1828 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1829 /* Test the condition when source >= sourceLimit */ 1830 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1831 cnvName = ucnv_getName(cnv, &errorCode); 1832 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1833 log_err("UTF-7 converter is called %s: %s\n", cnvName, 
u_errorName(errorCode)); 1834 } 1835 ucnv_close(cnv); 1836 } 1837 1838 static void TestIMAP() { 1839 /* test input */ 1840 static const uint8_t in[]={ 1841 /* H - &Jjo- - ! &- &2AHcAQ- \ */ 1842 0x48, 1843 0x2d, 1844 0x26, 0x4a, 0x6a, 0x6f, 1845 0x2d, 0x2d, 1846 0x21, 1847 0x26, 0x2d, 1848 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1849 }; 1850 1851 /* expected test results */ 1852 static const int32_t results[]={ 1853 /* number of bytes read, code point */ 1854 1, 0x48, 1855 1, 0x2d, 1856 4, 0x263a, /* <WHITE SMILING FACE> */ 1857 2, 0x2d, 1858 1, 0x21, 1859 2, 0x26, 1860 7, 0x10401 1861 }; 1862 1863 const char *cnvName; 1864 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1865 UErrorCode errorCode=U_ZERO_ERROR; 1866 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1867 if(U_FAILURE(errorCode)) { 1868 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1869 return; 1870 } 1871 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1872 /* Test the condition when source >= sourceLimit */ 1873 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1874 cnvName = ucnv_getName(cnv, &errorCode); 1875 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1876 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1877 } 1878 ucnv_close(cnv); 1879 } 1880 1881 static void TestUTF8() { 1882 /* test input */ 1883 static const uint8_t in[]={ 1884 0x61, 1885 0xc2, 0x80, 1886 0xe0, 0xa0, 0x80, 1887 0xf0, 0x90, 0x80, 0x80, 1888 0xf4, 0x84, 0x8c, 0xa1, 1889 0xf0, 0x90, 0x90, 0x81 1890 }; 1891 1892 /* expected test results */ 1893 static const int32_t results[]={ 1894 /* number of bytes read, code point */ 1895 1, 0x61, 1896 2, 0x80, 1897 3, 0x800, 1898 4, 0x10000, 1899 4, 0x104321, 1900 4, 0x10401 1901 }; 1902 1903 /* error test input */ 1904 static const uint8_t 
in2[]={ 1905 0x61, 1906 0xc0, 0x80, /* illegal non-shortest form */ 1907 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1908 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1909 0xc0, 0xc0, /* illegal trail byte */ 1910 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1911 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1912 0xfe, /* illegal byte altogether */ 1913 0x62 1914 }; 1915 1916 /* expected error test results */ 1917 static const int32_t results2[]={ 1918 /* number of bytes read, code point */ 1919 1, 0x61, 1920 22, 0x62 1921 }; 1922 1923 UConverterToUCallback cb; 1924 const void *p; 1925 1926 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1927 UErrorCode errorCode=U_ZERO_ERROR; 1928 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1929 if(U_FAILURE(errorCode)) { 1930 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1931 return; 1932 } 1933 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1934 /* Test the condition when source >= sourceLimit */ 1935 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1936 1937 /* test error behavior with a skip callback */ 1938 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1939 source=(const char *)in2; 1940 limit=(const char *)(in2+sizeof(in2)); 1941 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1942 1943 ucnv_close(cnv); 1944 } 1945 1946 static void TestCESU8() { 1947 /* test input */ 1948 static const uint8_t in[]={ 1949 0x61, 1950 0xc2, 0x80, 1951 0xe0, 0xa0, 0x80, 1952 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1953 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1954 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1955 0xef, 0xbf, 0xbc 1956 }; 1957 1958 /* expected test results */ 1959 static const int32_t results[]={ 1960 /* number of bytes read, code point */ 1961 1, 0x61, 1962 2, 0x80, 1963 3, 0x800, 1964 6, 0x10000, 1965 3, 0xdc01, 1966 -1,0xd802, /* may read 3 or 6 bytes */ 1967 -1,0x10ffff,/* may 
read 0 or 3 bytes */ 1968 3, 0xfffc 1969 }; 1970 1971 /* error test input */ 1972 static const uint8_t in2[]={ 1973 0x61, 1974 0xc0, 0x80, /* illegal non-shortest form */ 1975 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1976 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1977 0xc0, 0xc0, /* illegal trail byte */ 1978 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 1979 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 1980 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 1981 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1982 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1983 0xfe, /* illegal byte altogether */ 1984 0x62 1985 }; 1986 1987 /* expected error test results */ 1988 static const int32_t results2[]={ 1989 /* number of bytes read, code point */ 1990 1, 0x61, 1991 34, 0x62 1992 }; 1993 1994 UConverterToUCallback cb; 1995 const void *p; 1996 1997 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1998 UErrorCode errorCode=U_ZERO_ERROR; 1999 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2000 if(U_FAILURE(errorCode)) { 2001 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2002 return; 2003 } 2004 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2005 /* Test the condition when source >= sourceLimit */ 2006 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2007 2008 /* test error behavior with a skip callback */ 2009 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2010 source=(const char *)in2; 2011 limit=(const char *)(in2+sizeof(in2)); 2012 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2013 2014 ucnv_close(cnv); 2015 } 2016 2017 static void TestUTF16() { 2018 /* test input */ 2019 static const uint8_t in1[]={ 2020 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2021 }; 2022 static const uint8_t in2[]={ 2023 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2024 }; 2025 
static const uint8_t in3[]={ 2026 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2027 }; 2028 2029 /* expected test results */ 2030 static const int32_t results1[]={ 2031 /* number of bytes read, code point */ 2032 4, 0x4e00, 2033 2, 0xfeff 2034 }; 2035 static const int32_t results2[]={ 2036 /* number of bytes read, code point */ 2037 4, 0x004e, 2038 2, 0xfffe 2039 }; 2040 static const int32_t results3[]={ 2041 /* number of bytes read, code point */ 2042 2, 0xfefe, 2043 2, 0x4e00, 2044 2, 0xfeff, 2045 4, 0x20001 2046 }; 2047 2048 const char *source, *limit; 2049 2050 UErrorCode errorCode=U_ZERO_ERROR; 2051 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2052 if(U_FAILURE(errorCode)) { 2053 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2054 return; 2055 } 2056 2057 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2058 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2059 2060 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2061 ucnv_resetToUnicode(cnv); 2062 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2063 2064 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2065 ucnv_resetToUnicode(cnv); 2066 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2067 2068 /* Test the condition when source >= sourceLimit */ 2069 ucnv_resetToUnicode(cnv); 2070 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2071 2072 ucnv_close(cnv); 2073 } 2074 2075 static void TestUTF16BE() { 2076 /* test input */ 2077 static const uint8_t in[]={ 2078 0x00, 0x61, 2079 0x00, 0xc0, 2080 0x00, 0x31, 2081 0x00, 0xf4, 2082 0xce, 0xfe, 2083 0xd8, 0x01, 0xdc, 0x01 2084 }; 2085 2086 /* expected test results */ 2087 static const int32_t results[]={ 2088 /* number of bytes read, code point */ 2089 2, 0x61, 2090 2, 0xc0, 2091 2, 0x31, 2092 2, 0xf4, 2093 2, 0xcefe, 2094 4, 0x10401 2095 }; 2096 2097 const char *source=(const char *)in, *limit=(const char 
*)in+sizeof(in); 2098 UErrorCode errorCode=U_ZERO_ERROR; 2099 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2100 if(U_FAILURE(errorCode)) { 2101 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); 2102 return; 2103 } 2104 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2105 /* Test the condition when source >= sourceLimit */ 2106 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2107 /*Test for the condition where there is an invalid character*/ 2108 { 2109 static const uint8_t source2[]={0x61}; 2110 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2111 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2112 } 2113 #if 0 2114 /* 2115 * Test disabled because currently the UTF-16BE/LE converters are supposed 2116 * to not set errors for unpaired surrogates. 2117 * This may change with 2118 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2119 */ 2120 2121 /*Test for the condition where there is a surrogate pair*/ 2122 { 2123 const uint8_t source2[]={0xd8, 0x01}; 2124 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2125 } 2126 #endif 2127 ucnv_close(cnv); 2128 } 2129 2130 static void 2131 TestUTF16LE() { 2132 /* test input */ 2133 static const uint8_t in[]={ 2134 0x61, 0x00, 2135 0x31, 0x00, 2136 0x4e, 0x2e, 2137 0x4e, 0x00, 2138 0x01, 0xd8, 0x01, 0xdc 2139 }; 2140 2141 /* expected test results */ 2142 static const int32_t results[]={ 2143 /* number of bytes read, code point */ 2144 2, 0x61, 2145 2, 0x31, 2146 2, 0x2e4e, 2147 2, 0x4e, 2148 4, 0x10401 2149 }; 2150 2151 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2152 UErrorCode errorCode=U_ZERO_ERROR; 2153 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2154 
if(U_FAILURE(errorCode)) { 2155 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2156 return; 2157 } 2158 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2159 /* Test the condition when source >= sourceLimit */ 2160 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2161 /*Test for the condition where there is an invalid character*/ 2162 { 2163 static const uint8_t source2[]={0x61}; 2164 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2165 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2166 } 2167 #if 0 2168 /* 2169 * Test disabled because currently the UTF-16BE/LE converters are supposed 2170 * to not set errors for unpaired surrogates. 2171 * This may change with 2172 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2173 */ 2174 2175 /*Test for the condition where there is a surrogate character*/ 2176 { 2177 static const uint8_t source2[]={0x01, 0xd8}; 2178 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2179 } 2180 #endif 2181 2182 ucnv_close(cnv); 2183 } 2184 2185 static void TestUTF32() { 2186 /* test input */ 2187 static const uint8_t in1[]={ 2188 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2189 }; 2190 static const uint8_t in2[]={ 2191 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2192 }; 2193 static const uint8_t in3[]={ 2194 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2195 }; 2196 2197 /* expected test results */ 2198 static const int32_t results1[]={ 2199 /* number of bytes read, code point */ 2200 8, 0x100f00, 2201 4, 0xfeff 2202 }; 2203 static const int32_t results2[]={ 2204 /* number of bytes read, code point */ 2205 8, 0x0f1000, 2206 4, 
0xfffe 2207 }; 2208 static const int32_t results3[]={ 2209 /* number of bytes read, code point */ 2210 4, 0xfefe, 2211 4, 0x100f00, 2212 4, 0xfffd, /* unmatched surrogate */ 2213 4, 0xfffd /* unmatched surrogate */ 2214 }; 2215 2216 const char *source, *limit; 2217 2218 UErrorCode errorCode=U_ZERO_ERROR; 2219 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2220 if(U_FAILURE(errorCode)) { 2221 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2222 return; 2223 } 2224 2225 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2226 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2227 2228 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2229 ucnv_resetToUnicode(cnv); 2230 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2231 2232 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2233 ucnv_resetToUnicode(cnv); 2234 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2235 2236 /* Test the condition when source >= sourceLimit */ 2237 ucnv_resetToUnicode(cnv); 2238 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2239 2240 ucnv_close(cnv); 2241 } 2242 2243 static void 2244 TestUTF32BE() { 2245 /* test input */ 2246 static const uint8_t in[]={ 2247 0x00, 0x00, 0x00, 0x61, 2248 0x00, 0x00, 0x30, 0x61, 2249 0x00, 0x00, 0xdc, 0x00, 2250 0x00, 0x00, 0xd8, 0x00, 2251 0x00, 0x00, 0xdf, 0xff, 2252 0x00, 0x00, 0xff, 0xfe, 2253 0x00, 0x10, 0xab, 0xcd, 2254 0x00, 0x10, 0xff, 0xff 2255 }; 2256 2257 /* expected test results */ 2258 static const int32_t results[]={ 2259 /* number of bytes read, code point */ 2260 4, 0x61, 2261 4, 0x3061, 2262 4, 0xfffd, 2263 4, 0xfffd, 2264 4, 0xfffd, 2265 4, 0xfffe, 2266 4, 0x10abcd, 2267 4, 0x10ffff 2268 }; 2269 2270 /* error test input */ 2271 static const uint8_t in2[]={ 2272 0x00, 0x00, 0x00, 0x61, 2273 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2274 0x00, 0x00, 0x00, 0x62, 2275 0xff, 0xff, 0xff, 0xff, /* 0xffffffff 
out of range */ 2276 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2277 0x00, 0x00, 0x01, 0x62, 2278 0x00, 0x00, 0x02, 0x62 2279 }; 2280 2281 /* expected error test results */ 2282 static const int32_t results2[]={ 2283 /* number of bytes read, code point */ 2284 4, 0x61, 2285 8, 0x62, 2286 12, 0x162, 2287 4, 0x262 2288 }; 2289 2290 UConverterToUCallback cb; 2291 const void *p; 2292 2293 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2294 UErrorCode errorCode=U_ZERO_ERROR; 2295 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2296 if(U_FAILURE(errorCode)) { 2297 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2298 return; 2299 } 2300 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2301 2302 /* Test the condition when source >= sourceLimit */ 2303 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2304 2305 /* test error behavior with a skip callback */ 2306 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2307 source=(const char *)in2; 2308 limit=(const char *)(in2+sizeof(in2)); 2309 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2310 2311 ucnv_close(cnv); 2312 } 2313 2314 static void 2315 TestUTF32LE() { 2316 /* test input */ 2317 static const uint8_t in[]={ 2318 0x61, 0x00, 0x00, 0x00, 2319 0x61, 0x30, 0x00, 0x00, 2320 0x00, 0xdc, 0x00, 0x00, 2321 0x00, 0xd8, 0x00, 0x00, 2322 0xff, 0xdf, 0x00, 0x00, 2323 0xfe, 0xff, 0x00, 0x00, 2324 0xcd, 0xab, 0x10, 0x00, 2325 0xff, 0xff, 0x10, 0x00 2326 }; 2327 2328 /* expected test results */ 2329 static const int32_t results[]={ 2330 /* number of bytes read, code point */ 2331 4, 0x61, 2332 4, 0x3061, 2333 4, 0xfffd, 2334 4, 0xfffd, 2335 4, 0xfffd, 2336 4, 0xfffe, 2337 4, 0x10abcd, 2338 4, 0x10ffff 2339 }; 2340 2341 /* error test input */ 2342 static const uint8_t in2[]={ 2343 0x61, 0x00, 0x00, 0x00, 2344 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2345 0x62, 0x00, 
0x00, 0x00, 2346 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2347 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2348 0x62, 0x01, 0x00, 0x00, 2349 0x62, 0x02, 0x00, 0x00, 2350 }; 2351 2352 /* expected error test results */ 2353 static const int32_t results2[]={ 2354 /* number of bytes read, code point */ 2355 4, 0x61, 2356 8, 0x62, 2357 12, 0x162, 2358 4, 0x262, 2359 }; 2360 2361 UConverterToUCallback cb; 2362 const void *p; 2363 2364 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2365 UErrorCode errorCode=U_ZERO_ERROR; 2366 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2367 if(U_FAILURE(errorCode)) { 2368 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2369 return; 2370 } 2371 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2372 2373 /* Test the condition when source >= sourceLimit */ 2374 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2375 2376 /* test error behavior with a skip callback */ 2377 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2378 source=(const char *)in2; 2379 limit=(const char *)(in2+sizeof(in2)); 2380 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2381 2382 ucnv_close(cnv); 2383 } 2384 2385 static void 2386 TestLATIN1() { 2387 /* test input */ 2388 static const uint8_t in[]={ 2389 0x61, 2390 0x31, 2391 0x32, 2392 0xc0, 2393 0xf0, 2394 0xf4, 2395 }; 2396 2397 /* expected test results */ 2398 static const int32_t results[]={ 2399 /* number of bytes read, code point */ 2400 1, 0x61, 2401 1, 0x31, 2402 1, 0x32, 2403 1, 0xc0, 2404 1, 0xf0, 2405 1, 0xf4, 2406 }; 2407 static const uint16_t in1[] = { 2408 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2409 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2410 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 
0x41, 0x88, 0x4c, 2411 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2412 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2413 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2414 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2415 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2416 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2417 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2418 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2419 0xcb, 0x82 2420 }; 2421 static const uint8_t out1[] = { 2422 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2423 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2424 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2425 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2426 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2427 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2428 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2429 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2430 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2431 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2432 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 
2433 0xcb, 0x82 2434 }; 2435 static const uint16_t in2[]={ 2436 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2437 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2438 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2439 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2440 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2441 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2442 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2443 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2444 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2445 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2446 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2447 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2448 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2449 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2450 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2451 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2452 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2453 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2454 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2455 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2456 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2457 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2458 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2459 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2460 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2461 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2462 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2463 0x37, 0x20, 0x2A, 0x2F, 2464 }; 2465 static const unsigned char out2[]={ 2466 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2467 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 
0x23, 0x27, 0x23, 2468 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2469 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2470 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2471 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2472 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2473 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2474 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2475 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2476 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2477 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2478 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2479 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2480 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2481 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2482 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2483 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2484 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2485 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2486 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2487 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2488 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2489 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2490 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2491 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2492 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2493 0x37, 0x20, 0x2A, 0x2F, 2494 }; 2495 const char *source=(const char *)in; 2496 const char *limit=(const char *)in+sizeof(in); 2497 2498 UErrorCode errorCode=U_ZERO_ERROR; 2499 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2500 if(U_FAILURE(errorCode)) { 2501 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2502 return; 
2503 } 2504 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2505 /* Test the condition when source >= sourceLimit */ 2506 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2507 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2508 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2509 2510 ucnv_close(cnv); 2511 } 2512 2513 static void 2514 TestSBCS() { 2515 /* test input */ 2516 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2517 /* expected test results */ 2518 static const int32_t results[]={ 2519 /* number of bytes read, code point */ 2520 1, 0x61, 2521 1, 0xbf, 2522 1, 0xc4, 2523 1, 0x2021, 2524 1, 0xf8ff, 2525 1, 0x00d9 2526 }; 2527 2528 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2529 UErrorCode errorCode=U_ZERO_ERROR; 2530 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2531 if(U_FAILURE(errorCode)) { 2532 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2533 return; 2534 } 2535 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2536 /* Test the condition when source >= sourceLimit */ 2537 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2538 /*Test for Illegal character */ /* 2539 { 2540 static const uint8_t input1[]={ 0xA1 }; 2541 const char* illegalsource=(const char*)input1; 2542 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2543 } 2544 */ 2545 ucnv_close(cnv); 2546 } 2547 2548 static void 2549 TestDBCS() { 2550 /* test input */ 2551 static const uint8_t in[]={ 2552 0x44, 0x6a, 2553 0xc4, 0x9c, 2554 0x7a, 0x74, 2555 0x46, 0xab, 2556 0x42, 0x5b, 2557 2558 }; 2559 2560 /* expected test results */ 2561 static const int32_t results[]={ 2562 /* number of bytes read, code point */ 2563 2, 0x00a7, 2564 2, 
0xe1d2, 2565 2, 0x6962, 2566 2, 0xf842, 2567 2, 0xffe5, 2568 }; 2569 2570 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2571 UErrorCode errorCode=U_ZERO_ERROR; 2572 2573 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2574 if(U_FAILURE(errorCode)) { 2575 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2576 return; 2577 } 2578 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2579 /* Test the condition when source >= sourceLimit */ 2580 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2581 /*Test for the condition where there is an invalid character*/ 2582 { 2583 static const uint8_t source2[]={0x1a, 0x1b}; 2584 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2585 } 2586 /*Test for the condition where we have a truncated char*/ 2587 { 2588 static const uint8_t source1[]={0xc4}; 2589 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2590 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2591 } 2592 ucnv_close(cnv); 2593 } 2594 2595 static void 2596 TestMBCS() { 2597 /* test input */ 2598 static const uint8_t in[]={ 2599 0x01, 2600 0xa6, 0xa3, 2601 0x00, 2602 0xa6, 0xa1, 2603 0x08, 2604 0xc2, 0x76, 2605 0xc2, 0x78, 2606 2607 }; 2608 2609 /* expected test results */ 2610 static const int32_t results[]={ 2611 /* number of bytes read, code point */ 2612 1, 0x0001, 2613 2, 0x250c, 2614 1, 0x0000, 2615 2, 0x2500, 2616 1, 0x0008, 2617 2, 0xd60c, 2618 2, 0xd60e, 2619 }; 2620 2621 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2622 UErrorCode errorCode=U_ZERO_ERROR; 2623 2624 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2625 if(U_FAILURE(errorCode)) { 2626 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", 
u_errorName(errorCode)); 2627 return; 2628 } 2629 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2630 /* Test the condition when source >= sourceLimit */ 2631 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2632 /*Test for the condition where there is an invalid character*/ 2633 { 2634 static const uint8_t source2[]={0xa1, 0x80}; 2635 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2636 } 2637 /*Test for the condition where we have a truncated char*/ 2638 { 2639 static const uint8_t source1[]={0xc4}; 2640 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2641 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2642 } 2643 ucnv_close(cnv); 2644 2645 } 2646 2647 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2648 static void 2649 TestICCRunout() { 2650 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2651 2652 const char *cnvName = "ibm-1363"; 2653 UErrorCode status = U_ZERO_ERROR; 2654 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; 2655 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ 2656 const char *source = sourceData; 2657 const char *sourceLim = sourceData+sizeof(sourceData); 2658 UChar c1, c2, c3; 2659 UConverter *cnv=ucnv_open(cnvName, &status); 2660 if(U_FAILURE(status)) { 2661 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2662 return; 2663 } 2664 2665 #if 0 2666 { 2667 UChar targetBuf[256]; 2668 UChar *target = targetBuf; 2669 UChar *targetLim = target+256; 2670 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2671 2672 log_info("After convert: target@%d, source@%d, status%s\n", 2673 target-targetBuf, source-sourceData, u_errorName(status)); 2674 2675 
if(U_FAILURE(status)) { 2676 log_err("Failed to convert: %s\n", u_errorName(status)); 2677 } else { 2678 2679 } 2680 } 2681 #endif 2682 2683 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2684 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); 2685 2686 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2687 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); 2688 2689 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2690 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); 2691 2692 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { 2693 log_verbose("OK\n"); 2694 } else { 2695 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); 2696 } 2697 2698 ucnv_close(cnv); 2699 2700 } 2701 #endif 2702 2703 #ifdef U_ENABLE_GENERIC_ISO_2022 2704 2705 static void 2706 TestISO_2022() { 2707 /* test input */ 2708 static const uint8_t in[]={ 2709 0x1b, 0x25, 0x42, 2710 0x31, 2711 0x32, 2712 0x61, 2713 0xc2, 0x80, 2714 0xe0, 0xa0, 0x80, 2715 0xf0, 0x90, 0x80, 0x80 2716 }; 2717 2718 2719 2720 /* expected test results */ 2721 static const int32_t results[]={ 2722 /* number of bytes read, code point */ 2723 4, 0x0031, /* 4 bytes including the escape sequence */ 2724 1, 0x0032, 2725 1, 0x61, 2726 2, 0x80, 2727 3, 0x800, 2728 4, 0x10000 2729 }; 2730 2731 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2732 UErrorCode errorCode=U_ZERO_ERROR; 2733 UConverter *cnv; 2734 2735 cnv=ucnv_open("ISO_2022", &errorCode); 2736 if(U_FAILURE(errorCode)) { 2737 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2738 return; 2739 } 2740 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2741 2742 /* Test the condition when source >= sourceLimit */ 2743 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2744 
TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2745 /*Test for the condition where we have a truncated char*/ 2746 { 2747 static const uint8_t source1[]={0xc4}; 2748 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2749 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2750 } 2751 /*Test for the condition where there is an invalid character*/ 2752 { 2753 static const uint8_t source2[]={0xa1, 0x01}; 2754 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2755 } 2756 ucnv_close(cnv); 2757 } 2758 2759 #endif 2760 2761 static void 2762 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2763 const UChar* uSource; 2764 const UChar* uSourceLimit; 2765 const char* cSource; 2766 const char* cSourceLimit; 2767 UChar *uTargetLimit =NULL; 2768 UChar *uTarget; 2769 char *cTarget; 2770 const char *cTargetLimit; 2771 char *cBuf; 2772 UChar *uBuf; /*,*test;*/ 2773 int32_t uBufSize = 120; 2774 int len=0; 2775 int i=2; 2776 UErrorCode errorCode=U_ZERO_ERROR; 2777 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2778 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2779 ucnv_reset(cnv); 2780 for(;--i>0; ){ 2781 uSource = (UChar*) source; 2782 uSourceLimit=(const UChar*)sourceLimit; 2783 cTarget = cBuf; 2784 uTarget = uBuf; 2785 cSource = cBuf; 2786 cTargetLimit = cBuf; 2787 uTargetLimit = uBuf; 2788 2789 do{ 2790 2791 cTargetLimit = cTargetLimit+ i; 2792 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2793 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2794 errorCode=U_ZERO_ERROR; 2795 continue; 2796 } 2797 2798 if(U_FAILURE(errorCode)){ 2799 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2800 return; 2801 } 2802 2803 }while 
(uSource<uSourceLimit); 2804 2805 cSourceLimit =cTarget; 2806 do{ 2807 uTargetLimit=uTargetLimit+i; 2808 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2809 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2810 errorCode=U_ZERO_ERROR; 2811 continue; 2812 } 2813 if(U_FAILURE(errorCode)){ 2814 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2815 return; 2816 } 2817 }while(cSource<cSourceLimit); 2818 2819 uSource = source; 2820 /*test =uBuf;*/ 2821 for(len=0;len<(int)(source - sourceLimit);len++){ 2822 if(uBuf[len]!=uSource[len]){ 2823 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2824 } 2825 } 2826 } 2827 free(uBuf); 2828 free(cBuf); 2829 } 2830 /* Test for Jitterbug 778 */ 2831 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2832 const UChar* uSource; 2833 const UChar* uSourceLimit; 2834 const char* cSource; 2835 UChar *uTargetLimit =NULL; 2836 UChar *uTarget; 2837 char *cTarget; 2838 const char *cTargetLimit; 2839 char *cBuf; 2840 UChar *uBuf,*test; 2841 int32_t uBufSize = 120; 2842 int numCharsInTarget=0; 2843 UErrorCode errorCode=U_ZERO_ERROR; 2844 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2845 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2846 uSource = source; 2847 uSourceLimit=sourceLimit; 2848 cTarget = cBuf; 2849 cTargetLimit = cBuf +uBufSize*5; 2850 uTarget = uBuf; 2851 uTargetLimit = uBuf+ uBufSize*5; 2852 ucnv_reset(cnv); 2853 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2854 if(U_FAILURE(errorCode)){ 2855 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2856 return; 2857 } 2858 cSource = cBuf; 2859 test =uBuf; 2860 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2861 if(U_FAILURE(errorCode)){ 2862 log_err("ucnv_toUChars 
conversion failed, reason %s\n", u_errorName(errorCode)); 2863 return; 2864 } 2865 uSource = source; 2866 while(uSource<uSourceLimit){ 2867 if(*test!=*uSource){ 2868 2869 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 2870 } 2871 uSource++; 2872 test++; 2873 } 2874 free(uBuf); 2875 free(cBuf); 2876 } 2877 2878 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2879 const UChar* uSource; 2880 const UChar* uSourceLimit; 2881 const char* cSource; 2882 const char* cSourceLimit; 2883 UChar *uTargetLimit =NULL; 2884 UChar *uTarget; 2885 char *cTarget; 2886 const char *cTargetLimit; 2887 char *cBuf; 2888 UChar *uBuf; /*,*test;*/ 2889 int32_t uBufSize = 120; 2890 int len=0; 2891 int i=2; 2892 const UChar *temp = sourceLimit; 2893 UErrorCode errorCode=U_ZERO_ERROR; 2894 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2895 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2896 2897 ucnv_reset(cnv); 2898 for(;--i>0;){ 2899 uSource = (UChar*) source; 2900 cTarget = cBuf; 2901 uTarget = uBuf; 2902 cSource = cBuf; 2903 cTargetLimit = cBuf; 2904 uTargetLimit = uBuf+uBufSize*5; 2905 cTargetLimit = cTargetLimit+uBufSize*10; 2906 uSourceLimit=uSource; 2907 do{ 2908 2909 if (uSourceLimit < sourceLimit) { 2910 uSourceLimit = uSourceLimit+1; 2911 } 2912 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2913 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2914 errorCode=U_ZERO_ERROR; 2915 continue; 2916 } 2917 2918 if(U_FAILURE(errorCode)){ 2919 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2920 return; 2921 } 2922 2923 }while (uSource<temp); 2924 2925 cSourceLimit =cBuf; 2926 do{ 2927 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2928 cSourceLimit = cSourceLimit+1; 2929 } 2930 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2931 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2932 errorCode=U_ZERO_ERROR; 2933 
continue; 2934 } 2935 if(U_FAILURE(errorCode)){ 2936 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2937 return; 2938 } 2939 }while(cSource<cTarget); 2940 2941 uSource = source; 2942 /*test =uBuf;*/ 2943 for(;len<(int)(source - sourceLimit);len++){ 2944 if(uBuf[len]!=uSource[len]){ 2945 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2946 } 2947 } 2948 } 2949 free(uBuf); 2950 free(cBuf); 2951 } 2952 static void 2953 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2954 const uint16_t results[], const char* message){ 2955 /* const char* s0; */ 2956 const char* s=(char*)source; 2957 const uint16_t *r=results; 2958 UErrorCode errorCode=U_ZERO_ERROR; 2959 uint32_t c,exC; 2960 ucnv_reset(cnv); 2961 while(s<limit) { 2962 /* s0=s; */ 2963 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2964 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2965 break; /* no more significant input */ 2966 } else if(U_FAILURE(errorCode)) { 2967 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2968 break; 2969 } else { 2970 if(UTF_IS_FIRST_SURROGATE(*r)){ 2971 int i =0, len = 2; 2972 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE); 2973 r++; 2974 }else{ 2975 exC = *r; 2976 } 2977 if(c!=(uint32_t)(exC)) 2978 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 2979 } 2980 r++; 2981 } 2982 } 2983 2984 static int TestJitterbug930(const char* enc){ 2985 UErrorCode err = U_ZERO_ERROR; 2986 UConverter*converter; 2987 char out[80]; 2988 char*target = out; 2989 UChar in[4]; 2990 const UChar*source = in; 2991 int32_t off[80]; 2992 int32_t* offsets = off; 2993 int numOffWritten=0; 2994 UBool flush = 0; 2995 converter = my_ucnv_open(enc, &err); 2996 2997 in[0] = 0x41; /* 0x4E00;*/ 2998 in[1] = 0x4E01; 2999 in[2] = 0x4E02; 3000 in[3] = 0x4E03; 3001 3002 memset(off, '*', sizeof(off)); 3003 3004 ucnv_fromUnicode (converter, 3005 &target, 3006 target+2, 
3007 &source, 3008 source+3, 3009 offsets, 3010 flush, 3011 &err); 3012 3013 /* writes three bytes into the output buffer: 41 1B 24 3014 * but offsets contains 0 1 1 3015 */ 3016 while(*offsets< off[10]){ 3017 numOffWritten++; 3018 offsets++; 3019 } 3020 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3021 if(numOffWritten!= (int)(target-out)){ 3022 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3023 } 3024 3025 err = U_ZERO_ERROR; 3026 3027 memset(off,'*' , sizeof(off)); 3028 3029 flush = 1; 3030 offsets=off; 3031 ucnv_fromUnicode (converter, 3032 &target, 3033 target+4, 3034 &source, 3035 source, 3036 offsets, 3037 flush, 3038 &err); 3039 numOffWritten=0; 3040 while(*offsets< off[10]){ 3041 numOffWritten++; 3042 if(*offsets!= -1){ 3043 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3044 } 3045 offsets++; 3046 } 3047 3048 /* writes 42 43 7A into output buffer, 3049 * offsets contains -1 -1 -1 3050 */ 3051 ucnv_close(converter); 3052 return 0; 3053 } 3054 3055 static void 3056 TestHZ() { 3057 /* test input */ 3058 static const uint16_t in[]={ 3059 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3060 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3061 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3062 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3063 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3064 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3065 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3066 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3067 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3068 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 
0x004E, 0x004F, 0x0050, 3069 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3070 0x005A, 0x005B, 0x005C, 0x000A 3071 }; 3072 const UChar* uSource; 3073 const UChar* uSourceLimit; 3074 const char* cSource; 3075 const char* cSourceLimit; 3076 UChar *uTargetLimit =NULL; 3077 UChar *uTarget; 3078 char *cTarget; 3079 const char *cTargetLimit; 3080 char *cBuf; 3081 UChar *uBuf,*test; 3082 int32_t uBufSize = 120; 3083 UErrorCode errorCode=U_ZERO_ERROR; 3084 UConverter *cnv; 3085 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3086 int32_t* myOff= offsets; 3087 cnv=ucnv_open("HZ", &errorCode); 3088 if(U_FAILURE(errorCode)) { 3089 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3090 return; 3091 } 3092 3093 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3094 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3095 uSource = (const UChar*)in; 3096 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3097 cTarget = cBuf; 3098 cTargetLimit = cBuf +uBufSize*5; 3099 uTarget = uBuf; 3100 uTargetLimit = uBuf+ uBufSize*5; 3101 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3102 if(U_FAILURE(errorCode)){ 3103 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3104 return; 3105 } 3106 cSource = cBuf; 3107 cSourceLimit =cTarget; 3108 test =uBuf; 3109 myOff=offsets; 3110 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3111 if(U_FAILURE(errorCode)){ 3112 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3113 return; 3114 } 3115 uSource = (const UChar*)in; 3116 while(uSource<uSourceLimit){ 3117 if(*test!=*uSource){ 3118 3119 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3120 } 3121 uSource++; 3122 test++; 3123 } 3124 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3125 TestSmallTargetBuffer(in,(const UChar*)in + 
(sizeof(in)/sizeof(in[0])),cnv); 3126 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3127 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3128 TestJitterbug930("csISO2022JP"); 3129 ucnv_close(cnv); 3130 free(offsets); 3131 free(uBuf); 3132 free(cBuf); 3133 } 3134 3135 static void 3136 TestISCII(){ 3137 /* test input */ 3138 static const uint16_t in[]={ 3139 /* test full range of Devanagari */ 3140 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3141 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3142 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3143 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3144 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3145 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3146 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3147 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3148 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3149 0x096D,0x096E,0x096F, 3150 /* test Soft halant*/ 3151 0x0915,0x094d, 0x200D, 3152 /* test explicit halant */ 3153 0x0915,0x094d, 0x200c, 3154 /* test double danda */ 3155 0x965, 3156 /* test ASCII */ 3157 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3158 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3159 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3160 /* tests from Lotus */ 3161 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3162 0x0930,0x094D,0x200D, 3163 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3164 0x0915,0x0921,0x002B,0x095F, 3165 /* tamil range */ 3166 0x0B86, 0xB87, 0xB88, 3167 /* telugu range */ 3168 0x0C05, 0x0C02, 0x0C03,0x0c31, 3169 /* kannada range */ 3170 0x0C85, 0xC82, 0x0C83, 3171 /* test Abbr sign and Anudatta */ 3172 0x0970, 0x952, 3173 /* 0x0958, 3174 0x0959, 3175 0x095A, 3176 0x095B, 3177 0x095C, 3178 0x095D, 3179 0x095E, 3180 0x095F,*/ 3181 0x0960 /* 
Vocallic RRI 0xAB, 0xE9*/, 3182 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3183 0x090C , 3184 0x0962, 3185 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3186 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3187 0x0950 /* OM Symbol 0xa1, 0xE9,*/, 3188 0x093D /* Avagraha 0xEA, 0xE9*/, 3189 0x0958, 3190 0x0959, 3191 0x095A, 3192 0x095B, 3193 0x095C, 3194 0x095D, 3195 0x095E, 3196 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3197 }; 3198 static const unsigned char byteArr[]={ 3199 3200 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3201 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3202 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3203 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3204 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3205 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3206 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3207 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3208 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3209 0xf8,0xf9,0xfa, 3210 /* test soft halant */ 3211 0xb3, 0xE8, 0xE9, 3212 /* test explicit halant */ 3213 0xb3, 0xE8, 0xE8, 3214 /* test double danda */ 3215 0xea, 0xea, 3216 /* test ASCII */ 3217 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3218 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3219 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3220 /* test ATR code */ 3221 3222 /* tests from Lotus */ 3223 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3224 0xEF,0x42,0xCF,0xE8,0xD9, 3225 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3226 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3227 /* tamil range */ 3228 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3229 /* telugu range */ 3230 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3231 /* kannada range */ 3232 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3233 /* anudatta and abbreviation sign */ 3234 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3235 3236 3237 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3238 3239 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3240 3241 0xa6, 0xE9,/* Vowel I + NUKTA 
0x090C*/ 3242 3243 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3244 3245 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3246 3247 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3248 3249 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3250 3251 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ 3252 3253 0xB3, 0xE9, /* Ka + NUKTA */ 3254 3255 0xB4, 0xE9, /* Kha + NUKTA */ 3256 3257 0xB5, 0xE9, /* Ga + NUKTA */ 3258 3259 0xBA, 0xE9, 3260 3261 0xBF, 0xE9, 3262 3263 0xC0, 0xE9, 3264 3265 0xC9, 0xE9, 3266 /* INV halant RA */ 3267 0xD9, 0xE8, 0xCF, 3268 0x00, 0x00A0, 3269 /* just consume unhandled codepoints */ 3270 0xEF, 0x30, 3271 3272 }; 3273 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3274 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3275 3276 } 3277 3278 static void 3279 TestISO_2022_JP() { 3280 /* test input */ 3281 static const uint16_t in[]={ 3282 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3283 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3284 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3285 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3286 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3287 0x201D, 0x3014, 0x000D, 0x000A, 3288 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3289 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3290 }; 3291 const UChar* uSource; 3292 const UChar* uSourceLimit; 3293 const char* cSource; 3294 const char* cSourceLimit; 3295 UChar *uTargetLimit =NULL; 3296 UChar *uTarget; 3297 char *cTarget; 3298 const char *cTargetLimit; 3299 char *cBuf; 3300 UChar *uBuf,*test; 3301 int32_t uBufSize = 120; 3302 UErrorCode errorCode=U_ZERO_ERROR; 3303 UConverter *cnv; 3304 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3305 int32_t* myOff= 
offsets; 3306 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3307 if(U_FAILURE(errorCode)) { 3308 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3309 return; 3310 } 3311 3312 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3313 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3314 uSource = (const UChar*)in; 3315 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3316 cTarget = cBuf; 3317 cTargetLimit = cBuf +uBufSize*5; 3318 uTarget = uBuf; 3319 uTargetLimit = uBuf+ uBufSize*5; 3320 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3321 if(U_FAILURE(errorCode)){ 3322 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3323 return; 3324 } 3325 cSource = cBuf; 3326 cSourceLimit =cTarget; 3327 test =uBuf; 3328 myOff=offsets; 3329 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3330 if(U_FAILURE(errorCode)){ 3331 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3332 return; 3333 } 3334 3335 uSource = (const UChar*)in; 3336 while(uSource<uSourceLimit){ 3337 if(*test!=*uSource){ 3338 3339 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3340 } 3341 uSource++; 3342 test++; 3343 } 3344 3345 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3346 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3347 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3348 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3349 TestJitterbug930("csISO2022JP"); 3350 ucnv_close(cnv); 3351 free(uBuf); 3352 free(cBuf); 3353 free(offsets); 3354 } 3355 3356 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3357 const UChar* uSource; 3358 const UChar* uSourceLimit; 3359 const char* cSource; 3360 const char* cSourceLimit; 
3361 UChar *uTargetLimit =NULL; 3362 UChar *uTarget; 3363 char *cTarget; 3364 const char *cTargetLimit; 3365 char *cBuf; 3366 UChar *uBuf,*test; 3367 int32_t uBufSize = 120*10; 3368 UErrorCode errorCode=U_ZERO_ERROR; 3369 UConverter *cnv; 3370 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3371 int32_t* myOff= offsets; 3372 cnv=my_ucnv_open(conv, &errorCode); 3373 if(U_FAILURE(errorCode)) { 3374 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3375 return; 3376 } 3377 3378 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3379 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3380 uSource = (const UChar*)in; 3381 uSourceLimit=uSource+len; 3382 cTarget = cBuf; 3383 cTargetLimit = cBuf +uBufSize; 3384 uTarget = uBuf; 3385 uTargetLimit = uBuf+ uBufSize; 3386 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3387 if(U_FAILURE(errorCode)){ 3388 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3389 return; 3390 } 3391 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3392 cSource = cBuf; 3393 cSourceLimit =cTarget; 3394 test =uBuf; 3395 myOff=offsets; 3396 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3397 if(U_FAILURE(errorCode)){ 3398 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3399 return; 3400 } 3401 3402 uSource = (const UChar*)in; 3403 while(uSource<uSourceLimit){ 3404 if(*test!=*uSource){ 3405 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3406 } 3407 uSource++; 3408 test++; 3409 } 3410 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3411 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3412 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3413 if(byteArr && byteArrLen!=0){ 3414 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 
3415 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3416 { 3417 cSource = byteArr; 3418 cSourceLimit = cSource+byteArrLen; 3419 test=uBuf; 3420 myOff = offsets; 3421 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3422 if(U_FAILURE(errorCode)){ 3423 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3424 return; 3425 } 3426 3427 uSource = (const UChar*)in; 3428 while(uSource<uSourceLimit){ 3429 if(*test!=*uSource){ 3430 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3431 } 3432 uSource++; 3433 test++; 3434 } 3435 } 3436 } 3437 3438 ucnv_close(cnv); 3439 free(uBuf); 3440 free(cBuf); 3441 free(offsets); 3442 } 3443 static UChar U_CALLCONV 3444 _charAt(int32_t offset, void *context) { 3445 return ((char*)context)[offset]; 3446 } 3447 3448 static int32_t 3449 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3450 int32_t srcIndex=0; 3451 int32_t dstIndex=0; 3452 if(U_FAILURE(*status)){ 3453 return 0; 3454 } 3455 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3456 *status = U_ILLEGAL_ARGUMENT_ERROR; 3457 return 0; 3458 } 3459 if(srcLen==-1){ 3460 srcLen = (int32_t)uprv_strlen(src); 3461 } 3462 3463 for (; srcIndex<srcLen; ) { 3464 UChar32 c = src[srcIndex++]; 3465 if (c == 0x005C /*'\\'*/) { 3466 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3467 if (c == (UChar32)0xFFFFFFFF) { 3468 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3469 break; /* invalid escape sequence */ 3470 } 3471 } 3472 if(dstIndex < dstLen){ 3473 if(c>0xFFFF){ 3474 dst[dstIndex++] = UTF16_LEAD(c); 3475 if(dstIndex<dstLen){ 3476 dst[dstIndex]=UTF16_TRAIL(c); 3477 }else{ 3478 *status=U_BUFFER_OVERFLOW_ERROR; 3479 } 3480 }else{ 3481 dst[dstIndex]=(UChar)c; 3482 } 3483 3484 }else{ 3485 *status = U_BUFFER_OVERFLOW_ERROR; 3486 } 3487 dstIndex++; /* for preflighting */ 3488 } 3489 return dstIndex; 3490 } 3491 3492 
static void 3493 TestFullRoundtrip(const char* cp){ 3494 UChar usource[10] ={0}; 3495 UChar nsrc[10] = {0}; 3496 uint32_t i=1; 3497 int len=0, ulen; 3498 nsrc[0]=0x0061; 3499 /* Test codepoint 0 */ 3500 TestConv(usource,1,cp,"",NULL,0); 3501 TestConv(usource,2,cp,"",NULL,0); 3502 nsrc[2]=0x5555; 3503 TestConv(nsrc,3,cp,"",NULL,0); 3504 3505 for(;i<=0x10FFFF;i++){ 3506 if(i==0xD800){ 3507 i=0xDFFF; 3508 continue; 3509 } 3510 if(i<=0xFFFF){ 3511 usource[0] =(UChar) i; 3512 len=1; 3513 }else{ 3514 usource[0]=UTF16_LEAD(i); 3515 usource[1]=UTF16_TRAIL(i); 3516 len=2; 3517 } 3518 ulen=len; 3519 if(i==0x80) { 3520 usource[2]=0; 3521 } 3522 /* Test only single code points */ 3523 TestConv(usource,ulen,cp,"",NULL,0); 3524 /* Test codepoint repeated twice */ 3525 usource[ulen]=usource[0]; 3526 usource[ulen+1]=usource[1]; 3527 ulen+=len; 3528 TestConv(usource,ulen,cp,"",NULL,0); 3529 /* Test codepoint repeated 3 times */ 3530 usource[ulen]=usource[0]; 3531 usource[ulen+1]=usource[1]; 3532 ulen+=len; 3533 TestConv(usource,ulen,cp,"",NULL,0); 3534 /* Test codepoint in between 2 codepoints */ 3535 nsrc[1]=usource[0]; 3536 nsrc[2]=usource[1]; 3537 nsrc[len+1]=0x5555; 3538 TestConv(nsrc,len+2,cp,"",NULL,0); 3539 uprv_memset(usource,0,sizeof(UChar)*10); 3540 } 3541 } 3542 3543 static void 3544 TestRoundTrippingAllUTF(void){ 3545 if(!getTestOption(QUICK_OPTION)){ 3546 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3547 TestFullRoundtrip("BOCU-1"); 3548 log_verbose("Running exhaustive round trip test for SCSU\n"); 3549 TestFullRoundtrip("SCSU"); 3550 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3551 TestFullRoundtrip("UTF-8"); 3552 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3553 TestFullRoundtrip("CESU-8"); 3554 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3555 TestFullRoundtrip("UTF-16BE"); 3556 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3557 TestFullRoundtrip("UTF-16LE"); 3558 
log_verbose("Running exhaustive round trip test for UTF-16\n"); 3559 TestFullRoundtrip("UTF-16"); 3560 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3561 TestFullRoundtrip("UTF-32BE"); 3562 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3563 TestFullRoundtrip("UTF-32LE"); 3564 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3565 TestFullRoundtrip("UTF-32"); 3566 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3567 TestFullRoundtrip("UTF-7"); 3568 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3569 TestFullRoundtrip("UTF-7,version=1"); 3570 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3571 TestFullRoundtrip("IMAP-mailbox-name"); 3572 log_verbose("Running exhaustive round trip test for GB18030\n"); 3573 TestFullRoundtrip("GB18030"); 3574 } 3575 } 3576 3577 static void 3578 TestSCSU() { 3579 3580 static const uint16_t germanUTF16[]={ 3581 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3582 }; 3583 3584 static const uint8_t germanSCSU[]={ 3585 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3586 }; 3587 3588 static const uint16_t russianUTF16[]={ 3589 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3590 }; 3591 3592 static const uint8_t russianSCSU[]={ 3593 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3594 }; 3595 3596 static const uint16_t japaneseUTF16[]={ 3597 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3598 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3599 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3600 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3601 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3602 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 3603 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3604 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3605 0x30b3, 0x30f3, 0x300c, 0x30de, 
0x30c3, 0x30af, 0xff08, 0x30de, 3606 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3607 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3608 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3609 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3610 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3611 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3612 }; 3613 3614 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3615 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3616 static const uint8_t japaneseSCSU[]={ 3617 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3618 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3619 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3620 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3621 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3622 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3623 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3624 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3625 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3626 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3627 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3628 0xcb, 0x82 3629 }; 3630 3631 static const uint16_t allFeaturesUTF16[]={ 3632 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3633 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 
0x00df, 3634 0x01df, 0xf000, 0xdbff, 0xdfff 3635 }; 3636 3637 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3638 * result here (34B vs. 35B) 3639 */ 3640 static const uint8_t allFeaturesSCSU[]={ 3641 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3642 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3643 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3644 0xdf, 0x14, 0x80, 0x15, 0xff 3645 }; 3646 static const uint16_t monkeyIn[]={ 3647 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3648 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3649 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3650 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3651 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3652 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3653 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3654 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3655 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3656 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3657 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3658 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3659 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3660 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3661 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3662 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3663 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3664 0x038E, 0x038F, 0x0390, 0x0391, 
0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3665 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3666 /* test non-BMP code points */ 3667 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3668 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3669 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3670 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3671 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3672 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3673 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3674 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3675 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3676 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3677 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3678 3679 3680 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3681 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3682 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3683 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3684 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3685 }; 3686 static const char *fTestCases [] = { 3687 "\\ud800\\udc00", /* smallest surrogate*/ 3688 "\\ud8ff\\udcff", 3689 "\\udBff\\udFff", /* largest surrogate pair*/ 3690 "\\ud834\\udc00", 3691 "\\U0010FFFF", 3692 "Hello \\u9292 \\u9192 World!", 3693 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3694 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3695 3696 "\\u0648\\u06c8", /* catch missing reset*/ 3697 "\\u0648\\u06c8", 3698 3699 "\\u4444\\uE001", /* lowest quotable*/ 3700 "\\u4444\\uf2FF", /* highest 
quotable*/ 3701 "\\u4444\\uf188\\u4444", 3702 "\\u4444\\uf188\\uf288", 3703 "\\u4444\\uf188abc\\u0429\\uf288", 3704 "\\u9292\\u2222", 3705 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3706 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3707 "Hello World!123456", 3708 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3709 3710 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3711 "abc\\u4411d", /* uses SQU*/ 3712 "abc\\u4411\\u4412d",/* uses SCU*/ 3713 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3714 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3715 "\\u9292\\u2222", 3716 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3717 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3718 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3719 3720 "", /* empty input*/ 3721 "\\u0000", /* smallest BMP character*/ 3722 "\\uFFFF", /* largest BMP character*/ 3723 3724 /* regression tests*/ 3725 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3726 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3727 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3728 "\\u0041\\u00df\\u0401\\u015f", 3729 "\\u9066\\u2123abc", 3730 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3731 
"\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3732 }; 3733 int i=0; 3734 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3735 const char* cSrc = fTestCases[i]; 3736 UErrorCode status = U_ZERO_ERROR; 3737 int32_t cSrcLen,srcLen; 3738 UChar* src; 3739 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3740 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3741 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3742 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3743 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3744 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3745 free(src); 3746 } 3747 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3748 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3749 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3750 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3751 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3752 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3753 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3754 } 3755 3756 #if !UCONFIG_NO_LEGACY_CONVERSION 3757 static void TestJitterbug2346(){ 3758 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3759 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3760 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3761 3762 UChar uTarget[500]={'\0'}; 3763 UChar* utarget=uTarget; 3764 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3765 3766 char cTarget[500]={'\0'}; 3767 char* ctarget=cTarget; 3768 char* 
ctargetLimit=cTarget+sizeof(cTarget); 3769 const char* csource=source; 3770 UChar* temp = expected; 3771 UErrorCode err=U_ZERO_ERROR; 3772 3773 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3774 if(U_FAILURE(err)) { 3775 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3776 return; 3777 } 3778 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3779 if(U_FAILURE(err)) { 3780 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3781 return; 3782 } 3783 utargetLimit=utarget; 3784 utarget = uTarget; 3785 while(utarget<utargetLimit){ 3786 if(*temp!=*utarget){ 3787 3788 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3789 } 3790 utarget++; 3791 temp++; 3792 } 3793 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3794 if(U_FAILURE(err)) { 3795 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3796 return; 3797 } 3798 ctargetLimit=ctarget; 3799 ctarget =cTarget; 3800 ucnv_close(conv); 3801 3802 3803 } 3804 3805 static void 3806 TestISO_2022_JP_1() { 3807 /* test input */ 3808 static const uint16_t in[]={ 3809 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3810 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3811 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3812 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3813 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3814 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3815 0x201D, 0x000D, 0x000A, 3816 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3817 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3818 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3819 
0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3820 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3821 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3822 }; 3823 const UChar* uSource; 3824 const UChar* uSourceLimit; 3825 const char* cSource; 3826 const char* cSourceLimit; 3827 UChar *uTargetLimit =NULL; 3828 UChar *uTarget; 3829 char *cTarget; 3830 const char *cTargetLimit; 3831 char *cBuf; 3832 UChar *uBuf,*test; 3833 int32_t uBufSize = 120; 3834 UErrorCode errorCode=U_ZERO_ERROR; 3835 UConverter *cnv; 3836 3837 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3838 if(U_FAILURE(errorCode)) { 3839 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3840 return; 3841 } 3842 3843 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3844 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3845 uSource = (const UChar*)in; 3846 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3847 cTarget = cBuf; 3848 cTargetLimit = cBuf +uBufSize*5; 3849 uTarget = uBuf; 3850 uTargetLimit = uBuf+ uBufSize*5; 3851 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3852 if(U_FAILURE(errorCode)){ 3853 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3854 return; 3855 } 3856 cSource = cBuf; 3857 cSourceLimit =cTarget; 3858 test =uBuf; 3859 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3860 if(U_FAILURE(errorCode)){ 3861 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3862 return; 3863 } 3864 uSource = (const UChar*)in; 3865 while(uSource<uSourceLimit){ 3866 if(*test!=*uSource){ 3867 3868 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3869 } 3870 uSource++; 3871 test++; 3872 } 3873 /*ucnv_close(cnv); 3874 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3875 /*Test for the condition where there is an invalid 
character*/ 3876 ucnv_reset(cnv); 3877 { 3878 static const uint8_t source2[]={0x0e,0x24,0x053}; 3879 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3880 } 3881 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3882 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3883 ucnv_close(cnv); 3884 free(uBuf); 3885 free(cBuf); 3886 } 3887 3888 static void 3889 TestISO_2022_JP_2() { 3890 /* test input */ 3891 static const uint16_t in[]={ 3892 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3893 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3894 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3895 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3896 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3897 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3898 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3899 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3900 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3901 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3902 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3903 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3904 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3905 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3906 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3907 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3908 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 
0x0389, 0x000D, 0x000A, 3909 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3910 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3911 }; 3912 const UChar* uSource; 3913 const UChar* uSourceLimit; 3914 const char* cSource; 3915 const char* cSourceLimit; 3916 UChar *uTargetLimit =NULL; 3917 UChar *uTarget; 3918 char *cTarget; 3919 const char *cTargetLimit; 3920 char *cBuf; 3921 UChar *uBuf,*test; 3922 int32_t uBufSize = 120; 3923 UErrorCode errorCode=U_ZERO_ERROR; 3924 UConverter *cnv; 3925 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3926 int32_t* myOff= offsets; 3927 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3928 if(U_FAILURE(errorCode)) { 3929 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3930 return; 3931 } 3932 3933 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3934 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3935 uSource = (const UChar*)in; 3936 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3937 cTarget = cBuf; 3938 cTargetLimit = cBuf +uBufSize*5; 3939 uTarget = uBuf; 3940 uTargetLimit = uBuf+ uBufSize*5; 3941 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3942 if(U_FAILURE(errorCode)){ 3943 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3944 return; 3945 } 3946 cSource = cBuf; 3947 cSourceLimit =cTarget; 3948 test =uBuf; 3949 myOff=offsets; 3950 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3951 if(U_FAILURE(errorCode)){ 3952 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3953 return; 3954 } 3955 uSource = (const UChar*)in; 3956 while(uSource<uSourceLimit){ 3957 if(*test!=*uSource){ 3958 3959 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3960 } 3961 uSource++; 3962 test++; 3963 } 3964 TestSmallTargetBuffer(in,(const UChar*)in + 
(sizeof(in)/sizeof(in[0])),cnv); 3965 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3966 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3967 /*Test for the condition where there is an invalid character*/ 3968 ucnv_reset(cnv); 3969 { 3970 static const uint8_t source2[]={0x0e,0x24,0x053}; 3971 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 3972 } 3973 ucnv_close(cnv); 3974 free(uBuf); 3975 free(cBuf); 3976 free(offsets); 3977 } 3978
/*
 * Round-trip test for the converter opened as "ISO_2022,locale=kr".
 *
 * Encodes `in` with ucnv_fromUnicode(), decodes the produced bytes with
 * ucnv_toUnicode(), and reports every UChar that differs from the input.
 * It then runs the shared helpers (TestGetNextUChar2022, TestSmallTargetBuffer,
 * TestSmallSourceBuffer, TestToAndFromUChars, TestJitterbug930) and finally
 * checks that the bytes in `source2` make ucnv_getNextUChar-based
 * TestNextUCharError() report U_ILLEGAL_ESCAPE_SEQUENCE when the STOP
 * callback is installed.
 *
 * The three work buffers are allocated only after the converter has been
 * opened, and every exit after that point goes through `cleanup`, so the
 * converter and the buffers are released on error paths as well.
 */
static void
TestISO_2022_KR() {
    /* test input */
    static const uint16_t in[]={
                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
                   ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
                   ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
                   ,0x53E3,0x53E4,0x000A,0x000D};
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t *offsets = NULL;
    int32_t uBufSize = 120;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
    if(U_FAILURE(errorCode)) {
        /* Nothing has been allocated yet, so a plain return leaks nothing. */
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
    offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    if(uBuf==NULL || cBuf==NULL || offsets==NULL) {
        log_err("Unable to allocate the ISO-2022-KR test buffers\n");
        goto cleanup;
    }

    uSource = (const UChar*)in;
    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;

    /* Unicode -> bytes */
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,offsets,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* bytes -> Unicode; cTarget marks the end of the bytes just written */
    cSource = cBuf;
    cSourceLimit =cTarget;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,offsets,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* Compare the decoded text with the original input, unit by unit. */
    test =uBuf;
    for(uSource = (const UChar*)in; uSource<uSourceLimit; uSource++, test++){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
        }
    }

    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
    TestJitterbug930("csISO2022KR");

    /*Test for the condition where there is an invalid character*/
    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x1b,0x24,0x053};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
    }

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
4057 4058 static void 4059 TestISO_2022_KR_1() { 4060 /* test input */ 4061 static const uint16_t in[]={ 4062 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4063 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4064 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4065
,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4066 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4067 ,0x53E3,0x53E4,0x000A,0x000D}; 4068 const UChar* uSource; 4069 const UChar* uSourceLimit; 4070 const char* cSource; 4071 const char* cSourceLimit; 4072 UChar *uTargetLimit =NULL; 4073 UChar *uTarget; 4074 char *cTarget; 4075 const char *cTargetLimit; 4076 char *cBuf; 4077 UChar *uBuf,*test; 4078 int32_t uBufSize = 120; 4079 UErrorCode errorCode=U_ZERO_ERROR; 4080 UConverter *cnv; 4081 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4082 int32_t* myOff= offsets; 4083 cnv=ucnv_open("ibm-25546", &errorCode); 4084 if(U_FAILURE(errorCode)) { 4085 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4086 return; 4087 } 4088 4089 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4090 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4091 uSource = (const UChar*)in; 4092 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4093 cTarget = cBuf; 4094 cTargetLimit = cBuf +uBufSize*5; 4095 uTarget = uBuf; 4096 uTargetLimit = uBuf+ uBufSize*5; 4097 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4098 if(U_FAILURE(errorCode)){ 4099 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4100 return; 4101 } 4102 cSource = cBuf; 4103 cSourceLimit =cTarget; 4104 test =uBuf; 4105 myOff=offsets; 4106 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4107 if(U_FAILURE(errorCode)){ 4108 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4109 return; 4110 } 4111 uSource = (const UChar*)in; 4112 while(uSource<uSourceLimit){ 4113 if(*test!=*uSource){ 4114 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4115 } 4116 uSource++; 4117 test++; 4118 } 4119 ucnv_reset(cnv); 4120 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR 
encoding"); 4121 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4122 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4123 ucnv_reset(cnv); 4124 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4125 /*Test for the condition where there is an invalid character*/ 4126 ucnv_reset(cnv); 4127 { 4128 static const uint8_t source2[]={0x1b,0x24,0x053}; 4129 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4130 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4131 } 4132 ucnv_close(cnv); 4133 free(uBuf); 4134 free(cBuf); 4135 free(offsets); 4136 } 4137 4138 static void TestJitterbug2411(){ 4139 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4140 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4141 UConverter* kr=NULL, *kr1=NULL; 4142 UErrorCode errorCode = U_ZERO_ERROR; 4143 UChar tgt[100]={'\0'}; 4144 UChar* target = tgt; 4145 UChar* targetLimit = target+100; 4146 kr=ucnv_open("iso-2022-kr", &errorCode); 4147 if(U_FAILURE(errorCode)) { 4148 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4149 return; 4150 } 4151 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4152 if(U_FAILURE(errorCode)) { 4153 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4154 return; 4155 } 4156 kr1 = ucnv_open("ibm-25546", &errorCode); 4157 if(U_FAILURE(errorCode)) { 4158 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4159 return; 4160 } 4161 target = tgt; 4162 targetLimit = target+100; 4163 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4164 4165 
if(U_FAILURE(errorCode)) { 4166 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4167 return; 4168 } 4169 4170 ucnv_close(kr); 4171 ucnv_close(kr1); 4172 4173 } 4174 4175 static void 4176 TestJIS(){ 4177 /* From Unicode moved to testdata/conversion.txt */ 4178 /*To Unicode*/ 4179 { 4180 static const uint8_t sampleTextJIS[] = { 4181 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4182 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4183 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4184 }; 4185 static const uint16_t expectedISO2022JIS[] = { 4186 0x0041, 0x0042, 4187 0xFF81, 0xFF82, 4188 0x3000 4189 }; 4190 static const int32_t toISO2022JISOffs[]={ 4191 3,4, 4192 8,9, 4193 16 4194 }; 4195 4196 static const uint8_t sampleTextJIS7[] = { 4197 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4198 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4199 0x1b,0x24,0x42,0x21,0x21, 4200 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4201 0x21,0x22, 4202 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4203 }; 4204 static const uint16_t expectedISO2022JIS7[] = { 4205 0x0041, 0x0042, 4206 0xFF81, 0xFF82, 4207 0x3000, 4208 0xFF81, 0xFF82, 4209 0x3001, 4210 0x3000 4211 }; 4212 static const int32_t toISO2022JIS7Offs[]={ 4213 3,4, 4214 8,9, 4215 13,16, 4216 17, 4217 19,27 4218 }; 4219 static const uint8_t sampleTextJIS8[] = { 4220 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4221 0xa1,0xc8,0xd9,/*Katakana Set*/ 4222 0x1b,0x28,0x42, 4223 0x41,0x42, 4224 0xb1,0xc3, /*Katakana Set*/ 4225 0x1b,0x24,0x42,0x21,0x21 4226 }; 4227 static const uint16_t expectedISO2022JIS8[] = { 4228 0x0041, 0x0042, 4229 0xff61, 0xff88, 0xff99, 4230 0x0041, 0x0042, 4231 0xff71, 0xff83, 4232 0x3000 4233 }; 4234 static const int32_t toISO2022JIS8Offs[]={ 4235 3, 4, 5, 6, 4236 7, 11, 12, 13, 4237 14, 18, 4238 }; 4239 4240 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4241 
sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4242 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4243 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4244 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4245 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4246 } 4247 4248 } 4249 4250 4251 #if 0 4252 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 4253 4254 static void TestJitterbug915(){ 4255 /* tests for roundtripping of the below sequence 4256 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4257 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4258 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4259 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4260 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4261 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4262 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4263 */ 4264 static const char cSource[]={ 4265 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4266 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4267 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4268 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4269 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 4270 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4271 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4272 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4273 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4274 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4275 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4276 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4277 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4278 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 
0x4F, 0x22, 0x6B, 0x1B, 4279 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4280 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4281 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4282 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4283 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4284 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4285 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4286 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4287 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4288 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4289 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4290 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4291 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4292 0x37, 0x20, 0x2A, 0x2F 4293 }; 4294 UChar uTarget[500]={'\0'}; 4295 UChar* utarget=uTarget; 4296 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4297 4298 char cTarget[500]={'\0'}; 4299 char* ctarget=cTarget; 4300 char* ctargetLimit=cTarget+sizeof(cTarget); 4301 const char* csource=cSource; 4302 const char* tempSrc = cSource; 4303 UErrorCode err=U_ZERO_ERROR; 4304 4305 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4306 if(U_FAILURE(err)) { 4307 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4308 return; 4309 } 4310 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4311 if(U_FAILURE(err)) { 4312 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4313 return; 4314 } 4315 utargetLimit=utarget; 4316 utarget = uTarget; 4317 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4318 if(U_FAILURE(err)) { 4319 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4320 return; 4321 } 4322 ctargetLimit=ctarget; 4323 ctarget =cTarget; 4324 while(ctarget<ctargetLimit){ 4325 
if(*ctarget != *tempSrc){ 4326 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4327 } 4328 ++ctarget; 4329 ++tempSrc; 4330 } 4331 4332 ucnv_close(conv); 4333 } 4334 4335 static void 4336 TestISO_2022_CN_EXT() { 4337 /* test input */ 4338 static const uint16_t in[]={ 4339 /* test Non-BMP code points */ 4340 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4341 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4342 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4343 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4344 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4345 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4346 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4347 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4348 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4349 0xD869, 0xDED5, 4350 4351 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4352 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4353 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 4354 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4355 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4356 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4357 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4358 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4359 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4360 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 
0x000A, 4361 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4362 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4363 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4364 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4365 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4366 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4367 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4368 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4369 4370 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4371 4372 }; 4373 4374 const UChar* uSource; 4375 const UChar* uSourceLimit; 4376 const char* cSource; 4377 const char* cSourceLimit; 4378 UChar *uTargetLimit =NULL; 4379 UChar *uTarget; 4380 char *cTarget; 4381 const char *cTargetLimit; 4382 char *cBuf; 4383 UChar *uBuf,*test; 4384 int32_t uBufSize = 180; 4385 UErrorCode errorCode=U_ZERO_ERROR; 4386 UConverter *cnv; 4387 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4388 int32_t* myOff= offsets; 4389 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4390 if(U_FAILURE(errorCode)) { 4391 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4392 return; 4393 } 4394 4395 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4396 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4397 uSource = (const UChar*)in; 4398 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4399 cTarget = cBuf; 4400 cTargetLimit = cBuf +uBufSize*5; 4401 uTarget = uBuf; 4402 uTargetLimit = uBuf+ uBufSize*5; 4403 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4404 if(U_FAILURE(errorCode)){ 4405 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4406 return; 4407 } 4408 
cSource = cBuf; 4409 cSourceLimit =cTarget; 4410 test =uBuf; 4411 myOff=offsets; 4412 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4413 if(U_FAILURE(errorCode)){ 4414 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4415 return; 4416 } 4417 uSource = (const UChar*)in; 4418 while(uSource<uSourceLimit){ 4419 if(*test!=*uSource){ 4420 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4421 } 4422 else{ 4423 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4424 } 4425 uSource++; 4426 test++; 4427 } 4428 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4429 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4430 /*Test for the condition where there is an invalid character*/ 4431 ucnv_reset(cnv); 4432 { 4433 static const uint8_t source2[]={0x0e,0x24,0x053}; 4434 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4435 } 4436 ucnv_close(cnv); 4437 free(uBuf); 4438 free(cBuf); 4439 free(offsets); 4440 } 4441 #endif 4442
/*
 * Round-trip test for the converter opened as "ISO_2022,locale=cn,version=0".
 *
 * Encodes `in` with ucnv_fromUnicode(), decodes the produced bytes with
 * ucnv_toUnicode(), and logs every decoded UChar (log_verbose on a match,
 * log_err on a mismatch).  It then runs the shared helpers
 * (TestGetNextUChar2022, TestSmallTargetBuffer, TestSmallSourceBuffer,
 * TestToAndFromUChars, TestJitterbug930) and the `source2` check through
 * TestNextUCharError().
 *
 * The work buffers are allocated only after the converter has been opened,
 * and every exit after that point goes through `cleanup`, so the converter
 * and the buffers are released on error paths as well.
 */
static void
TestISO_2022_CN() {
    /* test input */
    static const uint16_t in[]={
         /* jitterbug 951 */
         0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
         0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
         0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
         0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
         0x0020, 0x0045, 0x004e, 0x0044,
         /**/
         0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
         0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
         0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
         0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
         0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
         0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
         0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
         0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
         0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
         0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
         0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
         0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
         0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
         0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,

      };
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL,*test;
    int32_t *offsets = NULL;
    int32_t uBufSize = 180;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
    if(U_FAILURE(errorCode)) {
        /* Nothing has been allocated yet, so a plain return leaks nothing. */
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    /* cBuf is allocated with uBufSize*10 bytes, but only the first uBufSize*5
     * are offered to the converter through cTargetLimit below. */
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    if(uBuf==NULL || cBuf==NULL || offsets==NULL) {
        log_err("Unable to allocate the ISO-2022-CN test buffers\n");
        goto cleanup;
    }

    uSource = (const UChar*)in;
    uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;

    /* Unicode -> bytes */
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,offsets,TRUE, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* bytes -> Unicode; cTarget marks the end of the bytes just written */
    cSource = cBuf;
    cSourceLimit =cTarget;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,offsets,TRUE,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    /* Compare the decoded text with the original input, unit by unit. */
    test =uBuf;
    for(uSource = (const UChar*)in; uSource<uSourceLimit; uSource++, test++){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        else{
            log_verbose("      Got: \\u%04X\n",(int)*test) ;
        }
    }

    TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
    TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
    TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
    TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
    TestJitterbug930("csISO2022CN");

    /*Test for the condition where there is an invalid character*/
    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x0e,0x24,0x053};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
    }

cleanup:
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
4544 4545 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4546 typedef struct { 4547 const char * converterName; 4548 const char * inputText; 4549 int inputTextLength; 4550 } EmptySegmentTest; 4551 4552 /*
Callback for TestJitterbug6175, should only get called for empty segment errors */ 4553 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4554 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4555 if (reason > UCNV_IRREGULAR) { 4556 return; 4557 } 4558 if (reason != UCNV_IRREGULAR) { 4559 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4560 } 4561 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4562 *err = U_ZERO_ERROR; 4563 ucnv_cbToUWriteSub(toArgs,0,err); 4564 } 4565 4566 enum { kEmptySegmentToUCharsMax = 64 }; 4567 static void TestJitterbug6175(void) { 4568 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4569 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4570 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4571 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4572 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4573 static const EmptySegmentTest emptySegmentTests[] = { 4574 /* converterName inputText inputTextLength */ 4575 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, 4576 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4577 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4578 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4579 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, 4580 /* terminator: */ 4581 { NULL, NULL, 0, } 4582 }; 4583 const EmptySegmentTest * testPtr; 4584 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4585 UErrorCode err = U_ZERO_ERROR; 4586 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4587 if 
(U_FAILURE(err)) { 4588 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4589 return; 4590 } 4591 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4592 if (U_FAILURE(err)) { 4593 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4594 ucnv_close(cnv); 4595 return; 4596 } 4597 { 4598 UChar toUChars[kEmptySegmentToUCharsMax]; 4599 UChar * toUCharsPtr = toUChars; 4600 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4601 const char * inCharsPtr = testPtr->inputText; 4602 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4603 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4604 } 4605 ucnv_close(cnv); 4606 } 4607 } 4608 4609 static void 4610 TestEBCDIC_STATEFUL() { 4611 /* test input */ 4612 static const uint8_t in[]={ 4613 0x61, 4614 0x1a, 4615 0x0f, 0x4b, 4616 0x42, 4617 0x40, 4618 0x36, 4619 }; 4620 4621 /* expected test results */ 4622 static const int32_t results[]={ 4623 /* number of bytes read, code point */ 4624 1, 0x002f, 4625 1, 0x0092, 4626 2, 0x002e, 4627 1, 0xff62, 4628 1, 0x0020, 4629 1, 0x0096, 4630 4631 }; 4632 static const uint8_t in2[]={ 4633 0x0f, 4634 0xa1, 4635 0x01 4636 }; 4637 4638 /* expected test results */ 4639 static const int32_t results2[]={ 4640 /* number of bytes read, code point */ 4641 2, 0x203E, 4642 1, 0x0001, 4643 }; 4644 4645 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4646 UErrorCode errorCode=U_ZERO_ERROR; 4647 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4648 if(U_FAILURE(errorCode)) { 4649 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4650 return; 4651 } 4652 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4653 ucnv_reset(cnv); 4654 /* Test the condition when source >= sourceLimit */ 4655 
TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4656 ucnv_reset(cnv); 4657 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4658 { 4659 static const uint8_t source1[]={0x0f}; 4660 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4661 } 4662 /*Test for the condition where there is an invalid character*/ 4663 ucnv_reset(cnv); 4664 { 4665 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4666 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4667 } 4668 ucnv_reset(cnv); 4669 source=(const char*)in2; 4670 limit=(const char*)in2+sizeof(in2); 4671 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4672 ucnv_close(cnv); 4673 4674 } 4675 4676 static void 4677 TestGB18030() { 4678 /* test input */ 4679 static const uint8_t in[]={ 4680 0x24, 4681 0x7f, 4682 0x81, 0x30, 0x81, 0x30, 4683 0xa8, 0xbf, 4684 0xa2, 0xe3, 4685 0xd2, 0xbb, 4686 0x82, 0x35, 0x8f, 0x33, 4687 0x84, 0x31, 0xa4, 0x39, 4688 0x90, 0x30, 0x81, 0x30, 4689 0xe3, 0x32, 0x9a, 0x35 4690 #if 0 4691 /* 4692 * Feature removed markus 2000-oct-26 4693 * Only some codepages must match surrogate pairs into supplementary code points - 4694 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . 4695 * GB 18030 provides direct encodings for supplementary code points, therefore 4696 * it must not combine two single-encoded surrogates into one code point. 
4697 */ 4698 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4699 #endif 4700 }; 4701 4702 /* expected test results */ 4703 static const int32_t results[]={ 4704 /* number of bytes read, code point */ 4705 1, 0x24, 4706 1, 0x7f, 4707 4, 0x80, 4708 2, 0x1f9, 4709 2, 0x20ac, 4710 2, 0x4e00, 4711 4, 0x9fa6, 4712 4, 0xffff, 4713 4, 0x10000, 4714 4, 0x10ffff 4715 #if 0 4716 /* Feature removed. See comment above. */ 4717 8, 0x10000 4718 #endif 4719 }; 4720 4721 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4722 UErrorCode errorCode=U_ZERO_ERROR; 4723 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4724 if(U_FAILURE(errorCode)) { 4725 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4726 return; 4727 } 4728 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4729 ucnv_close(cnv); 4730 } 4731 4732 static void 4733 TestLMBCS() { 4734 /* LMBCS-1 string */ 4735 static const uint8_t pszLMBCS[]={ 4736 0x61, 4737 0x01, 0x29, 4738 0x81, 4739 0xA0, 4740 0x0F, 0x27, 4741 0x0F, 0x91, 4742 0x14, 0x0a, 0x74, 4743 0x14, 0xF6, 0x02, 4744 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4745 0x10, 0x88, 0xA0, 4746 }; 4747 4748 /* Unicode UChar32 equivalents */ 4749 static const UChar32 pszUnicode32[]={ 4750 /* code point */ 4751 0x00000061, 4752 0x00002013, 4753 0x000000FC, 4754 0x000000E1, 4755 0x00000007, 4756 0x00000091, 4757 0x00000a74, 4758 0x00000200, 4759 0x00023456, /* code point for surrogate pair */ 4760 0x00005516 4761 }; 4762 4763 /* Unicode UChar equivalents */ 4764 static const UChar pszUnicode[]={ 4765 /* code point */ 4766 0x0061, 4767 0x2013, 4768 0x00FC, 4769 0x00E1, 4770 0x0007, 4771 0x0091, 4772 0x0a74, 4773 0x0200, 4774 0xD84D, /* low surrogate */ 4775 0xDC56, /* high surrogate */ 4776 0x5516 4777 }; 4778 4779 /* expected test results */ 4780 static const int offsets32[]={ 4781 /* number of bytes read, code point */ 
4782 0, 4783 1, 4784 3, 4785 4, 4786 5, 4787 7, 4788 9, 4789 12, 4790 15, 4791 21, 4792 24 4793 }; 4794 4795 /* expected test results */ 4796 static const int offsets[]={ 4797 /* number of bytes read, code point */ 4798 0, 4799 1, 4800 3, 4801 4, 4802 5, 4803 7, 4804 9, 4805 12, 4806 15, 4807 18, 4808 21, 4809 24 4810 }; 4811 4812 4813 UConverter *cnv; 4814 4815 #define NAME_LMBCS_1 "LMBCS-1" 4816 #define NAME_LMBCS_2 "LMBCS-2" 4817 4818 4819 /* Some basic open/close/property tests on some LMBCS converters */ 4820 { 4821 4822 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4823 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4824 char get_subchars [1]; 4825 const char * get_name; 4826 UConverter *cnv1; 4827 UConverter *cnv2; 4828 4829 int8_t len = sizeof(get_subchars); 4830 4831 UErrorCode errorCode=U_ZERO_ERROR; 4832 4833 /* Open */ 4834 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4835 if(U_FAILURE(errorCode)) { 4836 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4837 return; 4838 } 4839 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4840 if(U_FAILURE(errorCode)) { 4841 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4842 return; 4843 } 4844 4845 /* Name */ 4846 get_name = ucnv_getName (cnv1, &errorCode); 4847 if (strcmp(NAME_LMBCS_1,get_name)){ 4848 log_err("Unexpected converter name: %s\n", get_name); 4849 } 4850 get_name = ucnv_getName (cnv2, &errorCode); 4851 if (strcmp(NAME_LMBCS_2,get_name)){ 4852 log_err("Unexpected converter name: %s\n", get_name); 4853 } 4854 4855 /* substitution chars */ 4856 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4857 if(U_FAILURE(errorCode)) { 4858 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4859 } 4860 if (len!=1){ 4861 log_err("Unexpected length of sub chars\n"); 4862 } 4863 if (get_subchars[0] != expected_subchars[0]){ 4864 log_err("Unexpected value of sub chars\n"); 4865 } 4866 
ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4867 if(U_FAILURE(errorCode)) { 4868 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4869 } 4870 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4871 if(U_FAILURE(errorCode)) { 4872 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4873 } 4874 if (len!=1){ 4875 log_err("Unexpected length of sub chars\n"); 4876 } 4877 if (get_subchars[0] != new_subchars[0]){ 4878 log_err("Unexpected value of sub chars\n"); 4879 } 4880 ucnv_close(cnv1); 4881 ucnv_close(cnv2); 4882 4883 } 4884 4885 /* LMBCS to Unicode - offsets */ 4886 { 4887 UErrorCode errorCode=U_ZERO_ERROR; 4888 4889 const char * pSource = (const char *)pszLMBCS; 4890 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4891 4892 UChar Out [sizeof(pszUnicode) + 1]; 4893 UChar * pOut = Out; 4894 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4895 4896 int32_t off [sizeof(offsets)]; 4897 4898 /* last 'offset' in expected results is just the final size. 4899 (Makes other tests easier). 
Compensate here: */ 4900 4901 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4902 4903 4904 4905 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4906 if(U_FAILURE(errorCode)) { 4907 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4908 return; 4909 } 4910 4911 4912 4913 ucnv_toUnicode (cnv, 4914 &pOut, 4915 OutLimit, 4916 &pSource, 4917 sourceLimit, 4918 off, 4919 TRUE, 4920 &errorCode); 4921 4922 4923 if (memcmp(off,offsets,sizeof(offsets))) 4924 { 4925 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4926 } 4927 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4928 { 4929 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4930 } 4931 ucnv_close(cnv); 4932 } 4933 { 4934 /* LMBCS to Unicode - getNextUChar */ 4935 const char * sourceStart; 4936 const char *source=(const char *)pszLMBCS; 4937 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4938 const UChar32 *results= pszUnicode32; 4939 const int *off = offsets32; 4940 4941 UErrorCode errorCode=U_ZERO_ERROR; 4942 UChar32 uniChar; 4943 4944 cnv=ucnv_open("LMBCS-1", &errorCode); 4945 if(U_FAILURE(errorCode)) { 4946 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4947 return; 4948 } 4949 else 4950 { 4951 4952 while(source<limit) { 4953 sourceStart=source; 4954 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4955 if(U_FAILURE(errorCode)) { 4956 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4957 break; 4958 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4959 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4960 uniChar, (source-sourceStart), *results, *off); 4961 break; 4962 } 4963 results++; 4964 off++; 4965 } 4966 } 4967 ucnv_close(cnv); 4968 } 4969 { /* test locale & optimization group operations: Unicode to 
LMBCS */ 4970 4971 UErrorCode errorCode=U_ZERO_ERROR; 4972 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 4973 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 4974 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 4975 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 4976 const UChar * pUniOut = uniString; 4977 UChar * pUniIn = uniString; 4978 uint8_t lmbcsString [4]; 4979 const char * pLMBCSOut = (const char *)lmbcsString; 4980 char * pLMBCSIn = (char *)lmbcsString; 4981 4982 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 4983 ucnv_fromUnicode (cnv16he, 4984 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 4985 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 4986 NULL, 1, &errorCode); 4987 4988 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 4989 { 4990 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 4991 } 4992 4993 pLMBCSIn= (char *)lmbcsString; 4994 pUniOut = uniString; 4995 ucnv_fromUnicode (cnv01us, 4996 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 4997 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 4998 NULL, 1, &errorCode); 4999 5000 if (lmbcsString[0] != 0x9F) 5001 { 5002 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5003 } 5004 5005 /* single byte char from mbcs char set */ 5006 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5007 pLMBCSOut = (const char *)lmbcsString; 5008 pUniIn = uniString; 5009 ucnv_toUnicode (cnv16jp, 5010 &pUniIn, pUniIn + 1, 5011 &pLMBCSOut, (pLMBCSOut + 1), 5012 NULL, 1, &errorCode); 5013 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5014 { 5015 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5016 } 5017 /* convert to group 1: should be 3 bytes */ 5018 pLMBCSIn = (char *)lmbcsString; 5019 
pUniOut = uniString; 5020 ucnv_fromUnicode (cnv01us, 5021 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5022 &pUniOut, pUniOut + 1, 5023 NULL, 1, &errorCode); 5024 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5025 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5026 { 5027 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5028 } 5029 pLMBCSOut = (const char *)lmbcsString; 5030 pUniIn = uniString; 5031 ucnv_toUnicode (cnv01us, 5032 &pUniIn, pUniIn + 1, 5033 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5034 NULL, 1, &errorCode); 5035 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5036 { 5037 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5038 } 5039 pLMBCSIn = (char *)lmbcsString; 5040 pUniOut = uniString; 5041 ucnv_fromUnicode (cnv16jp, 5042 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5043 &pUniOut, pUniOut + 1, 5044 NULL, 1, &errorCode); 5045 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5046 { 5047 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5048 } 5049 ucnv_close(cnv16he); 5050 ucnv_close(cnv16jp); 5051 ucnv_close(cnv01us); 5052 } 5053 { 5054 /* Small source buffer testing, LMBCS -> Unicode */ 5055 5056 UErrorCode errorCode=U_ZERO_ERROR; 5057 5058 const char * pSource = (const char *)pszLMBCS; 5059 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5060 int codepointCount = 0; 5061 5062 UChar Out [sizeof(pszUnicode) + 1]; 5063 UChar * pOut = Out; 5064 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 5065 5066 5067 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5068 if(U_FAILURE(errorCode)) { 5069 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5070 return; 5071 } 5072 5073 5074 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 
5075 { 5076 ucnv_toUnicode (cnv, 5077 &pOut, 5078 OutLimit, 5079 &pSource, 5080 (pSource+1), /* claim that this is a 1- byte buffer */ 5081 NULL, 5082 FALSE, /* FALSE means there might be more chars in the next buffer */ 5083 &errorCode); 5084 5085 if (U_SUCCESS (errorCode)) 5086 { 5087 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5088 { 5089 /* we are on to the next code point: check value */ 5090 5091 if (Out[0] != pszUnicode[codepointCount]){ 5092 log_err("LMBCS->Uni result %lx should have been %lx \n", 5093 Out[0], pszUnicode[codepointCount]); 5094 } 5095 5096 pOut = Out; /* reset for accumulating next code point */ 5097 codepointCount++; 5098 } 5099 } 5100 else 5101 { 5102 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5103 } 5104 } 5105 { 5106 /* limits & surrogate error testing */ 5107 char LIn [sizeof(pszLMBCS)]; 5108 const char * pLIn = LIn; 5109 5110 char LOut [sizeof(pszLMBCS)]; 5111 char * pLOut = LOut; 5112 5113 UChar UOut [sizeof(pszUnicode)]; 5114 UChar * pUOut = UOut; 5115 5116 UChar UIn [sizeof(pszUnicode)]; 5117 const UChar * pUIn = UIn; 5118 5119 int32_t off [sizeof(offsets)]; 5120 UChar32 uniChar; 5121 5122 errorCode=U_ZERO_ERROR; 5123 5124 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5125 pUIn++; 5126 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); 5127 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5128 { 5129 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5130 } 5131 pUIn--; 5132 5133 errorCode=U_ZERO_ERROR; 5134 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5135 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5136 { 5137 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5138 } 5139 errorCode=U_ZERO_ERROR; 5140 5141 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, 
(const char *)(pLIn-1), &errorCode); 5142 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5143 { 5144 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5145 } 5146 errorCode=U_ZERO_ERROR; 5147 5148 /* 0 byte source request - no error, no pointer movement */ 5149 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5150 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5151 if(U_FAILURE(errorCode)) { 5152 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5153 } 5154 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5155 { 5156 log_err("Unexpected pointer move in 0 byte source request \n"); 5157 } 5158 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5159 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5160 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5161 { 5162 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5163 } 5164 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5165 { 5166 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5167 } 5168 errorCode = U_ZERO_ERROR; 5169 5170 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5171 5172 pUIn = pszUnicode; 5173 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5174 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5175 { 5176 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5177 } 5178 5179 errorCode = U_ZERO_ERROR; 5180 5181 pLIn = (const char *)pszLMBCS; 5182 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5183 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const 
char *)pszLMBCS+offsets[4]) 5184 { 5185 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5186 } 5187 5188 /* unpaired or chopped LMBCS surrogates */ 5189 5190 /* OK high surrogate, Low surrogate is chopped */ 5191 LIn [0] = (char)0x14; 5192 LIn [1] = (char)0xD8; 5193 LIn [2] = (char)0x01; 5194 LIn [3] = (char)0x14; 5195 LIn [4] = (char)0xDC; 5196 pLIn = LIn; 5197 errorCode = U_ZERO_ERROR; 5198 pUOut = UOut; 5199 5200 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5201 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5202 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5203 { 5204 log_err("Unexpected results on chopped low surrogate\n"); 5205 } 5206 5207 /* chopped at surrogate boundary */ 5208 LIn [0] = (char)0x14; 5209 LIn [1] = (char)0xD8; 5210 LIn [2] = (char)0x01; 5211 pLIn = LIn; 5212 errorCode = U_ZERO_ERROR; 5213 pUOut = UOut; 5214 5215 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5216 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5217 { 5218 log_err("Unexpected results on chopped at surrogate boundary \n"); 5219 } 5220 5221 /* unpaired surrogate plus valid Unichar */ 5222 LIn [0] = (char)0x14; 5223 LIn [1] = (char)0xD8; 5224 LIn [2] = (char)0x01; 5225 LIn [3] = (char)0x14; 5226 LIn [4] = (char)0xC9; 5227 LIn [5] = (char)0xD0; 5228 pLIn = LIn; 5229 errorCode = U_ZERO_ERROR; 5230 pUOut = UOut; 5231 5232 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5233 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5234 { 5235 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5236 } 5237 5238 /* 
unpaired surrogate plus chopped Unichar */ 5239 LIn [0] = (char)0x14; 5240 LIn [1] = (char)0xD8; 5241 LIn [2] = (char)0x01; 5242 LIn [3] = (char)0x14; 5243 LIn [4] = (char)0xC9; 5244 5245 pLIn = LIn; 5246 errorCode = U_ZERO_ERROR; 5247 pUOut = UOut; 5248 5249 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5250 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5251 { 5252 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5253 } 5254 5255 /* unpaired surrogate plus valid non-Unichar */ 5256 LIn [0] = (char)0x14; 5257 LIn [1] = (char)0xD8; 5258 LIn [2] = (char)0x01; 5259 LIn [3] = (char)0x0F; 5260 LIn [4] = (char)0x3B; 5261 5262 pLIn = LIn; 5263 errorCode = U_ZERO_ERROR; 5264 pUOut = UOut; 5265 5266 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5267 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5268 { 5269 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5270 } 5271 5272 /* unpaired surrogate plus chopped non-Unichar */ 5273 LIn [0] = (char)0x14; 5274 LIn [1] = (char)0xD8; 5275 LIn [2] = (char)0x01; 5276 LIn [3] = (char)0x0F; 5277 5278 pLIn = LIn; 5279 errorCode = U_ZERO_ERROR; 5280 pUOut = UOut; 5281 5282 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5283 5284 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5285 { 5286 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5287 } 5288 } 5289 } 5290 ucnv_close(cnv); /* final cleanup */ 5291 } 5292 5293 5294 static void TestJitterbug255() 5295 { 5296 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 
5297 const char *testBuffer = (const char *)testBytes; 5298 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5299 UErrorCode status = U_ZERO_ERROR; 5300 /*UChar32 result;*/ 5301 UConverter *cnv = 0; 5302 5303 cnv = ucnv_open("shift-jis", &status); 5304 if (U_FAILURE(status) || cnv == 0) { 5305 log_data_err("Failed to open the converter for SJIS.\n"); 5306 return; 5307 } 5308 while (testBuffer != testEnd) 5309 { 5310 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5311 if (U_FAILURE(status)) 5312 { 5313 log_err("Failed to convert the next UChar for SJIS.\n"); 5314 break; 5315 } 5316 } 5317 ucnv_close(cnv); 5318 } 5319 5320 static void TestEBCDICUS4XML() 5321 { 5322 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5323 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5324 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5325 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5326 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5327 UChar *unicodes = unicodes_x; 5328 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5329 char *target = target_x; 5330 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5331 UErrorCode status = U_ZERO_ERROR; 5332 UConverter *cnv = 0; 5333 5334 cnv = ucnv_open("ebcdic-xml-us", &status); 5335 if (U_FAILURE(status) || cnv == 0) { 5336 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5337 return; 5338 } 5339 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5340 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5341 log_err("To Unicode conversion failed in EBCDICUS4XML test. 
%s\n", 5342 u_errorName(status)); 5343 printUSeqErr(unicodes_x, 3); 5344 printUSeqErr(toUnicodeMaps, 3); 5345 } 5346 status = U_ZERO_ERROR; 5347 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5348 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5349 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5350 u_errorName(status)); 5351 printSeqErr((const unsigned char*)target_x, 3); 5352 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5353 } 5354 ucnv_close(cnv); 5355 } 5356 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5357 5358 #if !UCONFIG_NO_COLLATION 5359 5360 static void TestJitterbug981(){ 5361 const UChar* rules; 5362 int32_t rules_length, target_cap, bytes_needed, buff_size; 5363 UErrorCode status = U_ZERO_ERROR; 5364 UConverter *utf8cnv; 5365 UCollator* myCollator; 5366 char *buff; 5367 int numNeeded=0; 5368 utf8cnv = ucnv_open ("utf8", &status); 5369 if(U_FAILURE(status)){ 5370 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5371 return; 5372 } 5373 myCollator = ucol_open("zh", &status); 5374 if(U_FAILURE(status)){ 5375 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5376 ucnv_close(utf8cnv); 5377 return; 5378 } 5379 5380 rules = ucol_getRules(myCollator, &rules_length); 5381 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5382 buff = malloc(buff_size); 5383 5384 target_cap = 0; 5385 do { 5386 ucnv_reset(utf8cnv); 5387 status = U_ZERO_ERROR; 5388 if(target_cap >= buff_size) { 5389 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5390 break; 5391 } 5392 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5393 rules, rules_length, &status); 5394 target_cap = (bytes_needed > target_cap) ? 
bytes_needed : target_cap +1; 5395 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5396 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5397 break; 5398 } 5399 numNeeded = bytes_needed; 5400 } while (status == U_BUFFER_OVERFLOW_ERROR); 5401 ucol_close(myCollator); 5402 ucnv_close(utf8cnv); 5403 free(buff); 5404 } 5405 5406 #endif 5407 5408 static void TestJitterbug1293(){ 5409 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5410 char target[256]; 5411 UErrorCode status = U_ZERO_ERROR; 5412 UConverter* conv=NULL; 5413 int32_t target_cap, bytes_needed, numNeeded = 0; 5414 conv = ucnv_open("shift-jis",&status); 5415 if(U_FAILURE(status)){ 5416 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5417 return; 5418 } 5419 5420 do{ 5421 target_cap =0; 5422 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5423 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5424 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5425 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5426 } 5427 numNeeded = bytes_needed; 5428 } while (status == U_BUFFER_OVERFLOW_ERROR); 5429 if(U_FAILURE(status)){ 5430 log_err("An error occured in ucnv_fromUChars. 
Error: %s", u_errorName(status)); 5431 return; 5432 } 5433 ucnv_close(conv); 5434 } 5435 static void TestJB5275_1(){ 5436 5437 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5438 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5439 /* Switch script: */ 5440 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5441 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5442 "\xEF\x40\x3B\xB3\x0A"; 5443 static const UChar expected[] ={ 5444 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5445 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5446 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5447 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5448 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5449 }; 5450 5451 UErrorCode status = U_ZERO_ERROR; 5452 UConverter* conv = ucnv_open("iscii-gur", &status); 5453 UChar dest[100] = {'\0'}; 5454 UChar* target = dest; 5455 UChar* targetLimit = dest+100; 5456 const char* source = data; 5457 const char* sourceLimit = data+strlen(data); 5458 const UChar* exp = expected; 5459 5460 if (U_FAILURE(status)) { 5461 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5462 return; 5463 } 5464 5465 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5466 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5467 if(U_FAILURE(status)){ 5468 log_err("conversion failed: %s \n", u_errorName(status)); 5469 } 5470 targetLimit = target; 5471 target = dest; 5472 printUSeq(target, targetLimit-target); 5473 while(target<targetLimit){ 5474 if(*exp!=*target){ 5475 log_err("did not get the expected output. 
\\u%04X != \\u%04X (got)\n", *exp, *target); 5476 } 5477 target++; 5478 exp++; 5479 } 5480 ucnv_close(conv); 5481 } 5482 5483 static void TestJB5275(){ 5484 static const char* data = 5485 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5486 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5487 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5488 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5489 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5490 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5491 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5492 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5493 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5494 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5495 static const UChar expected[] ={ 5496 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5497 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5498 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5499 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5500 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5501 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5502 }; 5503 5504 UErrorCode status = U_ZERO_ERROR; 5505 UConverter* conv = ucnv_open("iscii", &status); 5506 UChar dest[100] = {'\0'}; 5507 UChar* target = dest; 5508 UChar* targetLimit = dest+100; 5509 const char* source = data; 5510 const char* sourceLimit = data+strlen(data); 5511 const UChar* exp = expected; 5512 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5513 if(U_FAILURE(status)){ 5514 log_err("conversion failed: %s \n", u_errorName(status)); 5515 } 5516 targetLimit = target; 5517 target = dest; 5518 5519 printUSeq(target, targetLimit-target); 5520 5521 while(target<targetLimit){ 5522 if(*exp!=*target){ 5523 log_err("did not get the expected output. 
\\u%04X != \\u%04X (got)\n", *exp, *target); 5524 } 5525 target++; 5526 exp++; 5527 } 5528 ucnv_close(conv); 5529 } 5530