1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2011, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Steven R. Loomis 7/8/1999 Adding input buffer test 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "cstring.h" 17 #include "unicode/uloc.h" 18 #include "unicode/ucnv.h" 19 #include "unicode/ucnv_err.h" 20 #include "unicode/ucnv_cb.h" 21 #include "cintltst.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucol.h" 25 #include "cmemory.h" 26 #include "nucnvtst.h" 27 28 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 29 30 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 31 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 32 #if !UCONFIG_NO_COLLATION 33 static void TestJitterbug981(void); 34 #endif 35 static void TestJitterbug1293(void); 36 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 37 static void TestConverterTypesAndStarters(void); 38 static void TestAmbiguous(void); 39 static void TestSignatureDetection(void); 40 static void TestUTF7(void); 41 static void TestIMAP(void); 42 static void TestUTF8(void); 43 static void TestCESU8(void); 44 static void TestUTF16(void); 45 static void TestUTF16BE(void); 46 static void TestUTF16LE(void); 47 static void TestUTF32(void); 48 static void TestUTF32BE(void); 49 static void TestUTF32LE(void); 50 static void TestLATIN1(void); 51 52 #if !UCONFIG_NO_LEGACY_CONVERSION 53 static void TestSBCS(void); 54 static void TestDBCS(void); 55 static void TestMBCS(void); 56 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 57 static void TestICCRunout(void); 58 #endif 59 60 #ifdef U_ENABLE_GENERIC_ISO_2022 61 static void TestISO_2022(void); 62 #endif 63 64 static void TestISO_2022_JP(void); 65 static void TestISO_2022_JP_1(void); 66 static void TestISO_2022_JP_2(void); 67 static void TestISO_2022_KR(void); 68 static void TestISO_2022_KR_1(void); 69 static void TestISO_2022_CN(void); 70 #if 0 71 /* 72 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 73 */ 74 static void TestISO_2022_CN_EXT(void); 75 #endif 76 static void TestJIS(void); 77 static void TestHZ(void); 78 #endif 79 80 static void TestSCSU(void); 81 82 #if !UCONFIG_NO_LEGACY_CONVERSION 83 static void TestEBCDIC_STATEFUL(void); 84 static void TestGB18030(void); 85 static void TestLMBCS(void); 86 static void TestJitterbug255(void); 87 static void TestEBCDICUS4XML(void); 88 #if 0 89 /* 90 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 91 */ 92 static void TestJitterbug915(void); 93 #endif 94 static void TestISCII(void); 95 96 static void TestCoverageMBCS(void); 97 static void TestJitterbug2346(void); 98 static void TestJitterbug2411(void); 99 static void TestJB5275(void); 100 static void TestJB5275_1(void); 101 static void TestJitterbug6175(void); 102 103 static void TestIsFixedWidth(void); 104 #endif 105 106 static void TestInBufSizes(void); 107 108 static void TestRoundTrippingAllUTF(void); 109 static void TestConv(const uint16_t in[], 110 int len, 111 const char* conv, 112 const char* lang, 113 char byteArr[], 114 int byteArrLen); 115 116 /* open a converter, using test data if it begins with '@' */ 117 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 118 119 120 #define NEW_MAX_BUFFER 999 121 122 static int32_t gInBufferSize = NEW_MAX_BUFFER; 123 static int32_t gOutBufferSize = NEW_MAX_BUFFER; 124 static char gNuConvTestName[1024]; 125 126 #define nct_min(x,y) ((x<y) ? x : y) 127 128 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 129 { 130 if(cnv && cnv[0] == '@') { 131 return ucnv_openPackage(loadTestData(err), cnv+1, err); 132 } else { 133 return ucnv_open(cnv, err); 134 } 135 } 136 137 static void printSeq(const unsigned char* a, int len) 138 { 139 int i=0; 140 log_verbose("{"); 141 while (i<len) 142 log_verbose("0x%02x ", a[i++]); 143 log_verbose("}\n"); 144 } 145 146 static void printUSeq(const UChar* a, int len) 147 { 148 int i=0; 149 log_verbose("{U+"); 150 while (i<len) log_verbose("0x%04x ", a[i++]); 151 log_verbose("}\n"); 152 } 153 154 static void printSeqErr(const unsigned char* a, int len) 155 { 156 int i=0; 157 fprintf(stderr, "{"); 158 while (i<len) 159 fprintf(stderr, "0x%02x ", a[i++]); 160 fprintf(stderr, "}\n"); 161 } 162 163 static void printUSeqErr(const UChar* a, int len) 164 { 165 int i=0; 166 fprintf(stderr, "{U+"); 167 while (i<len) 168 fprintf(stderr, "0x%04x ", a[i++]); 169 fprintf(stderr,"}\n"); 170 } 171 172 static void 173 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 174 { 175 const char* s0; 176 const char* s=(char*)source; 177 const int32_t *r=results; 178 UErrorCode errorCode=U_ZERO_ERROR; 179 UChar32 c; 180 181 while(s<limit) { 182 s0=s; 183 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 184 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 185 break; /* no more significant input */ 186 } else if(U_FAILURE(errorCode)) { 187 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 188 break; 189 } else if( 190 /* test the expected number of input bytes only if >=0 */ 191 (*r>=0 && (int32_t)(s-s0)!=*r) || 192 c!=*(r+1) 193 ) { 194 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 195 message, c, (s-s0), *(r+1), *r); 196 break; 197 } 198 r+=2; 199 } 200 } 201 202 static void 203 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) 204 { 205 const char* s=(char*)source; 206 UErrorCode errorCode=U_ZERO_ERROR; 207 uint32_t c; 208 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 209 if(errorCode != expected){ 210 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 211 } 212 if(c != 0xFFFD && c != 0xffff){ 213 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 214 } 215 216 } 217 218 static void TestInBufSizes(void) 219 { 220 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 221 #if 1 222 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 224 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 227 TestNewConvertWithBufferSizes(1,1); 228 TestNewConvertWithBufferSizes(2,3); 229 TestNewConvertWithBufferSizes(3,2); 230 #endif 231 } 232 233 static void TestOutBufSizes(void) 234 { 235 #if 1 236 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 237 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 238 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 239 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 240 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 241 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 242 243 #endif 244 } 245 246 247 void addTestNewConvert(TestNode** root) 248 { 249 #if !UCONFIG_NO_FILE_IO 250 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 251 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 252 #endif 253 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 254 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 255 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); 256 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 257 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); 258 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 259 260 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 261 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 262 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 263 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 264 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 265 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 266 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 267 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 268 269 #if !UCONFIG_NO_LEGACY_CONVERSION 270 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 271 #endif 272 273 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 274 275 #if !UCONFIG_NO_LEGACY_CONVERSION 276 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 277 #if !UCONFIG_NO_FILE_IO 278 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 279 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 280 #endif 281 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 282 283 #ifdef U_ENABLE_GENERIC_ISO_2022 284 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 285 #endif 286 /* BEGIN android-changed 287 To save space, Android does not build full ISO2022 CJK tables. 288 We turn off the tests here. 289 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 290 END android-changed */ 291 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); 292 /* BEGIN android-changed 293 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 294 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 295 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 296 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 297 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 298 END android-changed */ 299 /* 300 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 301 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 302 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 303 */ 304 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 305 #endif 306 307 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 308 309 #if !UCONFIG_NO_LEGACY_CONVERSION 310 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 311 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 312 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 313 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 314 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 315 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 316 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 317 #if !UCONFIG_NO_COLLATION 318 /* BEGIN android-removed 319 To save space, Android does not include the collation tailoring rules. 320 Skip the related tests. 321 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 322 END android-removed */ 323 #endif 324 325 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 326 #endif 327 328 329 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 330 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 331 #endif 332 333 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 334 335 #if !UCONFIG_NO_LEGACY_CONVERSION 336 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 337 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 338 /* BEGIN android-removed 339 To save space, Android does not build full ISO2022 CJK tables. 340 We turn off the tests here. 341 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 342 END android-removed */ 343 344 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); 345 #endif 346 } 347 348 349 /* Note that this test already makes use of statics, so it's not really 350 multithread safe. 351 This convenience function lets us make the error messages actually useful. 352 */ 353 354 static void setNuConvTestName(const char *codepage, const char *direction) 355 { 356 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 357 codepage, 358 direction, 359 (int)gInBufferSize, 360 (int)gOutBufferSize); 361 } 362 363 typedef enum 364 { 365 TC_OK = 0, /* test was OK */ 366 TC_MISMATCH = 1, /* Match failed - err was printed */ 367 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 368 } ETestConvertResult; 369 370 /* Note: This function uses global variables and it will not do offset 371 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 372 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 373 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 374 { 375 UErrorCode status = U_ZERO_ERROR; 376 UConverter *conv = 0; 377 char junkout[NEW_MAX_BUFFER]; /* FIX */ 378 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 379 char *p; 380 const UChar *src; 381 char *end; 382 char *targ; 383 int32_t *offs; 384 int i; 385 int32_t realBufferSize; 386 char *realBufferEnd; 387 const UChar *realSourceEnd; 388 const UChar *sourceLimit; 389 UBool checkOffsets = TRUE; 390 UBool doFlush; 391 392 for(i=0;i<NEW_MAX_BUFFER;i++) 393 junkout[i] = (char)0xF0; 394 for(i=0;i<NEW_MAX_BUFFER;i++) 395 junokout[i] = 0xFF; 396 397 setNuConvTestName(codepage, "FROM"); 398 399 log_verbose("\n========= %s\n", gNuConvTestName); 400 401 conv = my_ucnv_open(codepage, &status); 402 403 if(U_FAILURE(status)) 404 { 405 log_data_err("Couldn't open converter %s\n",codepage); 406 return TC_FAIL; 407 } 408 if(useFallback){ 409 ucnv_setFallback(conv,useFallback); 410 } 411 412 log_verbose("Converter opened..\n"); 413 414 src = source; 415 targ = junkout; 416 offs = junokout; 417 418 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 419 realBufferEnd = junkout + realBufferSize; 420 realSourceEnd = source + sourceLen; 421 422 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 423 checkOffsets = FALSE; 424 425 do 426 { 427 end = nct_min(targ + gOutBufferSize, realBufferEnd); 428 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 429 430 doFlush = (UBool)(sourceLimit == realSourceEnd); 431 432 if(targ == realBufferEnd) { 433 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 434 return TC_FAIL; 435 } 436 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 437 438 439 status = U_ZERO_ERROR; 440 441 ucnv_fromUnicode (conv, 442 &targ, 443 end, 444 &src, 445 sourceLimit, 446 checkOffsets ? offs : NULL, 447 doFlush, /* flush if we're at the end of the input data */ 448 &status); 449 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 450 451 if(U_FAILURE(status)) { 452 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 453 return TC_FAIL; 454 } 455 456 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 457 sourceLen, targ-junkout); 458 459 if(getTestOption(VERBOSITY_OPTION)) 460 { 461 char junk[9999]; 462 char offset_str[9999]; 463 char *ptr; 464 465 junk[0] = 0; 466 offset_str[0] = 0; 467 for(ptr = junkout;ptr<targ;ptr++) { 468 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 469 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 470 } 471 472 log_verbose(junk); 473 printSeq((const uint8_t *)expect, expectLen); 474 if ( checkOffsets ) { 475 log_verbose("\nOffsets:"); 476 log_verbose(offset_str); 477 } 478 log_verbose("\n"); 479 } 480 ucnv_close(conv); 481 482 if(expectLen != targ-junkout) { 483 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 484 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 485 fprintf(stderr, "Got:\n"); 486 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 487 fprintf(stderr, "Expected:\n"); 488 printSeqErr((const unsigned char*)expect, expectLen); 489 return TC_MISMATCH; 490 } 491 492 if (checkOffsets && (expectOffsets != 0) ) { 493 log_verbose("comparing %d offsets..\n", targ-junkout); 494 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 495 log_err("did not get the expected offsets. %s\n", gNuConvTestName); 496 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 497 log_err("\n"); 498 log_err("Got : "); 499 for(p=junkout;p<targ;p++) { 500 log_err("%d,", junokout[p-junkout]); 501 } 502 log_err("\n"); 503 log_err("Expected: "); 504 for(i=0; i<(targ-junkout); i++) { 505 log_err("%d,", expectOffsets[i]); 506 } 507 log_err("\n"); 508 } 509 } 510 511 log_verbose("comparing..\n"); 512 if(!memcmp(junkout, expect, expectLen)) { 513 log_verbose("Matches!\n"); 514 return TC_OK; 515 } else { 516 log_err("String does not match u->%s\n", gNuConvTestName); 517 printUSeqErr(source, sourceLen); 518 fprintf(stderr, "Got:\n"); 519 printSeqErr((const unsigned char *)junkout, expectLen); 520 fprintf(stderr, "Expected:\n"); 521 printSeqErr((const unsigned char *)expect, expectLen); 522 523 return TC_MISMATCH; 524 } 525 } 526 527 /* Note: This function uses global variables and it will not do offset 528 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 529 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 530 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 531 { 532 UErrorCode status = U_ZERO_ERROR; 533 UConverter *conv = 0; 534 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 535 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 536 const char *src; 537 const char *realSourceEnd; 538 const char *srcLimit; 539 UChar *p; 540 UChar *targ; 541 UChar *end; 542 int32_t *offs; 543 int i; 544 UBool checkOffsets = TRUE; 545 546 int32_t realBufferSize; 547 UChar *realBufferEnd; 548 549 550 for(i=0;i<NEW_MAX_BUFFER;i++) 551 junkout[i] = 0xFFFE; 552 553 for(i=0;i<NEW_MAX_BUFFER;i++) 554 junokout[i] = -1; 555 556 setNuConvTestName(codepage, "TO"); 557 558 log_verbose("\n========= %s\n", gNuConvTestName); 559 560 conv = my_ucnv_open(codepage, &status); 561 562 if(U_FAILURE(status)) 563 { 564 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 565 return TC_FAIL; 566 } 567 if(useFallback){ 568 ucnv_setFallback(conv,useFallback); 569 } 570 log_verbose("Converter opened..\n"); 571 572 src = (const char *)source; 573 targ = junkout; 574 offs = junokout; 575 576 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 577 realBufferEnd = junkout + realBufferSize; 578 realSourceEnd = src + sourcelen; 579 580 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 581 checkOffsets = FALSE; 582 583 do 584 { 585 end = nct_min( targ + gOutBufferSize, realBufferEnd); 586 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 587 588 if(targ == realBufferEnd) 589 { 590 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 591 return TC_FAIL; 592 } 593 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 594 595 /* oldTarg = targ; */ 596 597 status = U_ZERO_ERROR; 598 599 ucnv_toUnicode (conv, 600 &targ, 601 end, 602 &src, 603 srcLimit, 604 checkOffsets ? offs : NULL, 605 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 606 &status); 607 608 /* offs += (targ-oldTarg); */ 609 610 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 611 612 if(U_FAILURE(status)) 613 { 614 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 615 return TC_FAIL; 616 } 617 618 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 619 sourcelen, targ-junkout); 620 if(getTestOption(VERBOSITY_OPTION)) 621 { 622 char junk[9999]; 623 char offset_str[9999]; 624 UChar *ptr; 625 626 junk[0] = 0; 627 offset_str[0] = 0; 628 629 for(ptr = junkout;ptr<targ;ptr++) 630 { 631 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 632 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 633 } 634 635 log_verbose(junk); 636 printUSeq(expect, expectlen); 637 if ( checkOffsets ) 638 { 639 log_verbose("\nOffsets:"); 640 log_verbose(offset_str); 641 } 642 log_verbose("\n"); 643 } 644 ucnv_close(conv); 645 646 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 647 648 if (checkOffsets && (expectOffsets != 0)) 649 { 650 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 651 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 652 log_err("Got: "); 653 for(p=junkout;p<targ;p++) { 654 log_err("%d,", junokout[p-junkout]); 655 } 656 log_err("\n"); 657 log_err("Expected: "); 658 for(i=0; i<(targ-junkout); i++) { 659 log_err("%d,", expectOffsets[i]); 660 } 661 log_err("\n"); 662 log_err("output: "); 663 for(i=0; i<(targ-junkout); i++) { 664 log_err("%X,", junkout[i]); 665 } 666 log_err("\n"); 667 log_err("input: "); 668 for(i=0; i<(src-(const char *)source); i++) { 669 log_err("%X,", (unsigned char)source[i]); 670 } 671 log_err("\n"); 672 } 673 } 674 675 if(!memcmp(junkout, expect, expectlen*2)) 676 { 677 log_verbose("Matches!\n"); 678 return TC_OK; 679 } 680 else 681 { 682 log_err("String does not match. %s\n", gNuConvTestName); 683 log_verbose("String does not match. %s\n", gNuConvTestName); 684 printf("\nGot:"); 685 printUSeqErr(junkout, expectlen); 686 printf("\nExpected:"); 687 printUSeqErr(expect, expectlen); 688 return TC_MISMATCH; 689 } 690 } 691 692 693 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 694 { 695 /** test chars #1 */ 696 /* 1 2 3 1Han 2Han 3Han . */ 697 static const UChar sampleText[] = 698 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 699 static const UChar sampleTextRoundTripUnmappable[] = 700 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 701 702 703 static const uint8_t expectedUTF8[] = 704 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 705 static const int32_t toUTF8Offs[] = 706 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 707 static const int32_t fmUTF8Offs[] = 708 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 709 710 #ifdef U_ENABLE_GENERIC_ISO_2022 711 /* Same as UTF8, but with ^[%B preceeding */ 712 static const const uint8_t expectedISO2022[] = 713 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 714 static const int32_t toISO2022Offs[] = 715 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 716 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 717 static const int32_t fmISO2022Offs[] = 718 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 719 #endif 720 721 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ 722 static const uint8_t expectedIBM930[] = 723 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 724 static const int32_t toIBM930Offs[] = 725 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 726 static const int32_t fmIBM930Offs[] = 727 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 728 729 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 730 static const uint8_t expectedIBM943[] = 731 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 732 static const int32_t toIBM943Offs [] = 733 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 734 static const int32_t fmIBM943Offs[] = 735 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 736 737 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 738 static const uint8_t expectedIBM9027[] = 739 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 740 static const int32_t toIBM9027Offs [] = 741 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 742 743 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 744 static const uint8_t expectedIBM920[] = 745 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 746 static const int32_t toIBM920Offs [] = 747 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 748 749 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 750 static const uint8_t expectedISO88593[] = 751 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 752 static const int32_t toISO88593Offs[] = 753 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 754 755 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ 756 static const uint8_t expectedLATIN1[] = 757 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 758 static const int32_t toLATIN1Offs[] = 759 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 760 761 762 /* etc */ 763 static const uint8_t expectedUTF16BE[] = 764 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 765 static const int32_t toUTF16BEOffs[]= 766 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 767 static const int32_t fmUTF16BEOffs[] = 768 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 769 770 static const uint8_t expectedUTF16LE[] = 771 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 772 static const int32_t toUTF16LEOffs[]= 773 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 774 static const int32_t fmUTF16LEOffs[] = 775 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 776 777 static const uint8_t expectedUTF32BE[] = 778 { 0x00, 0x00, 0x00, 0x31, 779 0x00, 0x00, 0x00, 0x32, 780 0x00, 0x00, 0x00, 0x33, 781 0x00, 0x00, 0x00, 0x00, 782 0x00, 0x00, 0x4e, 0x00, 783 0x00, 0x00, 0x4e, 0x8c, 784 0x00, 0x00, 0x4e, 0x09, 785 0x00, 0x00, 0x00, 0x2e, 786 0x00, 0x02, 0x00, 0x21 }; 787 static const int32_t toUTF32BEOffs[]= 788 { 0x00, 0x00, 0x00, 0x00, 789 0x01, 0x01, 0x01, 0x01, 790 0x02, 0x02, 0x02, 0x02, 791 0x03, 0x03, 0x03, 0x03, 792 0x04, 0x04, 0x04, 0x04, 793 0x05, 0x05, 0x05, 0x05, 794 0x06, 0x06, 0x06, 0x06, 795 0x07, 0x07, 0x07, 0x07, 796 0x08, 0x08, 0x08, 0x08, 797 0x08, 0x08, 0x08, 0x08 }; 798 static const int32_t fmUTF32BEOffs[] = 799 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 800 801 static const uint8_t expectedUTF32LE[] = 802 { 0x31, 0x00, 0x00, 0x00, 803 0x32, 0x00, 0x00, 0x00, 804 0x33, 0x00, 0x00, 0x00, 805 0x00, 0x00, 0x00, 0x00, 806 0x00, 0x4e, 0x00, 0x00, 807 0x8c, 0x4e, 0x00, 0x00, 808 0x09, 0x4e, 0x00, 0x00, 809 0x2e, 0x00, 0x00, 0x00, 810 0x21, 0x00, 0x02, 0x00 }; 811 static const int32_t toUTF32LEOffs[]= 812 { 0x00, 0x00, 0x00, 0x00, 813 0x01, 0x01, 0x01, 0x01, 814 0x02, 0x02, 0x02, 0x02, 815 0x03, 0x03, 0x03, 0x03, 816 0x04, 0x04, 0x04, 0x04, 817 0x05, 0x05, 0x05, 0x05, 818 0x06, 0x06, 0x06, 0x06, 819 0x07, 0x07, 0x07, 0x07, 820 0x08, 0x08, 0x08, 0x08, 821 0x08, 0x08, 0x08, 0x08 }; 822 static const int32_t fmUTF32LEOffs[] = 823 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 824 825 826 827 828 /** Test chars #2 **/ 829 830 /* Sahha [health], slashed h's */ 831 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 832 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 833 834 /* LMBCS */ 835 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 836 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 837 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 838 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 839 /*********************************** START OF CODE finally *************/ 840 841 gInBufferSize = insize; 842 gOutBufferSize = outsize; 843 844 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 845 846 847 /*UTF-8*/ 848 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 849 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 850 851 log_verbose("Test surrogate behaviour for UTF8\n"); 852 { 853 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 854 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 855 0xf0, 0x90, 0x90, 0x81, 856 0xef, 0xbf, 0xbd 857 }; 858 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 859 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 860 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 861 862 863 } 864 865 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 866 /*ISO-2022*/ 867 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 868 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 869 #endif 870 871 /*UTF16 LE*/ 872 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 873 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 874 /*UTF16 BE*/ 875 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 876 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 877 /*UTF32 LE*/ 878 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 879 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 880 /*UTF32 BE*/ 881 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 882 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 883 884 /*LATIN_1*/ 885 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 886 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 887 888 #if !UCONFIG_NO_LEGACY_CONVERSION 889 /*EBCDIC_STATEFUL*/ 890 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 891 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 892 893 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 894 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 895 896 /*MBCS*/ 897 898 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 899 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); 900 /*DBCS*/ 901 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 902 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 903 /*SBCS*/ 904 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 905 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 906 /*SBCS*/ 907 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 908 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 909 #endif 910 911 912 /****/ 913 914 /*UTF-8*/ 915 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 916 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 917 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 918 /*ISO-2022*/ 919 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 920 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 921 #endif 922 923 /*UTF16 LE*/ 924 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 925 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 926 /*UTF16 BE*/ 927 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 928 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 929 /*UTF32 LE*/ 930 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 931 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 932 /*UTF32 BE*/ 933 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 934 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 935 936 #if !UCONFIG_NO_LEGACY_CONVERSION 937 /*EBCDIC_STATEFUL*/ 938 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 939 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 940 /*MBCS*/ 941 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 942 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 943 #endif 944 945 /* Try it again to make sure it still works */ 946 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 947 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 948 949 #if !UCONFIG_NO_LEGACY_CONVERSION 950 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 951 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 952 953 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 954 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 955 956 /*LMBCS*/ 957 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 958 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 959 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 960 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 961 #endif 962 963 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 964 { 965 /* encode directly set D and set O */ 966 static const uint8_t utf7[] = { 967 /* 968 Hi Mom -+Jjo--! 969 A+ImIDkQ. 970 +- 971 +ZeVnLIqe- 972 */ 973 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 974 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 975 0x2b, 0x2d, 976 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 977 }; 978 static const UChar unicode[] = { 979 /* 980 Hi Mom -<WHITE SMILING FACE>-! 981 A<NOT IDENTICAL TO><ALPHA>. 982 + 983 [Japanese word "nihongo"] 984 */ 985 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 986 0x41, 0x2262, 0x0391, 0x2e, 987 0x2b, 988 0x65e5, 0x672c, 0x8a9e 989 }; 990 static const int32_t toUnicodeOffsets[] = { 991 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 992 15, 17, 19, 23, 993 24, 994 27, 29, 32 995 }; 996 static const int32_t fromUnicodeOffsets[] = { 997 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 998 11, 12, 12, 12, 13, 13, 13, 13, 14, 999 15, 15, 1000 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1001 }; 1002 1003 /* same but escaping set O (the exclamation mark) */ 1004 static const uint8_t utf7Restricted[] = { 1005 /* 1006 Hi Mom -+Jjo--+ACE- 1007 A+ImIDkQ. 1008 +- 1009 +ZeVnLIqe- 1010 */ 1011 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 1012 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 1013 0x2b, 0x2d, 1014 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 1015 }; 1016 static const int32_t toUnicodeOffsetsR[] = { 1017 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1018 19, 21, 23, 27, 1019 28, 1020 31, 33, 36 1021 }; 1022 static const int32_t fromUnicodeOffsetsR[] = { 1023 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1024 11, 12, 12, 12, 13, 13, 13, 13, 14, 1025 15, 15, 1026 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1027 }; 1028 1029 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1030 1031 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 1032 1033 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1034 1035 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1036 } 1037 1038 /* 1039 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 1040 * modified according to RFC 2060, 1041 * and supplemented with the one example in RFC 2060 itself. 1042 */ 1043 { 1044 static const uint8_t imap[] = { 1045 /* Hi Mom -&Jjo--! 1046 A&ImIDkQ-. 1047 &- 1048 &ZeVnLIqe- 1049 \ 1050 ~peter 1051 /mail 1052 /&ZeVnLIqe- 1053 /&U,BTFw- 1054 */ 1055 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1056 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1057 0x26, 0x2d, 1058 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1059 0x5c, 1060 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1061 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1062 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1063 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1064 }; 1065 static const UChar unicode[] = { 1066 /* Hi Mom -<WHITE SMILING FACE>-! 1067 A<NOT IDENTICAL TO><ALPHA>. 1068 & 1069 [Japanese word "nihongo"] 1070 \ 1071 ~peter 1072 /mail 1073 /<65e5, 672c, 8a9e> 1074 /<53f0, 5317> 1075 */ 1076 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1077 0x41, 0x2262, 0x0391, 0x2e, 1078 0x26, 1079 0x65e5, 0x672c, 0x8a9e, 1080 0x5c, 1081 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1082 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1083 0x2f, 0x65e5, 0x672c, 0x8a9e, 1084 0x2f, 0x53f0, 0x5317 1085 }; 1086 static const int32_t toUnicodeOffsets[] = { 1087 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1088 15, 17, 19, 24, 1089 25, 1090 28, 30, 33, 1091 37, 1092 38, 39, 40, 41, 42, 43, 1093 44, 45, 46, 47, 48, 1094 49, 51, 53, 56, 1095 60, 62, 64 1096 }; 1097 static const int32_t fromUnicodeOffsets[] = { 1098 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1099 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1100 15, 15, 1101 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1102 19, 1103 20, 21, 22, 23, 24, 25, 1104 26, 27, 28, 29, 30, 1105 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1106 35, 36, 36, 36, 37, 37, 37, 37, 37 1107 }; 1108 1109 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1110 1111 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1112 } 1113 1114 /* Test UTF-8 bad data handling*/ 1115 { 1116 static const uint8_t utf8[]={ 1117 0x61, 1118 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1119 0x00, 1120 0x62, 1121 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1122 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1123 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1124 0xdf, 0xbf, /* 7ff */ 1125 0xbf, /* truncated tail */ 1126 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1127 0x02 1128 }; 1129 1130 static const uint16_t utf8Expected[]={ 1131 0x0061, 1132 0xfffd, 1133 0x0000, 1134 0x0062, 1135 0xfffd, 1136 0xfffd, 1137 0xdbff, 0xdfff, 1138 0x07ff, 1139 0xfffd, 1140 0xfffd, 1141 0x0002 1142 }; 1143 1144 static const int32_t utf8Offsets[]={ 1145 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1146 }; 1147 testConvertToU(utf8, sizeof(utf8), 1148 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1149 1150 } 1151 1152 /* Test UTF-32BE bad data handling*/ 1153 { 1154 static const uint8_t utf32[]={ 1155 0x00, 0x00, 0x00, 0x61, 1156 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1157 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1158 0x00, 0x00, 0x00, 0x62, 1159 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1160 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1161 0x00, 0x00, 0x01, 0x62, 1162 0x00, 0x00, 0x02, 0x62 1163 }; 1164 static const uint16_t utf32Expected[]={ 1165 0x0061, 1166 0xfffd, /* 0x110000 out of range */ 1167 0xDBFF, /* 0x10FFFF in range */ 1168 0xDFFF, 1169 0x0062, 1170 0xfffd, /* 0xffffffff out of range */ 1171 0xfffd, /* 0x7fffffff out of range */ 1172 0x0162, 1173 0x0262 1174 }; 1175 static const int32_t utf32Offsets[]={ 1176 0, 4, 8, 8, 12, 16, 20, 24, 28 1177 }; 1178 static const uint8_t utf32ExpectedBack[]={ 1179 0x00, 0x00, 0x00, 0x61, 1180 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ 1181 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1182 0x00, 0x00, 0x00, 0x62, 1183 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1184 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1185 0x00, 0x00, 0x01, 0x62, 1186 0x00, 0x00, 0x02, 0x62 1187 }; 1188 static const int32_t utf32OffsetsBack[]={ 1189 0,0,0,0, 1190 1,1,1,1, 1191 2,2,2,2, 1192 4,4,4,4, 1193 5,5,5,5, 1194 6,6,6,6, 1195 7,7,7,7, 1196 8,8,8,8 1197 }; 1198 1199 testConvertToU(utf32, sizeof(utf32), 1200 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1201 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1202 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1203 } 1204 1205 /* Test UTF-32LE bad data handling*/ 1206 { 1207 static const uint8_t utf32[]={ 1208 0x61, 0x00, 0x00, 0x00, 1209 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1210 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1211 0x62, 0x00, 0x00, 0x00, 1212 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1213 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1214 0x62, 0x01, 0x00, 0x00, 1215 0x62, 0x02, 0x00, 0x00, 1216 }; 1217 1218 static const uint16_t utf32Expected[]={ 1219 0x0061, 1220 0xfffd, /* 0x110000 out of range */ 1221 0xDBFF, /* 0x10FFFF in range */ 1222 0xDFFF, 1223 0x0062, 1224 0xfffd, /* 0xffffffff out of range */ 1225 0xfffd, /* 0x7fffffff out of range */ 1226 0x0162, 1227 0x0262 1228 }; 1229 static const int32_t utf32Offsets[]={ 1230 0, 4, 8, 8, 12, 16, 20, 24, 28 1231 }; 1232 static const uint8_t utf32ExpectedBack[]={ 1233 0x61, 0x00, 0x00, 0x00, 1234 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1235 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1236 0x62, 0x00, 0x00, 0x00, 1237 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1238 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1239 0x62, 0x01, 0x00, 0x00, 1240 0x62, 0x02, 0x00, 0x00 1241 }; 1242 static const int32_t utf32OffsetsBack[]={ 1243 0,0,0,0, 1244 1,1,1,1, 1245 2,2,2,2, 1246 4,4,4,4, 1247 5,5,5,5, 1248 6,6,6,6, 1249 7,7,7,7, 1250 8,8,8,8 1251 }; 1252 testConvertToU(utf32, sizeof(utf32), 1253 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1254 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1255 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1256 } 1257 } 1258 1259 static void TestCoverageMBCS(){ 1260 #if 0 1261 UErrorCode status = U_ZERO_ERROR; 1262 const char *directory = loadTestData(&status); 1263 char* tdpath = NULL; 1264 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1265 int len = strlen(directory); 1266 char* index=NULL; 1267 1268 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1269 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1270 log_verbose("Retrieved data directory %s \n",saveDirectory); 1271 uprv_strcpy(tdpath,directory); 1272 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1273 1274 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1275 *(index+1)=0; 1276 } 1277 u_setDataDirectory(tdpath); 1278 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1279 #endif 1280 1281 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1282 which is test file for MBCS conversion with single-byte codepage data.*/ 1283 { 1284 1285 /* MBCS with single byte codepage data test1.ucm*/ 1286 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1287 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1288 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1289 1290 /*from Unicode*/ 1291 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1292 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1293 } 1294 1295 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm 1296 which is test file for MBCS conversion with three-byte codepage data.*/ 1297 { 1298 1299 /* MBCS with three byte codepage data test3.ucm*/ 1300 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1301 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1302 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1303 1304 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1305 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1306 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1307 1308 /*from Unicode*/ 1309 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1310 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1311 1312 /*to Unicode*/ 1313 testConvertToU(test3input, sizeof(test3input), 1314 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1315 1316 } 1317 1318 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm 1319 which is test file for MBCS conversion with four-byte codepage data.*/ 1320 { 1321 1322 /* MBCS with three byte codepage data test4.ucm*/ 1323 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1324 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1325 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1326 1327 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1328 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1329 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1330 1331 /*from Unicode*/ 1332 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1333 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1334 1335 /*to Unicode*/ 1336 testConvertToU(test4input, sizeof(test4input), 1337 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1338 1339 } 1340 #if 0 1341 free(tdpath); 1342 /* restore the original data directory */ 1343 log_verbose("Setting the data directory to %s \n", saveDirectory); 1344 u_setDataDirectory(saveDirectory); 1345 free(saveDirectory); 1346 #endif 1347 1348 } 1349 1350 static void TestConverterType(const char *convName, UConverterType convType) { 1351 UConverter* myConverter; 1352 UErrorCode err = U_ZERO_ERROR; 1353 1354 myConverter = my_ucnv_open(convName, &err); 1355 1356 if (U_FAILURE(err)) { 1357 log_data_err("Failed to create an %s converter\n", convName); 1358 return; 1359 } 1360 else 1361 { 1362 if (ucnv_getType(myConverter)!=convType) { 1363 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", 1364 convName, convType); 1365 } 1366 else { 1367 log_verbose("ucnv_getType %s ok\n", convName); 1368 } 1369 } 1370 ucnv_close(myConverter); 1371 } 1372 1373 static void TestConverterTypesAndStarters() 1374 { 1375 #if !UCONFIG_NO_LEGACY_CONVERSION 1376 UConverter* myConverter; 1377 UErrorCode err = U_ZERO_ERROR; 1378 UBool mystarters[256]; 1379 1380 /* const UBool expectedKSCstarters[256] = { 1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1394 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1395 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1397 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1398 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1399 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1401 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1406 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1407 1408 1409 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1410 1411 myConverter = ucnv_open("ksc", &err); 1412 if (U_FAILURE(err)) { 1413 log_data_err("Failed to create an ibm-ksc converter\n"); 1414 return; 1415 } 1416 else 1417 { 1418 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1419 log_err("ucnv_getType Failed for ibm-949\n"); 1420 else 1421 log_verbose("ucnv_getType ibm-949 ok\n"); 1422 1423 if(myConverter!=NULL) 1424 ucnv_getStarters(myConverter, mystarters, &err); 1425 1426 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1427 log_err("Failed ucnv_getStarters for ksc\n"); 1428 else 1429 log_verbose("ucnv_getStarters ok\n");*/ 1430 1431 } 1432 ucnv_close(myConverter); 1433 1434 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1435 TestConverterType("ibm-878", UCNV_SBCS); 1436 #endif 1437 1438 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1439 1440 TestConverterType("ibm-1208", UCNV_UTF8); 1441 1442 TestConverterType("utf-8", UCNV_UTF8); 1443 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1444 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1445 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1446 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1447 1448 #if !UCONFIG_NO_LEGACY_CONVERSION 1449 1450 #if defined(U_ENABLE_GENERIC_ISO_2022) 1451 TestConverterType("iso-2022", UCNV_ISO_2022); 1452 #endif 1453 1454 TestConverterType("hz", UCNV_HZ); 1455 #endif 1456 1457 TestConverterType("scsu", UCNV_SCSU); 1458 1459 #if !UCONFIG_NO_LEGACY_CONVERSION 1460 TestConverterType("x-iscii-de", UCNV_ISCII); 1461 #endif 1462 1463 TestConverterType("ascii", UCNV_US_ASCII); 1464 TestConverterType("utf-7", UCNV_UTF7); 1465 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1466 TestConverterType("bocu-1", UCNV_BOCU1); 1467 } 1468 1469 static void 1470 TestAmbiguousConverter(UConverter *cnv) { 1471 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1472 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1473 1474 const char *s; 1475 UChar *u; 1476 UErrorCode errorCode; 1477 UBool isAmbiguous; 1478 1479 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1480 errorCode=U_ZERO_ERROR; 1481 s=inBytes; 1482 u=outUnicode; 1483 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1484 if(U_FAILURE(errorCode)) { 1485 /* we do not care about general failures in this test; the input may just not be mappable */ 1486 return; 1487 } 1488 1489 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1490 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1491 /* There are some encodings that are partially ASCII based, 1492 like the ISO-7 and GSM series of codepages, which we ignore. */ 1493 return; 1494 } 1495 1496 isAmbiguous=ucnv_isAmbiguous(cnv); 1497 1498 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1499 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1500 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1501 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1502 return; 1503 } 1504 1505 if(outUnicode[2]!=0x5c) { 1506 /* needs fixup, fix it */ 1507 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1508 if(outUnicode[2]!=0x5c) { 1509 /* the fix failed */ 1510 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1511 return; 1512 } 1513 } 1514 } 1515 1516 static void TestAmbiguous() 1517 { 1518 UErrorCode status = U_ZERO_ERROR; 1519 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1520 static const char target[] = { 1521 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1522 0x5c, 0x75, 0x73, 0x72, 1523 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1524 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1525 0x5c, 0x64, 0x61, 0x74, 0x61, 1526 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1527 0 1528 }; 1529 UChar asciiResult[200], sjisResult[200]; 1530 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1531 const char *name; 1532 1533 /* enumerate all converters */ 1534 status=U_ZERO_ERROR; 1535 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1536 cnv=ucnv_open(name, &status); 1537 if(U_SUCCESS(status)) { 1538 /* BEGIN android-changed 1539 To save space, Android does not build full ISO2022 CJK tables. 1540 We skip the tests for ISO-2022. */ 1541 const char* cnvName = ucnv_getName(cnv, &status); 1542 if (strlen(cnvName) < 8 || 1543 strncmp(cnvName, "ISO_2022", 8) != 0) { 1544 TestAmbiguousConverter(cnv); 1545 } 1546 /* END android-changed */ 1547 } else { 1548 log_err("error: unable to open available converter \"%s\"\n", name); 1549 status=U_ZERO_ERROR; 1550 } 1551 } 1552 1553 #if !UCONFIG_NO_LEGACY_CONVERSION 1554 sjis_cnv = ucnv_open("ibm-943", &status); 1555 if (U_FAILURE(status)) 1556 { 1557 log_data_err("Failed to create a SJIS converter\n"); 1558 return; 1559 } 1560 ascii_cnv = ucnv_open("LATIN-1", &status); 1561 if (U_FAILURE(status)) 1562 { 1563 log_data_err("Failed to create a LATIN-1 converter\n"); 1564 ucnv_close(sjis_cnv); 1565 return; 1566 } 1567 /* convert target from SJIS to Unicode */ 1568 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1569 if (U_FAILURE(status)) 1570 { 1571 log_err("Failed to convert the SJIS string.\n"); 1572 ucnv_close(sjis_cnv); 1573 ucnv_close(ascii_cnv); 1574 return; 1575 } 1576 /* convert target from Latin-1 to Unicode */ 1577 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1578 if (U_FAILURE(status)) 1579 { 1580 log_err("Failed to convert the Latin-1 string.\n"); 1581 ucnv_close(sjis_cnv); 1582 ucnv_close(ascii_cnv); 1583 return; 1584 } 1585 if (!ucnv_isAmbiguous(sjis_cnv)) 1586 { 1587 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1588 ucnv_close(sjis_cnv); 1589 ucnv_close(ascii_cnv); 1590 return; 1591 } 1592 if (u_strcmp(sjisResult, asciiResult) == 0) 1593 { 1594 log_err("File separators for SJIS don't need to be fixed.\n"); 1595 } 1596 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1597 if (u_strcmp(sjisResult, asciiResult) != 0) 1598 { 1599 log_err("Fixing file separator for SJIS failed.\n"); 1600 } 1601 ucnv_close(sjis_cnv); 1602 ucnv_close(ascii_cnv); 1603 #endif 1604 } 1605 1606 static void 1607 TestSignatureDetection(){ 1608 /* with null terminated strings */ 1609 { 1610 static const char* data[] = { 1611 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1612 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1613 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1614 "\x0E\xFE\xFF\x00", /* SCSU */ 1615 1616 "\xFE\xFF", /* UTF-16BE */ 1617 "\xFF\xFE", /* UTF-16LE */ 1618 "\xEF\xBB\xBF", /* UTF-8 */ 1619 "\x0E\xFE\xFF", /* SCSU */ 1620 1621 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1622 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1623 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1624 "\x0E\xFE\xFF\x41", /* SCSU */ 1625 1626 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1627 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1628 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1629 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1630 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1631 1632 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1633 }; 1634 static const char* expected[] = { 1635 "UTF-16BE", 1636 "UTF-16LE", 1637 "UTF-8", 1638 "SCSU", 1639 1640 "UTF-16BE", 1641 "UTF-16LE", 1642 "UTF-8", 1643 "SCSU", 1644 1645 "UTF-16BE", 1646 "UTF-16LE", 1647 "UTF-8", 1648 "SCSU", 1649 1650 "UTF-7", 1651 "UTF-7", 1652 "UTF-7", 1653 "UTF-7", 1654 "UTF-7", 1655 "UTF-EBCDIC" 1656 }; 1657 static const int32_t expectedLength[] ={ 1658 2, 1659 2, 1660 3, 1661 3, 1662 1663 2, 1664 2, 1665 3, 1666 3, 1667 1668 2, 1669 2, 1670 3, 1671 3, 1672 1673 5, 1674 4, 1675 4, 1676 4, 1677 4, 1678 4 1679 }; 1680 int i=0; 1681 UErrorCode err; 1682 int32_t signatureLength = -1; 1683 const char* source = NULL; 1684 const char* enc = NULL; 1685 for( ; i<sizeof(data)/sizeof(char*); i++){ 1686 err = U_ZERO_ERROR; 1687 source = data[i]; 1688 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1689 if(U_FAILURE(err)){ 1690 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1691 continue; 1692 } 1693 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1694 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1695 continue; 1696 } 1697 if(signatureLength != expectedLength[i]){ 1698 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1699 } 1700 } 1701 } 1702 { 1703 static const char* data[] = { 1704 "\xFE\xFF\x00", /* UTF-16BE */ 1705 "\xFF\xFE\x00", /* UTF-16LE */ 1706 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1707 "\x0E\xFE\xFF\x00", /* SCSU */ 1708 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1709 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1710 "\xFE\xFF", /* UTF-16BE */ 1711 "\xFF\xFE", /* UTF-16LE */ 1712 "\xEF\xBB\xBF", /* UTF-8 */ 1713 "\x0E\xFE\xFF", /* SCSU */ 1714 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1715 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1716 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1717 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1718 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1719 "\x0E\xFE\xFF\x41", /* SCSU */ 1720 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1721 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1722 "\xFB\xEE\x28", /* BOCU-1 */ 1723 "\xFF\x41\x42" /* NULL */ 1724 }; 1725 static const int len[] = { 1726 3, 1727 3, 1728 4, 1729 4, 1730 4, 1731 4, 1732 2, 1733 2, 1734 3, 1735 3, 1736 4, 1737 4, 1738 4, 1739 4, 1740 4, 1741 4, 1742 5, 1743 5, 1744 3, 1745 3 1746 }; 1747 1748 static const char* expected[] = { 1749 "UTF-16BE", 1750 "UTF-16LE", 1751 "UTF-8", 1752 "SCSU", 1753 "UTF-32BE", 1754 "UTF-32LE", 1755 "UTF-16BE", 1756 "UTF-16LE", 1757 "UTF-8", 1758 "SCSU", 1759 "UTF-32BE", 1760 "UTF-32LE", 1761 "UTF-16BE", 1762 "UTF-16LE", 1763 "UTF-8", 1764 "SCSU", 1765 "UTF-32BE", 1766 "UTF-32LE", 1767 "BOCU-1", 1768 NULL 1769 }; 1770 static const int32_t expectedLength[] ={ 1771 2, 1772 2, 1773 3, 1774 3, 1775 4, 1776 4, 1777 2, 1778 2, 1779 3, 1780 3, 1781 4, 1782 4, 1783 2, 1784 2, 1785 3, 1786 3, 1787 4, 1788 4, 1789 3, 1790 0 1791 }; 1792 int i=0; 1793 UErrorCode err; 1794 int32_t signatureLength = -1; 1795 int32_t sourceLength=-1; 1796 const char* source = NULL; 1797 const char* enc = NULL; 1798 for( ; i<sizeof(data)/sizeof(char*); i++){ 1799 err = U_ZERO_ERROR; 1800 source = data[i]; 1801 sourceLength = len[i]; 1802 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); 1803 if(U_FAILURE(err)){ 1804 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1805 continue; 1806 } 1807 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1808 if(expected[i] !=NULL){ 1809 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1810 continue; 1811 } 1812 } 1813 if(signatureLength != expectedLength[i]){ 1814 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1815 } 1816 } 1817 } 1818 } 1819 1820 static void TestUTF7() { 1821 /* test input */ 1822 static const uint8_t in[]={ 1823 /* H - +Jjo- - ! +- +2AHcAQ */ 1824 0x48, 1825 0x2d, 1826 0x2b, 0x4a, 0x6a, 0x6f, 1827 0x2d, 0x2d, 1828 0x21, 1829 0x2b, 0x2d, 1830 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1831 }; 1832 1833 /* expected test results */ 1834 static const int32_t results[]={ 1835 /* number of bytes read, code point */ 1836 1, 0x48, 1837 1, 0x2d, 1838 4, 0x263a, /* <WHITE SMILING FACE> */ 1839 2, 0x2d, 1840 1, 0x21, 1841 2, 0x2b, 1842 7, 0x10401 1843 }; 1844 1845 const char *cnvName; 1846 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1847 UErrorCode errorCode=U_ZERO_ERROR; 1848 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1849 if(U_FAILURE(errorCode)) { 1850 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1851 return; 1852 } 1853 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1854 /* Test the condition when source >= sourceLimit */ 1855 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1856 cnvName = ucnv_getName(cnv, &errorCode); 1857 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1858 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1859 } 1860 ucnv_close(cnv); 1861 } 1862 1863 static void TestIMAP() { 1864 /* test input */ 1865 static const uint8_t in[]={ 1866 /* H - &Jjo- - ! &- &2AHcAQ- \ */ 1867 0x48, 1868 0x2d, 1869 0x26, 0x4a, 0x6a, 0x6f, 1870 0x2d, 0x2d, 1871 0x21, 1872 0x26, 0x2d, 1873 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1874 }; 1875 1876 /* expected test results */ 1877 static const int32_t results[]={ 1878 /* number of bytes read, code point */ 1879 1, 0x48, 1880 1, 0x2d, 1881 4, 0x263a, /* <WHITE SMILING FACE> */ 1882 2, 0x2d, 1883 1, 0x21, 1884 2, 0x26, 1885 7, 0x10401 1886 }; 1887 1888 const char *cnvName; 1889 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1890 UErrorCode errorCode=U_ZERO_ERROR; 1891 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1892 if(U_FAILURE(errorCode)) { 1893 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1894 return; 1895 } 1896 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1897 /* Test the condition when source >= sourceLimit */ 1898 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1899 cnvName = ucnv_getName(cnv, &errorCode); 1900 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1901 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1902 } 1903 ucnv_close(cnv); 1904 } 1905 1906 static void TestUTF8() { 1907 /* test input */ 1908 static const uint8_t in[]={ 1909 0x61, 1910 0xc2, 0x80, 1911 0xe0, 0xa0, 0x80, 1912 0xf0, 0x90, 0x80, 0x80, 1913 0xf4, 0x84, 0x8c, 0xa1, 1914 0xf0, 0x90, 0x90, 0x81 1915 }; 1916 1917 /* expected test results */ 1918 static const int32_t results[]={ 1919 /* number of bytes read, code point */ 1920 1, 0x61, 1921 2, 0x80, 1922 3, 0x800, 1923 4, 0x10000, 1924 4, 0x104321, 1925 4, 0x10401 1926 }; 1927 1928 /* error test input */ 1929 static const uint8_t in2[]={ 1930 0x61, 1931 0xc0, 0x80, /* illegal non-shortest form */ 1932 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1933 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1934 0xc0, 0xc0, /* illegal trail byte */ 1935 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1936 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1937 0xfe, /* illegal byte altogether */ 1938 0x62 1939 }; 1940 1941 /* expected error test results */ 1942 static const int32_t results2[]={ 1943 /* number of bytes read, code point */ 1944 1, 0x61, 1945 22, 0x62 1946 }; 1947 1948 UConverterToUCallback cb; 1949 const void *p; 1950 1951 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1952 UErrorCode errorCode=U_ZERO_ERROR; 1953 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1954 if(U_FAILURE(errorCode)) { 1955 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1956 return; 1957 } 1958 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1959 /* Test the condition when source >= sourceLimit */ 1960 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1961 1962 /* test error behavior with a skip callback */ 1963 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1964 source=(const char *)in2; 1965 limit=(const char *)(in2+sizeof(in2)); 1966 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1967 1968 ucnv_close(cnv); 1969 } 1970 1971 static void TestCESU8() { 1972 /* test input */ 1973 static const uint8_t in[]={ 1974 0x61, 1975 0xc2, 0x80, 1976 0xe0, 0xa0, 0x80, 1977 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1978 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1979 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1980 0xef, 0xbf, 0xbc 1981 }; 1982 1983 /* expected test results */ 1984 static const int32_t results[]={ 1985 /* number of bytes read, code point */ 1986 1, 0x61, 1987 2, 0x80, 1988 3, 0x800, 1989 6, 0x10000, 1990 3, 0xdc01, 1991 -1,0xd802, /* may read 3 or 6 bytes */ 1992 -1,0x10ffff,/* may read 0 or 3 bytes */ 1993 3, 0xfffc 1994 }; 1995 1996 /* error test input */ 1997 static const uint8_t in2[]={ 1998 0x61, 1999 0xc0, 0x80, /* illegal non-shortest form */ 2000 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 2001 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 2002 0xc0, 0xc0, /* illegal trail byte */ 2003 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 2004 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 2005 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 2006 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 2007 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 2008 0xfe, /* illegal byte altogether */ 2009 0x62 2010 }; 2011 2012 /* expected error test results */ 2013 static const int32_t results2[]={ 2014 /* number of bytes read, code point */ 2015 1, 0x61, 2016 34, 0x62 2017 }; 2018 2019 UConverterToUCallback cb; 2020 const void *p; 2021 2022 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 2023 UErrorCode errorCode=U_ZERO_ERROR; 2024 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2025 if(U_FAILURE(errorCode)) { 2026 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2027 return; 2028 } 2029 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2030 /* Test the condition when source >= sourceLimit */ 2031 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2032 2033 /* test error behavior with a skip callback */ 2034 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2035 source=(const char *)in2; 2036 limit=(const char *)(in2+sizeof(in2)); 2037 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2038 2039 ucnv_close(cnv); 2040 } 2041 2042 static void TestUTF16() { 2043 /* test input */ 2044 static const uint8_t in1[]={ 2045 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2046 }; 2047 static const uint8_t in2[]={ 2048 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2049 }; 2050 static const uint8_t in3[]={ 2051 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2052 }; 2053 2054 /* expected test results */ 2055 static const int32_t results1[]={ 2056 /* number of bytes read, code point */ 2057 4, 0x4e00, 2058 2, 0xfeff 2059 }; 2060 static const int32_t results2[]={ 2061 /* number of bytes read, code point */ 2062 4, 0x004e, 2063 2, 0xfffe 2064 }; 2065 static const int32_t results3[]={ 2066 /* number of bytes read, code point */ 2067 2, 0xfefe, 2068 2, 0x4e00, 2069 2, 0xfeff, 2070 4, 0x20001 2071 }; 2072 2073 const char *source, *limit; 2074 2075 UErrorCode errorCode=U_ZERO_ERROR; 2076 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2077 if(U_FAILURE(errorCode)) { 2078 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2079 return; 2080 } 2081 2082 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2083 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2084 2085 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2086 ucnv_resetToUnicode(cnv); 2087 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2088 2089 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2090 ucnv_resetToUnicode(cnv); 2091 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2092 2093 /* Test the condition when source >= sourceLimit */ 2094 ucnv_resetToUnicode(cnv); 2095 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2096 2097 ucnv_close(cnv); 2098 } 2099 2100 static void TestUTF16BE() { 2101 /* test input */ 2102 static const uint8_t in[]={ 2103 0x00, 0x61, 2104 0x00, 0xc0, 2105 0x00, 0x31, 2106 0x00, 0xf4, 2107 0xce, 0xfe, 2108 0xd8, 0x01, 0xdc, 0x01 2109 }; 2110 2111 /* expected test results */ 2112 static const int32_t results[]={ 2113 /* number of bytes read, code point */ 2114 2, 0x61, 2115 2, 0xc0, 2116 2, 0x31, 2117 2, 0xf4, 2118 2, 0xcefe, 2119 4, 0x10401 2120 }; 2121 2122 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2123 UErrorCode errorCode=U_ZERO_ERROR; 2124 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2125 if(U_FAILURE(errorCode)) { 2126 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); 2127 return; 2128 } 2129 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2130 /* Test the condition when source >= sourceLimit */ 2131 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2132 /*Test for the condition where there is an invalid character*/ 2133 { 2134 static const uint8_t source2[]={0x61}; 2135 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2136 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2137 } 2138 #if 0 2139 /* 2140 * Test disabled because currently the UTF-16BE/LE converters are supposed 2141 * to not set errors for unpaired surrogates. 2142 * This may change with 2143 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2144 */ 2145 2146 /*Test for the condition where there is a surrogate pair*/ 2147 { 2148 const uint8_t source2[]={0xd8, 0x01}; 2149 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2150 } 2151 #endif 2152 ucnv_close(cnv); 2153 } 2154 2155 static void 2156 TestUTF16LE() { 2157 /* test input */ 2158 static const uint8_t in[]={ 2159 0x61, 0x00, 2160 0x31, 0x00, 2161 0x4e, 0x2e, 2162 0x4e, 0x00, 2163 0x01, 0xd8, 0x01, 0xdc 2164 }; 2165 2166 /* expected test results */ 2167 static const int32_t results[]={ 2168 /* number of bytes read, code point */ 2169 2, 0x61, 2170 2, 0x31, 2171 2, 0x2e4e, 2172 2, 0x4e, 2173 4, 0x10401 2174 }; 2175 2176 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2177 UErrorCode errorCode=U_ZERO_ERROR; 2178 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2179 if(U_FAILURE(errorCode)) { 2180 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2181 return; 2182 } 2183 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2184 /* Test the condition when source >= sourceLimit */ 2185 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2186 /*Test for the condition where there is an invalid character*/ 2187 { 2188 static const uint8_t source2[]={0x61}; 2189 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2190 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2191 } 2192 #if 0 2193 /* 2194 * Test disabled because currently the UTF-16BE/LE converters are supposed 2195 * to not set errors for unpaired surrogates. 2196 * This may change with 2197 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2198 */ 2199 2200 /*Test for the condition where there is a surrogate character*/ 2201 { 2202 static const uint8_t source2[]={0x01, 0xd8}; 2203 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2204 } 2205 #endif 2206 2207 ucnv_close(cnv); 2208 } 2209 2210 static void TestUTF32() { 2211 /* test input */ 2212 static const uint8_t in1[]={ 2213 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2214 }; 2215 static const uint8_t in2[]={ 2216 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2217 }; 2218 static const uint8_t in3[]={ 2219 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2220 }; 2221 2222 /* expected test results */ 2223 static const int32_t results1[]={ 2224 /* number of bytes read, code point */ 2225 8, 0x100f00, 2226 4, 0xfeff 2227 }; 2228 static const int32_t results2[]={ 2229 /* number of bytes read, code point */ 2230 8, 0x0f1000, 2231 4, 0xfffe 2232 }; 2233 static const int32_t results3[]={ 2234 /* number of bytes read, code point */ 2235 4, 0xfefe, 2236 4, 0x100f00, 2237 4, 0xfffd, /* unmatched surrogate */ 2238 4, 0xfffd /* unmatched surrogate */ 2239 }; 2240 2241 const char *source, *limit; 2242 2243 UErrorCode errorCode=U_ZERO_ERROR; 2244 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2245 if(U_FAILURE(errorCode)) { 2246 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2247 return; 2248 } 2249 2250 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2251 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2252 2253 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2254 ucnv_resetToUnicode(cnv); 2255 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2256 2257 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2258 ucnv_resetToUnicode(cnv); 2259 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2260 2261 /* Test the condition when source >= sourceLimit */ 2262 ucnv_resetToUnicode(cnv); 2263 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2264 2265 ucnv_close(cnv); 2266 } 2267 2268 static void 2269 TestUTF32BE() { 2270 /* test input */ 2271 static const uint8_t in[]={ 2272 0x00, 0x00, 0x00, 0x61, 2273 0x00, 0x00, 0x30, 0x61, 2274 0x00, 0x00, 0xdc, 0x00, 2275 0x00, 0x00, 0xd8, 0x00, 2276 0x00, 0x00, 0xdf, 0xff, 2277 0x00, 0x00, 0xff, 0xfe, 2278 0x00, 0x10, 0xab, 0xcd, 2279 0x00, 0x10, 0xff, 0xff 2280 }; 2281 2282 /* expected test results */ 2283 static const int32_t results[]={ 2284 /* number of bytes read, code point */ 2285 4, 0x61, 2286 4, 0x3061, 2287 4, 0xfffd, 2288 4, 0xfffd, 2289 4, 0xfffd, 2290 4, 0xfffe, 2291 4, 0x10abcd, 2292 4, 0x10ffff 2293 }; 2294 2295 /* error test input */ 2296 static const uint8_t in2[]={ 2297 0x00, 0x00, 0x00, 0x61, 2298 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2299 0x00, 0x00, 0x00, 0x62, 2300 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2301 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2302 0x00, 0x00, 0x01, 0x62, 2303 0x00, 0x00, 0x02, 0x62 2304 }; 2305 2306 /* expected error test results */ 2307 static const int32_t results2[]={ 2308 /* number of bytes read, code point */ 2309 4, 0x61, 2310 8, 0x62, 2311 12, 0x162, 2312 4, 0x262 2313 }; 2314 2315 UConverterToUCallback cb; 2316 const void *p; 2317 2318 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2319 UErrorCode errorCode=U_ZERO_ERROR; 2320 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2321 if(U_FAILURE(errorCode)) { 2322 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2323 return; 2324 } 2325 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2326 2327 /* Test the condition when source >= sourceLimit */ 2328 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2329 2330 /* test error behavior with a skip callback */ 2331 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2332 source=(const char *)in2; 2333 limit=(const char *)(in2+sizeof(in2)); 2334 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2335 2336 ucnv_close(cnv); 2337 } 2338 2339 static void 2340 TestUTF32LE() { 2341 /* test input */ 2342 static const uint8_t in[]={ 2343 0x61, 0x00, 0x00, 0x00, 2344 0x61, 0x30, 0x00, 0x00, 2345 0x00, 0xdc, 0x00, 0x00, 2346 0x00, 0xd8, 0x00, 0x00, 2347 0xff, 0xdf, 0x00, 0x00, 2348 0xfe, 0xff, 0x00, 0x00, 2349 0xcd, 0xab, 0x10, 0x00, 2350 0xff, 0xff, 0x10, 0x00 2351 }; 2352 2353 /* expected test results */ 2354 static const int32_t results[]={ 2355 /* number of bytes read, code point */ 2356 4, 0x61, 2357 4, 0x3061, 2358 4, 0xfffd, 2359 4, 0xfffd, 2360 4, 0xfffd, 2361 4, 0xfffe, 2362 4, 0x10abcd, 2363 4, 0x10ffff 2364 }; 2365 2366 /* error test input */ 2367 static const uint8_t in2[]={ 2368 0x61, 0x00, 0x00, 0x00, 2369 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2370 0x62, 0x00, 0x00, 0x00, 2371 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2372 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2373 0x62, 0x01, 0x00, 0x00, 2374 0x62, 0x02, 0x00, 0x00, 2375 }; 2376 2377 /* expected error test results */ 2378 static const int32_t results2[]={ 2379 /* number of bytes read, code point */ 2380 4, 0x61, 2381 8, 0x62, 2382 12, 0x162, 2383 4, 0x262, 2384 }; 2385 2386 UConverterToUCallback cb; 2387 const void *p; 2388 2389 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2390 UErrorCode errorCode=U_ZERO_ERROR; 2391 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2392 if(U_FAILURE(errorCode)) { 2393 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2394 return; 2395 } 2396 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2397 2398 /* Test the condition when source >= sourceLimit */ 2399 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2400 2401 /* test error behavior with a skip callback */ 2402 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2403 source=(const char *)in2; 2404 limit=(const char *)(in2+sizeof(in2)); 2405 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2406 2407 ucnv_close(cnv); 2408 } 2409 2410 static void 2411 TestLATIN1() { 2412 /* test input */ 2413 static const uint8_t in[]={ 2414 0x61, 2415 0x31, 2416 0x32, 2417 0xc0, 2418 0xf0, 2419 0xf4, 2420 }; 2421 2422 /* expected test results */ 2423 static const int32_t results[]={ 2424 /* number of bytes read, code point */ 2425 1, 0x61, 2426 1, 0x31, 2427 1, 0x32, 2428 1, 0xc0, 2429 1, 0xf0, 2430 1, 0xf4, 2431 }; 2432 static const uint16_t in1[] = { 2433 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2434 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2435 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2436 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2437 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2438 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2439 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2440 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2441 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2442 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2443 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2444 0xcb, 0x82 2445 }; 2446 static const uint8_t out1[] = { 2447 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2448 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2449 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2450 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2451 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2452 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2453 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2454 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2455 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2456 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2457 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2458 0xcb, 0x82 2459 }; 2460 static const uint16_t in2[]={ 2461 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2462 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2463 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2464 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2465 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2466 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2467 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2468 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2469 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2470 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2471 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2472 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2473 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2474 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2475 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2476 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2477 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2478 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2479 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2480 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2481 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2482 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2483 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2484 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2485 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2486 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2487 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2488 0x37, 0x20, 0x2A, 0x2F, 2489 }; 2490 static const unsigned char out2[]={ 2491 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2492 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2493 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2494 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2495 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2496 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2497 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2498 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2499 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2500 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2501 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2502 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2503 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2504 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2505 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2506 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2507 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2508 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2509 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2510 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2511 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2512 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2513 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2514 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2515 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2516 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2517 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2518 0x37, 0x20, 0x2A, 0x2F, 2519 }; 2520 const char *source=(const char *)in; 2521 const char *limit=(const char *)in+sizeof(in); 2522 2523 UErrorCode errorCode=U_ZERO_ERROR; 2524 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2525 if(U_FAILURE(errorCode)) { 2526 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2527 return; 2528 } 2529 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2530 /* Test the condition when source >= sourceLimit */ 2531 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2532 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2533 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2534 2535 ucnv_close(cnv); 2536 } 2537 2538 static void 2539 TestSBCS() { 2540 /* test input */ 2541 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2542 /* expected test results */ 2543 static const int32_t results[]={ 2544 /* number of bytes read, code point */ 2545 1, 0x61, 2546 1, 0xbf, 2547 1, 0xc4, 2548 1, 0x2021, 2549 1, 0xf8ff, 2550 1, 0x00d9 2551 }; 2552 2553 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2554 UErrorCode errorCode=U_ZERO_ERROR; 2555 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2556 if(U_FAILURE(errorCode)) { 2557 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2558 return; 2559 } 2560 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2561 /* Test the condition when source >= sourceLimit */ 2562 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2563 /*Test for Illegal character */ /* 2564 { 2565 static const uint8_t input1[]={ 0xA1 }; 2566 const char* illegalsource=(const char*)input1; 2567 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2568 } 2569 */ 2570 ucnv_close(cnv); 2571 } 2572 2573 static void 2574 TestDBCS() { 2575 /* test input */ 2576 static const uint8_t in[]={ 2577 0x44, 0x6a, 2578 0xc4, 0x9c, 2579 0x7a, 0x74, 2580 0x46, 0xab, 2581 0x42, 0x5b, 2582 2583 }; 2584 2585 /* expected test results */ 2586 static const int32_t results[]={ 2587 /* number of bytes read, code point */ 2588 2, 0x00a7, 2589 2, 0xe1d2, 2590 2, 0x6962, 2591 2, 0xf842, 2592 2, 0xffe5, 2593 }; 2594 2595 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2596 UErrorCode errorCode=U_ZERO_ERROR; 2597 2598 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2599 if(U_FAILURE(errorCode)) { 2600 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2601 return; 2602 } 2603 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2604 /* Test the condition when source >= sourceLimit */ 2605 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2606 /*Test for the condition where there is an invalid character*/ 2607 { 2608 static const uint8_t source2[]={0x1a, 0x1b}; 2609 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2610 } 2611 /*Test for the condition where we have a truncated char*/ 2612 { 2613 static const uint8_t source1[]={0xc4}; 2614 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2615 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2616 } 2617 ucnv_close(cnv); 2618 } 2619 2620 static void 2621 TestMBCS() { 2622 /* test input */ 2623 static const uint8_t in[]={ 2624 0x01, 2625 0xa6, 0xa3, 2626 0x00, 2627 0xa6, 0xa1, 2628 0x08, 2629 0xc2, 0x76, 2630 0xc2, 0x78, 2631 2632 }; 2633 2634 /* expected test results */ 2635 static const int32_t results[]={ 2636 /* number of bytes read, code point */ 2637 1, 0x0001, 2638 2, 0x250c, 2639 1, 0x0000, 2640 2, 0x2500, 2641 1, 0x0008, 2642 2, 0xd60c, 2643 2, 0xd60e, 2644 }; 2645 2646 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2647 UErrorCode errorCode=U_ZERO_ERROR; 2648 2649 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2650 if(U_FAILURE(errorCode)) { 2651 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2652 return; 2653 } 2654 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2655 /* Test the condition when source >= sourceLimit */ 2656 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2657 /*Test for the condition where there is an invalid character*/ 2658 { 2659 static const uint8_t source2[]={0xa1, 0x80}; 2660 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2661 } 2662 /*Test for the condition where we have a truncated char*/ 2663 { 2664 static const uint8_t source1[]={0xc4}; 2665 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2666 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2667 } 2668 ucnv_close(cnv); 2669 2670 } 2671 2672 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2673 static void 2674 TestICCRunout() { 2675 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2676 2677 const char *cnvName = "ibm-1363"; 2678 UErrorCode status = U_ZERO_ERROR; 2679 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; 2680 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ 2681 const char *source = sourceData; 2682 const char *sourceLim = sourceData+sizeof(sourceData); 2683 UChar c1, c2, c3; 2684 UConverter *cnv=ucnv_open(cnvName, &status); 2685 if(U_FAILURE(status)) { 2686 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2687 return; 2688 } 2689 2690 #if 0 2691 { 2692 UChar targetBuf[256]; 2693 UChar *target = targetBuf; 2694 UChar *targetLim = target+256; 2695 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2696 2697 log_info("After convert: target@%d, source@%d, status%s\n", 2698 target-targetBuf, source-sourceData, u_errorName(status)); 2699 2700 if(U_FAILURE(status)) { 2701 log_err("Failed to convert: %s\n", u_errorName(status)); 2702 } else { 2703 2704 } 2705 } 2706 #endif 2707 2708 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2709 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); 2710 2711 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2712 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); 2713 2714 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2715 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); 2716 2717 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { 2718 log_verbose("OK\n"); 2719 } else { 2720 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); 2721 } 2722 2723 ucnv_close(cnv); 2724 2725 } 2726 #endif 2727 2728 #ifdef U_ENABLE_GENERIC_ISO_2022 2729 2730 static void 2731 TestISO_2022() { 2732 /* test input */ 2733 static const uint8_t in[]={ 2734 0x1b, 0x25, 0x42, 2735 0x31, 2736 0x32, 2737 0x61, 2738 0xc2, 0x80, 2739 0xe0, 0xa0, 0x80, 2740 0xf0, 0x90, 0x80, 0x80 2741 }; 2742 2743 2744 2745 /* expected test results */ 2746 static const int32_t results[]={ 2747 /* number of bytes read, code point */ 2748 4, 0x0031, /* 4 bytes including the escape sequence */ 2749 1, 0x0032, 2750 1, 0x61, 2751 2, 0x80, 2752 3, 0x800, 2753 4, 0x10000 2754 }; 2755 2756 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2757 UErrorCode errorCode=U_ZERO_ERROR; 2758 UConverter *cnv; 2759 2760 cnv=ucnv_open("ISO_2022", &errorCode); 2761 if(U_FAILURE(errorCode)) { 2762 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2763 return; 2764 } 2765 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2766 2767 /* Test the condition when source >= sourceLimit */ 2768 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2769 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2770 /*Test for the condition where we have a truncated char*/ 2771 { 2772 static const uint8_t source1[]={0xc4}; 2773 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2774 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2775 } 2776 /*Test for the condition where there is an invalid character*/ 2777 { 2778 static const uint8_t source2[]={0xa1, 0x01}; 2779 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2780 } 2781 ucnv_close(cnv); 2782 } 2783 2784 #endif 2785 2786 static void 2787 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2788 const UChar* uSource; 2789 const UChar* uSourceLimit; 2790 const char* cSource; 2791 const char* cSourceLimit; 2792 UChar *uTargetLimit =NULL; 2793 UChar *uTarget; 2794 char *cTarget; 2795 const char *cTargetLimit; 2796 char *cBuf; 2797 UChar *uBuf; /*,*test;*/ 2798 int32_t uBufSize = 120; 2799 int len=0; 2800 int i=2; 2801 UErrorCode errorCode=U_ZERO_ERROR; 2802 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2803 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2804 ucnv_reset(cnv); 2805 for(;--i>0; ){ 2806 uSource = (UChar*) source; 2807 uSourceLimit=(const UChar*)sourceLimit; 2808 cTarget = cBuf; 2809 uTarget = uBuf; 2810 cSource = cBuf; 2811 cTargetLimit = cBuf; 2812 uTargetLimit = uBuf; 2813 2814 do{ 2815 2816 cTargetLimit = cTargetLimit+ i; 2817 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2818 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2819 errorCode=U_ZERO_ERROR; 2820 continue; 2821 } 2822 2823 if(U_FAILURE(errorCode)){ 2824 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2825 return; 2826 } 2827 2828 }while (uSource<uSourceLimit); 2829 2830 cSourceLimit =cTarget; 2831 do{ 2832 uTargetLimit=uTargetLimit+i; 2833 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2834 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2835 errorCode=U_ZERO_ERROR; 2836 continue; 2837 } 2838 if(U_FAILURE(errorCode)){ 2839 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2840 return; 2841 } 2842 }while(cSource<cSourceLimit); 2843 2844 uSource = source; 2845 /*test =uBuf;*/ 2846 for(len=0;len<(int)(source - sourceLimit);len++){ 2847 if(uBuf[len]!=uSource[len]){ 2848 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2849 } 2850 } 2851 } 2852 free(uBuf); 2853 free(cBuf); 2854 } 2855 /* Test for Jitterbug 778 */ 2856 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2857 const UChar* uSource; 2858 const UChar* uSourceLimit; 2859 const char* cSource; 2860 UChar *uTargetLimit =NULL; 2861 UChar *uTarget; 2862 char *cTarget; 2863 const char *cTargetLimit; 2864 char *cBuf; 2865 UChar *uBuf,*test; 2866 int32_t uBufSize = 120; 2867 int numCharsInTarget=0; 2868 UErrorCode errorCode=U_ZERO_ERROR; 2869 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2870 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2871 uSource = source; 2872 uSourceLimit=sourceLimit; 2873 cTarget = cBuf; 2874 cTargetLimit = cBuf +uBufSize*5; 2875 uTarget = uBuf; 2876 uTargetLimit = uBuf+ uBufSize*5; 2877 ucnv_reset(cnv); 2878 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2879 if(U_FAILURE(errorCode)){ 2880 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2881 return; 2882 } 2883 cSource = cBuf; 2884 test =uBuf; 2885 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2886 if(U_FAILURE(errorCode)){ 2887 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2888 return; 2889 } 2890 uSource = source; 2891 while(uSource<uSourceLimit){ 2892 if(*test!=*uSource){ 2893 2894 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 2895 } 2896 uSource++; 2897 test++; 2898 } 2899 free(uBuf); 2900 free(cBuf); 2901 } 2902 2903 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2904 const UChar* uSource; 2905 const UChar* uSourceLimit; 2906 const char* cSource; 2907 const char* cSourceLimit; 2908 UChar *uTargetLimit =NULL; 2909 UChar *uTarget; 2910 char *cTarget; 2911 const char *cTargetLimit; 2912 char *cBuf; 2913 UChar *uBuf; /*,*test;*/ 2914 int32_t uBufSize = 120; 2915 int len=0; 2916 int i=2; 2917 const UChar *temp = sourceLimit; 2918 UErrorCode errorCode=U_ZERO_ERROR; 2919 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2920 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2921 2922 ucnv_reset(cnv); 2923 for(;--i>0;){ 2924 uSource = (UChar*) source; 2925 cTarget = cBuf; 2926 uTarget = uBuf; 2927 cSource = cBuf; 2928 cTargetLimit = cBuf; 2929 uTargetLimit = uBuf+uBufSize*5; 2930 cTargetLimit = cTargetLimit+uBufSize*10; 2931 uSourceLimit=uSource; 2932 do{ 2933 2934 if (uSourceLimit < sourceLimit) { 2935 uSourceLimit = uSourceLimit+1; 2936 } 2937 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2938 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2939 errorCode=U_ZERO_ERROR; 2940 continue; 2941 } 2942 2943 if(U_FAILURE(errorCode)){ 2944 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2945 return; 2946 } 2947 2948 }while (uSource<temp); 2949 2950 cSourceLimit =cBuf; 2951 do{ 2952 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2953 cSourceLimit = cSourceLimit+1; 2954 } 2955 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2956 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2957 errorCode=U_ZERO_ERROR; 2958 continue; 2959 } 2960 if(U_FAILURE(errorCode)){ 2961 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2962 return; 2963 } 2964 }while(cSource<cTarget); 2965 2966 uSource = source; 2967 /*test =uBuf;*/ 2968 for(;len<(int)(source - sourceLimit);len++){ 2969 if(uBuf[len]!=uSource[len]){ 2970 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2971 } 2972 } 2973 } 2974 free(uBuf); 2975 free(cBuf); 2976 } 2977 static void 2978 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2979 const uint16_t results[], const char* message){ 2980 /* const char* s0; */ 2981 const char* s=(char*)source; 2982 const uint16_t *r=results; 2983 UErrorCode errorCode=U_ZERO_ERROR; 2984 uint32_t c,exC; 2985 ucnv_reset(cnv); 2986 while(s<limit) { 2987 /* s0=s; */ 2988 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2989 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2990 break; /* no more significant input */ 2991 } else if(U_FAILURE(errorCode)) { 2992 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2993 break; 2994 } else { 2995 if(UTF_IS_FIRST_SURROGATE(*r)){ 2996 int i =0, len = 2; 2997 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE); 2998 r++; 2999 }else{ 3000 exC = *r; 3001 } 3002 if(c!=(uint32_t)(exC)) 3003 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 3004 } 3005 r++; 3006 } 3007 } 3008 3009 static int TestJitterbug930(const char* enc){ 3010 UErrorCode err = U_ZERO_ERROR; 3011 UConverter*converter; 3012 char out[80]; 3013 char*target = out; 3014 UChar in[4]; 3015 const UChar*source = in; 3016 int32_t off[80]; 3017 int32_t* offsets = off; 3018 int numOffWritten=0; 3019 UBool flush = 0; 3020 converter = my_ucnv_open(enc, &err); 3021 3022 in[0] = 0x41; /* 0x4E00;*/ 3023 in[1] = 0x4E01; 3024 in[2] = 0x4E02; 3025 in[3] = 0x4E03; 3026 3027 memset(off, '*', sizeof(off)); 3028 3029 ucnv_fromUnicode (converter, 3030 &target, 3031 target+2, 3032 &source, 3033 source+3, 3034 offsets, 3035 flush, 3036 &err); 3037 3038 /* writes three bytes into the output buffer: 41 1B 24 3039 * but offsets contains 0 1 1 3040 */ 3041 while(*offsets< off[10]){ 3042 numOffWritten++; 3043 offsets++; 3044 } 3045 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3046 if(numOffWritten!= (int)(target-out)){ 3047 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3048 } 3049 3050 err = U_ZERO_ERROR; 3051 3052 memset(off,'*' , sizeof(off)); 3053 3054 flush = 1; 3055 offsets=off; 3056 ucnv_fromUnicode (converter, 3057 &target, 3058 target+4, 3059 &source, 3060 source, 3061 offsets, 3062 flush, 3063 &err); 3064 numOffWritten=0; 3065 while(*offsets< off[10]){ 3066 numOffWritten++; 3067 if(*offsets!= -1){ 3068 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3069 } 3070 offsets++; 3071 } 3072 3073 /* writes 42 43 7A into output buffer, 3074 * offsets contains -1 -1 -1 3075 */ 3076 ucnv_close(converter); 3077 return 0; 3078 } 3079 3080 static void 3081 TestHZ() { 3082 /* test input */ 3083 static const uint16_t in[]={ 3084 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3085 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3086 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3087 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3088 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3089 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3090 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3091 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3092 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3093 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 3094 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3095 0x005A, 0x005B, 0x005C, 0x000A 3096 }; 3097 const UChar* uSource; 3098 const UChar* uSourceLimit; 3099 const char* cSource; 3100 const char* cSourceLimit; 3101 UChar *uTargetLimit =NULL; 3102 UChar *uTarget; 3103 char *cTarget; 3104 const char *cTargetLimit; 3105 char *cBuf; 3106 UChar *uBuf,*test; 3107 int32_t uBufSize = 120; 3108 UErrorCode errorCode=U_ZERO_ERROR; 3109 UConverter *cnv; 3110 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3111 int32_t* myOff= offsets; 3112 cnv=ucnv_open("HZ", &errorCode); 3113 if(U_FAILURE(errorCode)) { 3114 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3115 return; 3116 } 3117 3118 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3119 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3120 uSource = (const UChar*)in; 3121 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3122 cTarget = cBuf; 3123 cTargetLimit = cBuf +uBufSize*5; 3124 uTarget = uBuf; 3125 uTargetLimit = uBuf+ uBufSize*5; 3126 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3127 if(U_FAILURE(errorCode)){ 3128 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3129 return; 3130 } 3131 cSource = cBuf; 3132 cSourceLimit =cTarget; 3133 test =uBuf; 3134 myOff=offsets; 3135 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3136 if(U_FAILURE(errorCode)){ 3137 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3138 return; 3139 } 3140 uSource = (const UChar*)in; 3141 while(uSource<uSourceLimit){ 3142 if(*test!=*uSource){ 3143 3144 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3145 } 3146 uSource++; 3147 test++; 3148 } 3149 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3150 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3151 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3152 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3153 TestJitterbug930("csISO2022JP"); 3154 ucnv_close(cnv); 3155 free(offsets); 3156 free(uBuf); 3157 free(cBuf); 3158 } 3159 3160 static void 3161 TestISCII(){ 3162 /* test input */ 3163 static const uint16_t in[]={ 3164 /* test full range of Devanagari */ 3165 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3166 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3167 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3168 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3169 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3170 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3171 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3172 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3173 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3174 0x096D,0x096E,0x096F, 3175 /* test Soft halant*/ 3176 0x0915,0x094d, 0x200D, 3177 /* test explicit halant */ 3178 0x0915,0x094d, 0x200c, 3179 /* test double danda */ 3180 0x965, 3181 /* test ASCII */ 3182 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3183 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3184 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3185 /* tests from Lotus */ 3186 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3187 0x0930,0x094D,0x200D, 3188 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3189 0x0915,0x0921,0x002B,0x095F, 3190 /* tamil range */ 3191 0x0B86, 0xB87, 0xB88, 3192 /* telugu range */ 3193 0x0C05, 0x0C02, 0x0C03,0x0c31, 3194 /* kannada range */ 3195 0x0C85, 0xC82, 0x0C83, 3196 /* test Abbr sign and Anudatta */ 3197 0x0970, 0x952, 3198 /* 0x0958, 3199 0x0959, 3200 0x095A, 3201 0x095B, 3202 0x095C, 3203 0x095D, 3204 0x095E, 3205 0x095F,*/ 3206 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3207 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3208 0x090C , 3209 0x0962, 3210 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3211 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3212 0x0950 /* OM Symbol 0xa1, 0xE9,*/, 3213 0x093D /* Avagraha 0xEA, 0xE9*/, 3214 0x0958, 3215 0x0959, 3216 0x095A, 3217 0x095B, 3218 0x095C, 3219 0x095D, 3220 0x095E, 3221 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3222 }; 3223 static const unsigned char byteArr[]={ 3224 3225 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3226 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3227 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3228 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3229 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3230 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3231 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3232 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3233 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3234 0xf8,0xf9,0xfa, 3235 /* test soft halant */ 3236 0xb3, 0xE8, 0xE9, 3237 /* test explicit halant */ 3238 0xb3, 0xE8, 0xE8, 3239 /* test double danda */ 3240 0xea, 0xea, 3241 /* test ASCII */ 3242 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3243 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3244 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3245 /* test ATR code */ 3246 3247 /* tests from Lotus */ 3248 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3249 0xEF,0x42,0xCF,0xE8,0xD9, 3250 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3251 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3252 /* tamil range */ 3253 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3254 /* telugu range */ 3255 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3256 /* kannada range */ 3257 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3258 /* anudatta and abbreviation sign */ 3259 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3260 3261 3262 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3263 3264 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3265 3266 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3267 3268 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3269 3270 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3271 3272 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3273 3274 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3275 3276 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ 3277 3278 0xB3, 0xE9, /* Ka + NUKTA */ 3279 3280 0xB4, 0xE9, /* Kha + NUKTA */ 3281 3282 0xB5, 0xE9, /* Ga + NUKTA */ 3283 3284 0xBA, 0xE9, 3285 3286 0xBF, 0xE9, 3287 3288 0xC0, 0xE9, 3289 3290 0xC9, 0xE9, 3291 /* INV halant RA */ 3292 0xD9, 0xE8, 0xCF, 3293 0x00, 0x00A0, 3294 /* just consume unhandled codepoints */ 3295 0xEF, 0x30, 3296 3297 }; 3298 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3299 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3300 3301 } 3302 3303 static void 3304 TestISO_2022_JP() { 3305 /* test input */ 3306 static const uint16_t in[]={ 3307 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3308 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3309 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3310 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3311 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3312 0x201D, 0x3014, 0x000D, 0x000A, 3313 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3314 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3315 }; 3316 const UChar* uSource; 3317 const UChar* uSourceLimit; 3318 const char* cSource; 3319 const char* cSourceLimit; 3320 UChar *uTargetLimit =NULL; 3321 UChar *uTarget; 3322 char *cTarget; 3323 const char *cTargetLimit; 3324 char *cBuf; 3325 UChar *uBuf,*test; 3326 int32_t uBufSize = 120; 3327 UErrorCode errorCode=U_ZERO_ERROR; 3328 UConverter *cnv; 3329 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3330 int32_t* myOff= offsets; 3331 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3332 if(U_FAILURE(errorCode)) { 3333 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3334 return; 3335 } 3336 3337 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3338 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3339 uSource = (const UChar*)in; 3340 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3341 cTarget = cBuf; 3342 cTargetLimit = cBuf +uBufSize*5; 3343 uTarget = uBuf; 3344 uTargetLimit = uBuf+ uBufSize*5; 3345 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3346 if(U_FAILURE(errorCode)){ 3347 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3348 return; 3349 } 3350 cSource = cBuf; 3351 cSourceLimit =cTarget; 3352 test =uBuf; 3353 myOff=offsets; 3354 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3355 if(U_FAILURE(errorCode)){ 3356 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3357 return; 3358 } 3359 3360 uSource = (const UChar*)in; 3361 while(uSource<uSourceLimit){ 3362 if(*test!=*uSource){ 3363 3364 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3365 } 3366 uSource++; 3367 test++; 3368 } 3369 3370 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3371 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3372 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3373 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3374 TestJitterbug930("csISO2022JP"); 3375 ucnv_close(cnv); 3376 free(uBuf); 3377 free(cBuf); 3378 free(offsets); 3379 } 3380 3381 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3382 const UChar* uSource; 3383 const UChar* uSourceLimit; 3384 const char* cSource; 3385 const char* cSourceLimit; 3386 UChar *uTargetLimit =NULL; 3387 UChar *uTarget; 3388 char *cTarget; 3389 const char *cTargetLimit; 3390 char *cBuf; 3391 UChar *uBuf,*test; 3392 int32_t uBufSize = 120*10; 3393 UErrorCode errorCode=U_ZERO_ERROR; 3394 UConverter *cnv; 3395 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3396 int32_t* myOff= offsets; 3397 cnv=my_ucnv_open(conv, &errorCode); 3398 if(U_FAILURE(errorCode)) { 3399 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3400 return; 3401 } 3402 3403 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3404 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3405 uSource = (const UChar*)in; 3406 uSourceLimit=uSource+len; 3407 cTarget = cBuf; 3408 cTargetLimit = cBuf +uBufSize; 3409 uTarget = uBuf; 3410 uTargetLimit = uBuf+ uBufSize; 3411 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3412 if(U_FAILURE(errorCode)){ 3413 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3414 return; 3415 } 3416 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3417 cSource = cBuf; 3418 cSourceLimit =cTarget; 3419 test =uBuf; 3420 myOff=offsets; 3421 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3422 if(U_FAILURE(errorCode)){ 3423 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3424 return; 3425 } 3426 3427 uSource = (const UChar*)in; 3428 while(uSource<uSourceLimit){ 3429 if(*test!=*uSource){ 3430 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3431 } 3432 uSource++; 3433 test++; 3434 } 3435 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3436 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3437 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3438 if(byteArr && byteArrLen!=0){ 3439 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3440 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3441 { 3442 cSource = byteArr; 3443 cSourceLimit = cSource+byteArrLen; 3444 test=uBuf; 3445 myOff = offsets; 3446 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3447 if(U_FAILURE(errorCode)){ 3448 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3449 return; 3450 } 3451 3452 uSource = (const UChar*)in; 3453 while(uSource<uSourceLimit){ 3454 if(*test!=*uSource){ 3455 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3456 } 3457 uSource++; 3458 test++; 3459 } 3460 } 3461 } 3462 3463 ucnv_close(cnv); 3464 free(uBuf); 3465 free(cBuf); 3466 free(offsets); 3467 } 3468 static UChar U_CALLCONV 3469 _charAt(int32_t offset, void *context) { 3470 return ((char*)context)[offset]; 3471 } 3472 3473 static int32_t 3474 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3475 int32_t srcIndex=0; 3476 int32_t dstIndex=0; 3477 if(U_FAILURE(*status)){ 3478 return 0; 3479 } 3480 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3481 *status = U_ILLEGAL_ARGUMENT_ERROR; 3482 return 0; 3483 } 3484 if(srcLen==-1){ 3485 srcLen = (int32_t)uprv_strlen(src); 3486 } 3487 3488 for (; srcIndex<srcLen; ) { 3489 UChar32 c = src[srcIndex++]; 3490 if (c == 0x005C /*'\\'*/) { 3491 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3492 if (c == (UChar32)0xFFFFFFFF) { 3493 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3494 break; /* invalid escape sequence */ 3495 } 3496 } 3497 if(dstIndex < dstLen){ 3498 if(c>0xFFFF){ 3499 dst[dstIndex++] = UTF16_LEAD(c); 3500 if(dstIndex<dstLen){ 3501 dst[dstIndex]=UTF16_TRAIL(c); 3502 }else{ 3503 *status=U_BUFFER_OVERFLOW_ERROR; 3504 } 3505 }else{ 3506 dst[dstIndex]=(UChar)c; 3507 } 3508 3509 }else{ 3510 *status = U_BUFFER_OVERFLOW_ERROR; 3511 } 3512 dstIndex++; /* for preflighting */ 3513 } 3514 return dstIndex; 3515 } 3516 3517 static void 3518 TestFullRoundtrip(const char* cp){ 3519 UChar usource[10] ={0}; 3520 UChar nsrc[10] = {0}; 3521 uint32_t i=1; 3522 int len=0, ulen; 3523 nsrc[0]=0x0061; 3524 /* Test codepoint 0 */ 3525 TestConv(usource,1,cp,"",NULL,0); 3526 TestConv(usource,2,cp,"",NULL,0); 3527 nsrc[2]=0x5555; 3528 TestConv(nsrc,3,cp,"",NULL,0); 3529 3530 for(;i<=0x10FFFF;i++){ 3531 if(i==0xD800){ 3532 i=0xDFFF; 3533 continue; 3534 } 3535 if(i<=0xFFFF){ 3536 usource[0] =(UChar) i; 3537 len=1; 3538 }else{ 3539 usource[0]=UTF16_LEAD(i); 3540 usource[1]=UTF16_TRAIL(i); 3541 len=2; 3542 } 3543 ulen=len; 3544 if(i==0x80) { 3545 usource[2]=0; 3546 } 3547 /* Test only single code points */ 3548 TestConv(usource,ulen,cp,"",NULL,0); 3549 /* Test codepoint repeated twice */ 3550 usource[ulen]=usource[0]; 3551 usource[ulen+1]=usource[1]; 3552 ulen+=len; 3553 TestConv(usource,ulen,cp,"",NULL,0); 3554 /* Test codepoint repeated 3 times */ 3555 usource[ulen]=usource[0]; 3556 usource[ulen+1]=usource[1]; 3557 ulen+=len; 3558 TestConv(usource,ulen,cp,"",NULL,0); 3559 /* Test codepoint in between 2 codepoints */ 3560 nsrc[1]=usource[0]; 3561 nsrc[2]=usource[1]; 3562 nsrc[len+1]=0x5555; 3563 TestConv(nsrc,len+2,cp,"",NULL,0); 3564 uprv_memset(usource,0,sizeof(UChar)*10); 3565 } 3566 } 3567 3568 static void 3569 TestRoundTrippingAllUTF(void){ 3570 if(!getTestOption(QUICK_OPTION)){ 3571 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3572 TestFullRoundtrip("BOCU-1"); 3573 log_verbose("Running exhaustive round trip test for SCSU\n"); 3574 TestFullRoundtrip("SCSU"); 3575 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3576 TestFullRoundtrip("UTF-8"); 3577 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3578 TestFullRoundtrip("CESU-8"); 3579 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3580 TestFullRoundtrip("UTF-16BE"); 3581 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3582 TestFullRoundtrip("UTF-16LE"); 3583 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3584 TestFullRoundtrip("UTF-16"); 3585 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3586 TestFullRoundtrip("UTF-32BE"); 3587 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3588 TestFullRoundtrip("UTF-32LE"); 3589 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3590 TestFullRoundtrip("UTF-32"); 3591 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3592 TestFullRoundtrip("UTF-7"); 3593 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3594 TestFullRoundtrip("UTF-7,version=1"); 3595 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3596 TestFullRoundtrip("IMAP-mailbox-name"); 3597 /* 3598 * 3599 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of 3600 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA). 3601 * The old mappings remain as fallbacks. 3602 * This test may be reintroduced at a later time. 3603 * 3604 * 110118 - mow 3605 */ 3606 /* 3607 log_verbose("Running exhaustive round trip test for GB18030\n"); 3608 TestFullRoundtrip("GB18030"); 3609 */ 3610 } 3611 } 3612 3613 static void 3614 TestSCSU() { 3615 3616 static const uint16_t germanUTF16[]={ 3617 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3618 }; 3619 3620 static const uint8_t germanSCSU[]={ 3621 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3622 }; 3623 3624 static const uint16_t russianUTF16[]={ 3625 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3626 }; 3627 3628 static const uint8_t russianSCSU[]={ 3629 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3630 }; 3631 3632 static const uint16_t japaneseUTF16[]={ 3633 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3634 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3635 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3636 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3637 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3638 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 3639 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3640 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3641 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3642 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3643 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3644 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3645 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3646 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3647 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3648 }; 3649 3650 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3651 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3652 static const uint8_t japaneseSCSU[]={ 3653 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3654 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3655 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3656 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3657 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3658 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3659 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3660 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3661 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3662 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3663 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3664 0xcb, 0x82 3665 }; 3666 3667 static const uint16_t allFeaturesUTF16[]={ 3668 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3669 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3670 0x01df, 0xf000, 0xdbff, 0xdfff 3671 }; 3672 3673 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3674 * result here (34B vs. 35B) 3675 */ 3676 static const uint8_t allFeaturesSCSU[]={ 3677 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3678 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3679 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3680 0xdf, 0x14, 0x80, 0x15, 0xff 3681 }; 3682 static const uint16_t monkeyIn[]={ 3683 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3684 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3685 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3686 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3687 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3688 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3689 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3690 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3691 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3692 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3693 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3694 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3695 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3696 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3697 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3698 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3699 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3700 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3701 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3702 /* test non-BMP code points */ 3703 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3704 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3705 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3706 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3707 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3708 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3709 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3710 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3711 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3712 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3713 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3714 3715 3716 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3717 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3718 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3719 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3720 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3721 }; 3722 static const char *fTestCases [] = { 3723 "\\ud800\\udc00", /* smallest surrogate*/ 3724 "\\ud8ff\\udcff", 3725 "\\udBff\\udFff", /* largest surrogate pair*/ 3726 "\\ud834\\udc00", 3727 "\\U0010FFFF", 3728 "Hello \\u9292 \\u9192 World!", 3729 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3730 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3731 3732 "\\u0648\\u06c8", /* catch missing reset*/ 3733 "\\u0648\\u06c8", 3734 3735 "\\u4444\\uE001", /* lowest quotable*/ 3736 "\\u4444\\uf2FF", /* highest quotable*/ 3737 "\\u4444\\uf188\\u4444", 3738 "\\u4444\\uf188\\uf288", 3739 "\\u4444\\uf188abc\\u0429\\uf288", 3740 "\\u9292\\u2222", 3741 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3742 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3743 "Hello World!123456", 3744 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3745 3746 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3747 "abc\\u4411d", /* uses SQU*/ 3748 "abc\\u4411\\u4412d",/* uses SCU*/ 3749 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3750 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3751 "\\u9292\\u2222", 3752 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3753 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3754 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3755 3756 "", /* empty input*/ 3757 "\\u0000", /* smallest BMP character*/ 3758 "\\uFFFF", /* largest BMP character*/ 3759 3760 /* regression tests*/ 3761 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3762 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3763 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3764 "\\u0041\\u00df\\u0401\\u015f", 3765 "\\u9066\\u2123abc", 3766 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3767 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3768 }; 3769 int i=0; 3770 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3771 const char* cSrc = fTestCases[i]; 3772 UErrorCode status = U_ZERO_ERROR; 3773 int32_t cSrcLen,srcLen; 3774 UChar* src; 3775 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3776 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3777 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3778 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3779 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3780 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3781 free(src); 3782 } 3783 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3784 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3785 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3786 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3787 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3788 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3789 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3790 } 3791 3792 #if !UCONFIG_NO_LEGACY_CONVERSION 3793 static void TestJitterbug2346(){ 3794 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3795 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3796 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3797 3798 UChar uTarget[500]={'\0'}; 3799 UChar* utarget=uTarget; 3800 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3801 3802 char cTarget[500]={'\0'}; 3803 char* ctarget=cTarget; 3804 char* ctargetLimit=cTarget+sizeof(cTarget); 3805 const char* csource=source; 3806 UChar* temp = expected; 3807 UErrorCode err=U_ZERO_ERROR; 3808 3809 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3810 if(U_FAILURE(err)) { 3811 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3812 return; 3813 } 3814 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3815 if(U_FAILURE(err)) { 3816 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3817 return; 3818 } 3819 utargetLimit=utarget; 3820 utarget = uTarget; 3821 while(utarget<utargetLimit){ 3822 if(*temp!=*utarget){ 3823 3824 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3825 } 3826 utarget++; 3827 temp++; 3828 } 3829 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3830 if(U_FAILURE(err)) { 3831 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3832 return; 3833 } 3834 ctargetLimit=ctarget; 3835 ctarget =cTarget; 3836 ucnv_close(conv); 3837 3838 3839 } 3840 3841 static void 3842 TestISO_2022_JP_1() { 3843 /* test input */ 3844 static const uint16_t in[]={ 3845 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3846 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3847 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3848 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3849 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3850 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3851 0x201D, 0x000D, 0x000A, 3852 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3853 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3854 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3855 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3856 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3857 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3858 }; 3859 const UChar* uSource; 3860 const UChar* uSourceLimit; 3861 const char* cSource; 3862 const char* cSourceLimit; 3863 UChar *uTargetLimit =NULL; 3864 UChar *uTarget; 3865 char *cTarget; 3866 const char *cTargetLimit; 3867 char *cBuf; 3868 UChar *uBuf,*test; 3869 int32_t uBufSize = 120; 3870 UErrorCode errorCode=U_ZERO_ERROR; 3871 UConverter *cnv; 3872 3873 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3874 if(U_FAILURE(errorCode)) { 3875 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3876 return; 3877 } 3878 3879 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3880 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3881 uSource = (const UChar*)in; 3882 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3883 cTarget = cBuf; 3884 cTargetLimit = cBuf +uBufSize*5; 3885 uTarget = uBuf; 3886 uTargetLimit = uBuf+ uBufSize*5; 3887 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3888 if(U_FAILURE(errorCode)){ 3889 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3890 return; 3891 } 3892 cSource = cBuf; 3893 cSourceLimit =cTarget; 3894 test =uBuf; 3895 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3896 if(U_FAILURE(errorCode)){ 3897 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3898 return; 3899 } 3900 uSource = (const UChar*)in; 3901 while(uSource<uSourceLimit){ 3902 if(*test!=*uSource){ 3903 3904 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3905 } 3906 uSource++; 3907 test++; 3908 } 3909 /*ucnv_close(cnv); 3910 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3911 /*Test for the condition where there is an invalid character*/ 3912 ucnv_reset(cnv); 3913 { 3914 static const uint8_t source2[]={0x0e,0x24,0x053}; 3915 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3916 } 3917 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3918 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3919 ucnv_close(cnv); 3920 free(uBuf); 3921 free(cBuf); 3922 } 3923 3924 static void 3925 TestISO_2022_JP_2() { 3926 /* test input */ 3927 static const uint16_t in[]={ 3928 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3929 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3930 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3931 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3932 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3933 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3934 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3935 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3936 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3937 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3938 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3939 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3940 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3941 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3942 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3943 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3944 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3945 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3946 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3947 }; 3948 const UChar* uSource; 3949 const UChar* uSourceLimit; 3950 const char* cSource; 3951 const char* cSourceLimit; 3952 UChar *uTargetLimit =NULL; 3953 UChar *uTarget; 3954 char *cTarget; 3955 const char *cTargetLimit; 3956 char *cBuf; 3957 UChar *uBuf,*test; 3958 int32_t uBufSize = 120; 3959 UErrorCode errorCode=U_ZERO_ERROR; 3960 UConverter *cnv; 3961 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3962 int32_t* myOff= offsets; 3963 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3964 if(U_FAILURE(errorCode)) { 3965 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3966 return; 3967 } 3968 3969 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3970 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3971 uSource = (const UChar*)in; 3972 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3973 cTarget = cBuf; 3974 cTargetLimit = cBuf +uBufSize*5; 3975 uTarget = uBuf; 3976 uTargetLimit = uBuf+ uBufSize*5; 3977 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3978 if(U_FAILURE(errorCode)){ 3979 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3980 return; 3981 } 3982 cSource = cBuf; 3983 cSourceLimit =cTarget; 3984 test =uBuf; 3985 myOff=offsets; 3986 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3987 if(U_FAILURE(errorCode)){ 3988 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3989 return; 3990 } 3991 uSource = (const UChar*)in; 3992 while(uSource<uSourceLimit){ 3993 if(*test!=*uSource){ 3994 3995 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3996 } 3997 uSource++; 3998 test++; 3999 } 4000 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4001 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4002 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4003 /*Test for the condition where there is an invalid character*/ 4004 ucnv_reset(cnv); 4005 { 4006 static const uint8_t source2[]={0x0e,0x24,0x053}; 4007 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 4008 } 4009 ucnv_close(cnv); 4010 free(uBuf); 4011 free(cBuf); 4012 free(offsets); 4013 } 4014 4015 static void 4016 TestISO_2022_KR() { 4017 /* test input */ 4018 static const uint16_t in[]={ 4019 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4020 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4021 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4022 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4023 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4024 ,0x53E3,0x53E4,0x000A,0x000D}; 4025 const UChar* uSource; 4026 const UChar* uSourceLimit; 4027 const char* cSource; 4028 const char* cSourceLimit; 4029 UChar *uTargetLimit =NULL; 4030 UChar *uTarget; 4031 char *cTarget; 4032 const char *cTargetLimit; 4033 char *cBuf; 4034 UChar *uBuf,*test; 4035 int32_t uBufSize = 120; 4036 UErrorCode errorCode=U_ZERO_ERROR; 4037 UConverter *cnv; 4038 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4039 int32_t* myOff= offsets; 4040 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 4041 if(U_FAILURE(errorCode)) { 4042 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4043 return; 4044 } 4045 4046 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4047 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4048 uSource = (const UChar*)in; 4049 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4050 cTarget = cBuf; 4051 cTargetLimit = cBuf +uBufSize*5; 4052 uTarget = uBuf; 4053 uTargetLimit = uBuf+ uBufSize*5; 4054 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4055 if(U_FAILURE(errorCode)){ 4056 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4057 return; 4058 } 4059 cSource = cBuf; 4060 cSourceLimit =cTarget; 4061 test =uBuf; 4062 myOff=offsets; 4063 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4064 if(U_FAILURE(errorCode)){ 4065 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4066 return; 4067 } 4068 uSource = (const UChar*)in; 4069 while(uSource<uSourceLimit){ 4070 if(*test!=*uSource){ 4071 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4072 } 4073 uSource++; 4074 test++; 4075 } 4076 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4077 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4078 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4079 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4080 TestJitterbug930("csISO2022KR"); 4081 /*Test for the condition where there is an invalid character*/ 4082 ucnv_reset(cnv); 4083 { 4084 static const uint8_t source2[]={0x1b,0x24,0x053}; 4085 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4086 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4087 } 4088 ucnv_close(cnv); 4089 free(uBuf); 4090 free(cBuf); 4091 free(offsets); 4092 } 4093 4094 static void 4095 TestISO_2022_KR_1() { 4096 /* test input */ 4097 static const uint16_t in[]={ 4098 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4099 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4100 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4101 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4102 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4103 ,0x53E3,0x53E4,0x000A,0x000D}; 4104 const UChar* uSource; 4105 const UChar* uSourceLimit; 4106 const char* cSource; 4107 const char* cSourceLimit; 4108 UChar *uTargetLimit =NULL; 4109 UChar *uTarget; 4110 char *cTarget; 4111 const char *cTargetLimit; 4112 char *cBuf; 4113 UChar *uBuf,*test; 4114 int32_t uBufSize = 120; 4115 UErrorCode errorCode=U_ZERO_ERROR; 4116 UConverter *cnv; 4117 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4118 int32_t* myOff= offsets; 4119 cnv=ucnv_open("ibm-25546", &errorCode); 4120 if(U_FAILURE(errorCode)) { 4121 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4122 return; 4123 } 4124 4125 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4126 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4127 uSource = (const UChar*)in; 4128 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4129 cTarget = cBuf; 4130 cTargetLimit = cBuf +uBufSize*5; 4131 uTarget = uBuf; 4132 uTargetLimit = uBuf+ uBufSize*5; 4133 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4134 if(U_FAILURE(errorCode)){ 4135 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4136 return; 4137 } 4138 cSource = cBuf; 4139 cSourceLimit =cTarget; 4140 test =uBuf; 4141 myOff=offsets; 4142 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4143 if(U_FAILURE(errorCode)){ 4144 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4145 return; 4146 } 4147 uSource = (const UChar*)in; 4148 while(uSource<uSourceLimit){ 4149 if(*test!=*uSource){ 4150 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4151 } 4152 uSource++; 4153 test++; 4154 } 4155 ucnv_reset(cnv); 4156 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4157 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4158 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4159 ucnv_reset(cnv); 4160 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4161 /*Test for the condition where there is an invalid character*/ 4162 ucnv_reset(cnv); 4163 { 4164 static const uint8_t source2[]={0x1b,0x24,0x053}; 4165 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4166 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4167 } 4168 ucnv_close(cnv); 4169 free(uBuf); 4170 free(cBuf); 4171 free(offsets); 4172 } 4173 4174 static void TestJitterbug2411(){ 4175 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4176 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4177 UConverter* kr=NULL, *kr1=NULL; 4178 UErrorCode errorCode = U_ZERO_ERROR; 4179 UChar tgt[100]={'\0'}; 4180 UChar* target = tgt; 4181 UChar* targetLimit = target+100; 4182 kr=ucnv_open("iso-2022-kr", &errorCode); 4183 if(U_FAILURE(errorCode)) { 4184 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4185 return; 4186 } 4187 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4188 if(U_FAILURE(errorCode)) { 4189 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4190 return; 4191 } 4192 kr1 = ucnv_open("ibm-25546", &errorCode); 4193 if(U_FAILURE(errorCode)) { 4194 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4195 return; 4196 } 4197 target = tgt; 4198 targetLimit = target+100; 4199 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4200 4201 if(U_FAILURE(errorCode)) { 4202 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4203 return; 4204 } 4205 4206 ucnv_close(kr); 4207 ucnv_close(kr1); 4208 4209 } 4210 4211 static void 4212 TestJIS(){ 4213 /* From Unicode moved to testdata/conversion.txt */ 4214 /*To Unicode*/ 4215 { 4216 static const uint8_t sampleTextJIS[] = { 4217 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4218 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4219 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4220 }; 4221 static const uint16_t expectedISO2022JIS[] = { 4222 0x0041, 0x0042, 4223 0xFF81, 0xFF82, 4224 0x3000 4225 }; 4226 static const int32_t toISO2022JISOffs[]={ 4227 3,4, 4228 8,9, 4229 16 4230 }; 4231 4232 static const uint8_t sampleTextJIS7[] = { 4233 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4234 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4235 0x1b,0x24,0x42,0x21,0x21, 4236 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4237 0x21,0x22, 4238 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4239 }; 4240 static const uint16_t expectedISO2022JIS7[] = { 4241 0x0041, 0x0042, 4242 0xFF81, 0xFF82, 4243 0x3000, 4244 0xFF81, 0xFF82, 4245 0x3001, 4246 0x3000 4247 }; 4248 static const int32_t toISO2022JIS7Offs[]={ 4249 3,4, 4250 8,9, 4251 13,16, 4252 17, 4253 19,27 4254 }; 4255 static const uint8_t sampleTextJIS8[] = { 4256 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4257 0xa1,0xc8,0xd9,/*Katakana Set*/ 4258 0x1b,0x28,0x42, 4259 0x41,0x42, 4260 0xb1,0xc3, /*Katakana Set*/ 4261 0x1b,0x24,0x42,0x21,0x21 4262 }; 4263 static const uint16_t expectedISO2022JIS8[] = { 4264 0x0041, 0x0042, 4265 0xff61, 0xff88, 0xff99, 4266 0x0041, 0x0042, 4267 0xff71, 0xff83, 4268 0x3000 4269 }; 4270 static const int32_t toISO2022JIS8Offs[]={ 4271 3, 4, 5, 6, 4272 7, 11, 12, 13, 4273 14, 18, 4274 }; 4275 4276 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4277 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4278 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4279 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4280 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4281 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4282 } 4283 4284 } 4285 4286 4287 #if 0 4288 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 4289 4290 static void TestJitterbug915(){ 4291 /* tests for roundtripping of the below sequence 4292 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4293 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4294 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4295 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4296 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4297 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4298 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4299 */ 4300 static const char cSource[]={ 4301 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4302 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4303 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4304 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4305 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 4306 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4307 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4308 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4309 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4310 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4311 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4312 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4313 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4314 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4315 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4316 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4317 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4318 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4319 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4320 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4321 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4322 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4323 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4324 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4325 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4326 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4327 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4328 0x37, 0x20, 0x2A, 0x2F 4329 }; 4330 UChar uTarget[500]={'\0'}; 4331 UChar* utarget=uTarget; 4332 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4333 4334 char cTarget[500]={'\0'}; 4335 char* ctarget=cTarget; 4336 char* ctargetLimit=cTarget+sizeof(cTarget); 4337 const char* csource=cSource; 4338 const char* tempSrc = cSource; 4339 UErrorCode err=U_ZERO_ERROR; 4340 4341 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4342 if(U_FAILURE(err)) { 4343 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4344 return; 4345 } 4346 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4347 if(U_FAILURE(err)) { 4348 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4349 return; 4350 } 4351 utargetLimit=utarget; 4352 utarget = uTarget; 4353 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4354 if(U_FAILURE(err)) { 4355 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4356 return; 4357 } 4358 ctargetLimit=ctarget; 4359 ctarget =cTarget; 4360 while(ctarget<ctargetLimit){ 4361 if(*ctarget != *tempSrc){ 4362 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4363 } 4364 ++ctarget; 4365 ++tempSrc; 4366 } 4367 4368 ucnv_close(conv); 4369 } 4370 4371 static void 4372 TestISO_2022_CN_EXT() { 4373 /* test input */ 4374 static const uint16_t in[]={ 4375 /* test Non-BMP code points */ 4376 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4377 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4378 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4379 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4380 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4381 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4382 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4383 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4384 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4385 0xD869, 0xDED5, 4386 4387 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4388 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4389 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 4390 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4391 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4392 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4393 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4394 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4395 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4396 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4397 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4398 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4399 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4400 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4401 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4402 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4403 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4404 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4405 4406 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4407 4408 }; 4409 4410 const UChar* uSource; 4411 const UChar* uSourceLimit; 4412 const char* cSource; 4413 const char* cSourceLimit; 4414 UChar *uTargetLimit =NULL; 4415 UChar *uTarget; 4416 char *cTarget; 4417 const char *cTargetLimit; 4418 char *cBuf; 4419 UChar *uBuf,*test; 4420 int32_t uBufSize = 180; 4421 UErrorCode errorCode=U_ZERO_ERROR; 4422 UConverter *cnv; 4423 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4424 int32_t* myOff= offsets; 4425 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4426 if(U_FAILURE(errorCode)) { 4427 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4428 return; 4429 } 4430 4431 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4432 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4433 uSource = (const UChar*)in; 4434 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4435 cTarget = cBuf; 4436 cTargetLimit = cBuf +uBufSize*5; 4437 uTarget = uBuf; 4438 uTargetLimit = uBuf+ uBufSize*5; 4439 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4440 if(U_FAILURE(errorCode)){ 4441 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4442 return; 4443 } 4444 cSource = cBuf; 4445 cSourceLimit =cTarget; 4446 test =uBuf; 4447 myOff=offsets; 4448 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4449 if(U_FAILURE(errorCode)){ 4450 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4451 return; 4452 } 4453 uSource = (const UChar*)in; 4454 while(uSource<uSourceLimit){ 4455 if(*test!=*uSource){ 4456 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4457 } 4458 else{ 4459 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4460 } 4461 uSource++; 4462 test++; 4463 } 4464 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4465 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4466 /*Test for the condition where there is an invalid character*/ 4467 ucnv_reset(cnv); 4468 { 4469 static const uint8_t source2[]={0x0e,0x24,0x053}; 4470 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4471 } 4472 ucnv_close(cnv); 4473 free(uBuf); 4474 free(cBuf); 4475 free(offsets); 4476 } 4477 #endif 4478 4479 static void 4480 TestISO_2022_CN() { 4481 /* test input */ 4482 static const uint16_t in[]={ 4483 /* jitterbug 951 */ 4484 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4485 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4486 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4487 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4488 0x0020, 0x0045, 0x004e, 0x0044, 4489 /**/ 4490 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4491 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4492 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4493 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4494 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4495 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4496 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4497 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4498 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4499 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4500 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4501 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4502 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4503 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4504 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4505 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4506 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4507 4508 }; 4509 const UChar* uSource; 4510 const UChar* uSourceLimit; 4511 const char* cSource; 4512 const char* cSourceLimit; 4513 UChar *uTargetLimit =NULL; 4514 UChar *uTarget; 4515 char *cTarget; 4516 const char *cTargetLimit; 4517 char *cBuf; 4518 UChar *uBuf,*test; 4519 int32_t uBufSize = 180; 4520 UErrorCode errorCode=U_ZERO_ERROR; 4521 UConverter *cnv; 4522 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4523 int32_t* myOff= offsets; 4524 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4525 if(U_FAILURE(errorCode)) { 4526 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4527 return; 4528 } 4529 4530 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4531 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4532 uSource = (const UChar*)in; 4533 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4534 cTarget = cBuf; 4535 cTargetLimit = cBuf +uBufSize*5; 4536 uTarget = uBuf; 4537 uTargetLimit = uBuf+ uBufSize*5; 4538 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4539 if(U_FAILURE(errorCode)){ 4540 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4541 return; 4542 } 4543 cSource = cBuf; 4544 cSourceLimit =cTarget; 4545 test =uBuf; 4546 myOff=offsets; 4547 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4548 if(U_FAILURE(errorCode)){ 4549 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4550 return; 4551 } 4552 uSource = (const UChar*)in; 4553 while(uSource<uSourceLimit){ 4554 if(*test!=*uSource){ 4555 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4556 } 4557 else{ 4558 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4559 } 4560 uSource++; 4561 test++; 4562 } 4563 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4564 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4565 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4566 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4567 TestJitterbug930("csISO2022CN"); 4568 /*Test for the condition where there is an invalid character*/ 4569 ucnv_reset(cnv); 4570 { 4571 static const uint8_t source2[]={0x0e,0x24,0x053}; 4572 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4573 } 4574 4575 ucnv_close(cnv); 4576 free(uBuf); 4577 free(cBuf); 4578 free(offsets); 4579 } 4580 4581 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4582 typedef struct { 4583 const char * converterName; 4584 const char * inputText; 4585 int inputTextLength; 4586 } EmptySegmentTest; 4587 4588 /* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4589 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4590 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4591 if (reason > UCNV_IRREGULAR) { 4592 return; 4593 } 4594 if (reason != UCNV_IRREGULAR) { 4595 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4596 } 4597 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4598 *err = U_ZERO_ERROR; 4599 ucnv_cbToUWriteSub(toArgs,0,err); 4600 } 4601 4602 enum { kEmptySegmentToUCharsMax = 64 }; 4603 static void TestJitterbug6175(void) { 4604 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4605 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4606 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4607 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4608 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4609 static const EmptySegmentTest emptySegmentTests[] = { 4610 /* converterName inputText inputTextLength */ 4611 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, 4612 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4613 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4614 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4615 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, 4616 /* terminator: */ 4617 { NULL, NULL, 0, } 4618 }; 4619 const EmptySegmentTest * testPtr; 4620 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4621 UErrorCode err = U_ZERO_ERROR; 4622 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4623 if (U_FAILURE(err)) { 4624 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4625 return; 4626 } 4627 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4628 if (U_FAILURE(err)) { 4629 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4630 ucnv_close(cnv); 4631 return; 4632 } 4633 { 4634 UChar toUChars[kEmptySegmentToUCharsMax]; 4635 UChar * toUCharsPtr = toUChars; 4636 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4637 const char * inCharsPtr = testPtr->inputText; 4638 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4639 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4640 } 4641 ucnv_close(cnv); 4642 } 4643 } 4644 4645 static void 4646 TestEBCDIC_STATEFUL() { 4647 /* test input */ 4648 static const uint8_t in[]={ 4649 0x61, 4650 0x1a, 4651 0x0f, 0x4b, 4652 0x42, 4653 0x40, 4654 0x36, 4655 }; 4656 4657 /* expected test results */ 4658 static const int32_t results[]={ 4659 /* number of bytes read, code point */ 4660 1, 0x002f, 4661 1, 0x0092, 4662 2, 0x002e, 4663 1, 0xff62, 4664 1, 0x0020, 4665 1, 0x0096, 4666 4667 }; 4668 static const uint8_t in2[]={ 4669 0x0f, 4670 0xa1, 4671 0x01 4672 }; 4673 4674 /* expected test results */ 4675 static const int32_t results2[]={ 4676 /* number of bytes read, code point */ 4677 2, 0x203E, 4678 1, 0x0001, 4679 }; 4680 4681 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4682 UErrorCode errorCode=U_ZERO_ERROR; 4683 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4684 if(U_FAILURE(errorCode)) { 4685 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4686 return; 4687 } 4688 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4689 ucnv_reset(cnv); 4690 /* Test the condition when source >= sourceLimit */ 4691 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4692 ucnv_reset(cnv); 4693 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4694 { 4695 static const uint8_t source1[]={0x0f}; 4696 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4697 } 4698 /*Test for the condition where there is an invalid character*/ 4699 ucnv_reset(cnv); 4700 { 4701 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4702 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4703 } 4704 ucnv_reset(cnv); 4705 source=(const char*)in2; 4706 limit=(const char*)in2+sizeof(in2); 4707 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4708 ucnv_close(cnv); 4709 4710 } 4711 4712 static void 4713 TestGB18030() { 4714 /* test input */ 4715 static const uint8_t in[]={ 4716 0x24, 4717 0x7f, 4718 0x81, 0x30, 0x81, 0x30, 4719 0xa8, 0xbf, 4720 0xa2, 0xe3, 4721 0xd2, 0xbb, 4722 0x82, 0x35, 0x8f, 0x33, 4723 0x84, 0x31, 0xa4, 0x39, 4724 0x90, 0x30, 0x81, 0x30, 4725 0xe3, 0x32, 0x9a, 0x35 4726 #if 0 4727 /* 4728 * Feature removed markus 2000-oct-26 4729 * Only some codepages must match surrogate pairs into supplementary code points - 4730 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . 4731 * GB 18030 provides direct encodings for supplementary code points, therefore 4732 * it must not combine two single-encoded surrogates into one code point. 4733 */ 4734 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4735 #endif 4736 }; 4737 4738 /* expected test results */ 4739 static const int32_t results[]={ 4740 /* number of bytes read, code point */ 4741 1, 0x24, 4742 1, 0x7f, 4743 4, 0x80, 4744 2, 0x1f9, 4745 2, 0x20ac, 4746 2, 0x4e00, 4747 4, 0x9fa6, 4748 4, 0xffff, 4749 4, 0x10000, 4750 4, 0x10ffff 4751 #if 0 4752 /* Feature removed. See comment above. */ 4753 8, 0x10000 4754 #endif 4755 }; 4756 4757 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4758 UErrorCode errorCode=U_ZERO_ERROR; 4759 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4760 if(U_FAILURE(errorCode)) { 4761 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4762 return; 4763 } 4764 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4765 ucnv_close(cnv); 4766 } 4767 4768 static void 4769 TestLMBCS() { 4770 /* LMBCS-1 string */ 4771 static const uint8_t pszLMBCS[]={ 4772 0x61, 4773 0x01, 0x29, 4774 0x81, 4775 0xA0, 4776 0x0F, 0x27, 4777 0x0F, 0x91, 4778 0x14, 0x0a, 0x74, 4779 0x14, 0xF6, 0x02, 4780 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4781 0x10, 0x88, 0xA0, 4782 }; 4783 4784 /* Unicode UChar32 equivalents */ 4785 static const UChar32 pszUnicode32[]={ 4786 /* code point */ 4787 0x00000061, 4788 0x00002013, 4789 0x000000FC, 4790 0x000000E1, 4791 0x00000007, 4792 0x00000091, 4793 0x00000a74, 4794 0x00000200, 4795 0x00023456, /* code point for surrogate pair */ 4796 0x00005516 4797 }; 4798 4799 /* Unicode UChar equivalents */ 4800 static const UChar pszUnicode[]={ 4801 /* code point */ 4802 0x0061, 4803 0x2013, 4804 0x00FC, 4805 0x00E1, 4806 0x0007, 4807 0x0091, 4808 0x0a74, 4809 0x0200, 4810 0xD84D, /* low surrogate */ 4811 0xDC56, /* high surrogate */ 4812 0x5516 4813 }; 4814 4815 /* expected test results */ 4816 static const int offsets32[]={ 4817 /* number of bytes read, code point */ 4818 0, 4819 1, 4820 3, 4821 4, 4822 5, 4823 7, 4824 9, 4825 12, 4826 15, 4827 21, 4828 24 4829 }; 4830 4831 /* expected test results */ 4832 static const int offsets[]={ 4833 /* number of bytes read, code point */ 4834 0, 4835 1, 4836 3, 4837 4, 4838 5, 4839 7, 4840 9, 4841 12, 4842 15, 4843 18, 4844 21, 4845 24 4846 }; 4847 4848 4849 UConverter *cnv; 4850 4851 #define NAME_LMBCS_1 "LMBCS-1" 4852 #define NAME_LMBCS_2 "LMBCS-2" 4853 4854 4855 /* Some basic open/close/property tests on some LMBCS converters */ 4856 { 4857 4858 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4859 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4860 char get_subchars [1]; 4861 const char * get_name; 4862 UConverter *cnv1; 4863 UConverter *cnv2; 4864 4865 int8_t len = sizeof(get_subchars); 4866 4867 UErrorCode errorCode=U_ZERO_ERROR; 4868 4869 /* Open */ 4870 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4871 if(U_FAILURE(errorCode)) { 4872 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4873 return; 4874 } 4875 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4876 if(U_FAILURE(errorCode)) { 4877 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4878 return; 4879 } 4880 4881 /* Name */ 4882 get_name = ucnv_getName (cnv1, &errorCode); 4883 if (strcmp(NAME_LMBCS_1,get_name)){ 4884 log_err("Unexpected converter name: %s\n", get_name); 4885 } 4886 get_name = ucnv_getName (cnv2, &errorCode); 4887 if (strcmp(NAME_LMBCS_2,get_name)){ 4888 log_err("Unexpected converter name: %s\n", get_name); 4889 } 4890 4891 /* substitution chars */ 4892 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4893 if(U_FAILURE(errorCode)) { 4894 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4895 } 4896 if (len!=1){ 4897 log_err("Unexpected length of sub chars\n"); 4898 } 4899 if (get_subchars[0] != expected_subchars[0]){ 4900 log_err("Unexpected value of sub chars\n"); 4901 } 4902 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4903 if(U_FAILURE(errorCode)) { 4904 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4905 } 4906 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4907 if(U_FAILURE(errorCode)) { 4908 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4909 } 4910 if (len!=1){ 4911 log_err("Unexpected length of sub chars\n"); 4912 } 4913 if (get_subchars[0] != new_subchars[0]){ 4914 log_err("Unexpected value of sub chars\n"); 4915 } 4916 ucnv_close(cnv1); 4917 ucnv_close(cnv2); 4918 4919 } 4920 4921 /* LMBCS to Unicode - offsets */ 4922 { 4923 UErrorCode errorCode=U_ZERO_ERROR; 4924 4925 const char * pSource = (const char *)pszLMBCS; 4926 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4927 4928 UChar Out [sizeof(pszUnicode) + 1]; 4929 UChar * pOut = Out; 4930 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4931 4932 int32_t off [sizeof(offsets)]; 4933 4934 /* last 'offset' in expected results is just the final size. 4935 (Makes other tests easier). Compensate here: */ 4936 4937 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4938 4939 4940 4941 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4942 if(U_FAILURE(errorCode)) { 4943 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4944 return; 4945 } 4946 4947 4948 4949 ucnv_toUnicode (cnv, 4950 &pOut, 4951 OutLimit, 4952 &pSource, 4953 sourceLimit, 4954 off, 4955 TRUE, 4956 &errorCode); 4957 4958 4959 if (memcmp(off,offsets,sizeof(offsets))) 4960 { 4961 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4962 } 4963 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4964 { 4965 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4966 } 4967 ucnv_close(cnv); 4968 } 4969 { 4970 /* LMBCS to Unicode - getNextUChar */ 4971 const char * sourceStart; 4972 const char *source=(const char *)pszLMBCS; 4973 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4974 const UChar32 *results= pszUnicode32; 4975 const int *off = offsets32; 4976 4977 UErrorCode errorCode=U_ZERO_ERROR; 4978 UChar32 uniChar; 4979 4980 cnv=ucnv_open("LMBCS-1", &errorCode); 4981 if(U_FAILURE(errorCode)) { 4982 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4983 return; 4984 } 4985 else 4986 { 4987 4988 while(source<limit) { 4989 sourceStart=source; 4990 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4991 if(U_FAILURE(errorCode)) { 4992 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4993 break; 4994 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4995 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4996 uniChar, (source-sourceStart), *results, *off); 4997 break; 4998 } 4999 results++; 5000 off++; 5001 } 5002 } 5003 ucnv_close(cnv); 5004 } 5005 { /* test locale & optimization group operations: Unicode to LMBCS */ 5006 5007 UErrorCode errorCode=U_ZERO_ERROR; 5008 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 5009 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 5010 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 5011 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 5012 const UChar * pUniOut = uniString; 5013 UChar * pUniIn = uniString; 5014 uint8_t lmbcsString [4]; 5015 const char * pLMBCSOut = (const char *)lmbcsString; 5016 char * pLMBCSIn = (char *)lmbcsString; 5017 5018 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 5019 ucnv_fromUnicode (cnv16he, 5020 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5021 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5022 NULL, 1, &errorCode); 5023 5024 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 5025 { 5026 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 5027 } 5028 5029 pLMBCSIn= (char *)lmbcsString; 5030 pUniOut = uniString; 5031 ucnv_fromUnicode (cnv01us, 5032 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5033 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5034 NULL, 1, &errorCode); 5035 5036 if (lmbcsString[0] != 0x9F) 5037 { 5038 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5039 } 5040 5041 /* single byte char from mbcs char set */ 5042 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5043 pLMBCSOut = (const char *)lmbcsString; 5044 pUniIn = uniString; 5045 ucnv_toUnicode (cnv16jp, 5046 &pUniIn, pUniIn + 1, 5047 &pLMBCSOut, (pLMBCSOut + 1), 5048 NULL, 1, &errorCode); 5049 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5050 { 5051 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5052 } 5053 /* convert to group 1: should be 3 bytes */ 5054 pLMBCSIn = (char *)lmbcsString; 5055 pUniOut = uniString; 5056 ucnv_fromUnicode (cnv01us, 5057 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5058 &pUniOut, pUniOut + 1, 5059 NULL, 1, &errorCode); 5060 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5061 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5062 { 5063 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5064 } 5065 pLMBCSOut = (const char *)lmbcsString; 5066 pUniIn = uniString; 5067 ucnv_toUnicode (cnv01us, 5068 &pUniIn, pUniIn + 1, 5069 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5070 NULL, 1, &errorCode); 5071 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5072 { 5073 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5074 } 5075 pLMBCSIn = (char *)lmbcsString; 5076 pUniOut = uniString; 5077 ucnv_fromUnicode (cnv16jp, 5078 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5079 &pUniOut, pUniOut + 1, 5080 NULL, 1, &errorCode); 5081 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5082 { 5083 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5084 } 5085 ucnv_close(cnv16he); 5086 ucnv_close(cnv16jp); 5087 ucnv_close(cnv01us); 5088 } 5089 { 5090 /* Small source buffer testing, LMBCS -> Unicode */ 5091 5092 UErrorCode errorCode=U_ZERO_ERROR; 5093 5094 const char * pSource = (const char *)pszLMBCS; 5095 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5096 int codepointCount = 0; 5097 5098 UChar Out [sizeof(pszUnicode) + 1]; 5099 UChar * pOut = Out; 5100 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 5101 5102 5103 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5104 if(U_FAILURE(errorCode)) { 5105 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5106 return; 5107 } 5108 5109 5110 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 5111 { 5112 ucnv_toUnicode (cnv, 5113 &pOut, 5114 OutLimit, 5115 &pSource, 5116 (pSource+1), /* claim that this is a 1- byte buffer */ 5117 NULL, 5118 FALSE, /* FALSE means there might be more chars in the next buffer */ 5119 &errorCode); 5120 5121 if (U_SUCCESS (errorCode)) 5122 { 5123 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5124 { 5125 /* we are on to the next code point: check value */ 5126 5127 if (Out[0] != pszUnicode[codepointCount]){ 5128 log_err("LMBCS->Uni result %lx should have been %lx \n", 5129 Out[0], pszUnicode[codepointCount]); 5130 } 5131 5132 pOut = Out; /* reset for accumulating next code point */ 5133 codepointCount++; 5134 } 5135 } 5136 else 5137 { 5138 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5139 } 5140 } 5141 { 5142 /* limits & surrogate error testing */ 5143 char LIn [sizeof(pszLMBCS)]; 5144 const char * pLIn = LIn; 5145 5146 char LOut [sizeof(pszLMBCS)]; 5147 char * pLOut = LOut; 5148 5149 UChar UOut [sizeof(pszUnicode)]; 5150 UChar * pUOut = UOut; 5151 5152 UChar UIn [sizeof(pszUnicode)]; 5153 const UChar * pUIn = UIn; 5154 5155 int32_t off [sizeof(offsets)]; 5156 UChar32 uniChar; 5157 5158 errorCode=U_ZERO_ERROR; 5159 5160 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5161 pUIn++; 5162 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); 5163 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5164 { 5165 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5166 } 5167 pUIn--; 5168 5169 errorCode=U_ZERO_ERROR; 5170 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5171 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5172 { 5173 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5174 } 5175 errorCode=U_ZERO_ERROR; 5176 5177 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); 5178 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5179 { 5180 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5181 } 5182 errorCode=U_ZERO_ERROR; 5183 5184 /* 0 byte source request - no error, no pointer movement */ 5185 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5186 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5187 if(U_FAILURE(errorCode)) { 5188 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5189 } 5190 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5191 { 5192 log_err("Unexpected pointer move in 0 byte source request \n"); 5193 } 5194 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5195 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5196 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5197 { 5198 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5199 } 5200 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5201 { 5202 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5203 } 5204 errorCode = U_ZERO_ERROR; 5205 5206 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5207 5208 pUIn = pszUnicode; 5209 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5210 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5211 { 5212 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5213 } 5214 5215 errorCode = U_ZERO_ERROR; 5216 5217 pLIn = (const char *)pszLMBCS; 5218 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5219 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) 5220 { 5221 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5222 } 5223 5224 /* unpaired or chopped LMBCS surrogates */ 5225 5226 /* OK high surrogate, Low surrogate is chopped */ 5227 LIn [0] = (char)0x14; 5228 LIn [1] = (char)0xD8; 5229 LIn [2] = (char)0x01; 5230 LIn [3] = (char)0x14; 5231 LIn [4] = (char)0xDC; 5232 pLIn = LIn; 5233 errorCode = U_ZERO_ERROR; 5234 pUOut = UOut; 5235 5236 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5237 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5238 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5239 { 5240 log_err("Unexpected results on chopped low surrogate\n"); 5241 } 5242 5243 /* chopped at surrogate boundary */ 5244 LIn [0] = (char)0x14; 5245 LIn [1] = (char)0xD8; 5246 LIn [2] = (char)0x01; 5247 pLIn = LIn; 5248 errorCode = U_ZERO_ERROR; 5249 pUOut = UOut; 5250 5251 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5252 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5253 { 5254 log_err("Unexpected results on chopped at surrogate boundary \n"); 5255 } 5256 5257 /* unpaired surrogate plus valid Unichar */ 5258 LIn [0] = (char)0x14; 5259 LIn [1] = (char)0xD8; 5260 LIn [2] = (char)0x01; 5261 LIn [3] = (char)0x14; 5262 LIn [4] = (char)0xC9; 5263 LIn [5] = (char)0xD0; 5264 pLIn = LIn; 5265 errorCode = U_ZERO_ERROR; 5266 pUOut = UOut; 5267 5268 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5269 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5270 { 5271 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5272 } 5273 5274 /* unpaired surrogate plus chopped Unichar */ 5275 LIn [0] = (char)0x14; 5276 LIn [1] = (char)0xD8; 5277 LIn [2] = (char)0x01; 5278 LIn [3] = (char)0x14; 5279 LIn [4] = (char)0xC9; 5280 5281 pLIn = LIn; 5282 errorCode = U_ZERO_ERROR; 5283 pUOut = UOut; 5284 5285 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5286 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5287 { 5288 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5289 } 5290 5291 /* unpaired surrogate plus valid non-Unichar */ 5292 LIn [0] = (char)0x14; 5293 LIn [1] = (char)0xD8; 5294 LIn [2] = (char)0x01; 5295 LIn [3] = (char)0x0F; 5296 LIn [4] = (char)0x3B; 5297 5298 pLIn = LIn; 5299 errorCode = U_ZERO_ERROR; 5300 pUOut = UOut; 5301 5302 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5303 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5304 { 5305 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5306 } 5307 5308 /* unpaired surrogate plus chopped non-Unichar */ 5309 LIn [0] = (char)0x14; 5310 LIn [1] = (char)0xD8; 5311 LIn [2] = (char)0x01; 5312 LIn [3] = (char)0x0F; 5313 5314 pLIn = LIn; 5315 errorCode = U_ZERO_ERROR; 5316 pUOut = UOut; 5317 5318 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5319 5320 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5321 { 5322 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5323 } 5324 } 5325 } 5326 ucnv_close(cnv); /* final cleanup */ 5327 } 5328 5329 5330 static void TestJitterbug255() 5331 { 5332 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 5333 const char *testBuffer = (const char *)testBytes; 5334 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5335 UErrorCode status = U_ZERO_ERROR; 5336 /*UChar32 result;*/ 5337 UConverter *cnv = 0; 5338 5339 cnv = ucnv_open("shift-jis", &status); 5340 if (U_FAILURE(status) || cnv == 0) { 5341 log_data_err("Failed to open the converter for SJIS.\n"); 5342 return; 5343 } 5344 while (testBuffer != testEnd) 5345 { 5346 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5347 if (U_FAILURE(status)) 5348 { 5349 log_err("Failed to convert the next UChar for SJIS.\n"); 5350 break; 5351 } 5352 } 5353 ucnv_close(cnv); 5354 } 5355 5356 static void TestEBCDICUS4XML() 5357 { 5358 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5359 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5360 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5361 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5362 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5363 UChar *unicodes = unicodes_x; 5364 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5365 char *target = target_x; 5366 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5367 UErrorCode status = U_ZERO_ERROR; 5368 UConverter *cnv = 0; 5369 5370 cnv = ucnv_open("ebcdic-xml-us", &status); 5371 if (U_FAILURE(status) || cnv == 0) { 5372 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5373 return; 5374 } 5375 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5376 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5377 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", 5378 u_errorName(status)); 5379 printUSeqErr(unicodes_x, 3); 5380 printUSeqErr(toUnicodeMaps, 3); 5381 } 5382 status = U_ZERO_ERROR; 5383 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5384 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5385 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5386 u_errorName(status)); 5387 printSeqErr((const unsigned char*)target_x, 3); 5388 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5389 } 5390 ucnv_close(cnv); 5391 } 5392 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5393 5394 #if !UCONFIG_NO_COLLATION 5395 5396 static void TestJitterbug981(){ 5397 const UChar* rules; 5398 int32_t rules_length, target_cap, bytes_needed, buff_size; 5399 UErrorCode status = U_ZERO_ERROR; 5400 UConverter *utf8cnv; 5401 UCollator* myCollator; 5402 char *buff; 5403 int numNeeded=0; 5404 utf8cnv = ucnv_open ("utf8", &status); 5405 if(U_FAILURE(status)){ 5406 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5407 return; 5408 } 5409 myCollator = ucol_open("zh", &status); 5410 if(U_FAILURE(status)){ 5411 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5412 ucnv_close(utf8cnv); 5413 return; 5414 } 5415 5416 rules = ucol_getRules(myCollator, &rules_length); 5417 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5418 buff = malloc(buff_size); 5419 5420 target_cap = 0; 5421 do { 5422 ucnv_reset(utf8cnv); 5423 status = U_ZERO_ERROR; 5424 if(target_cap >= buff_size) { 5425 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5426 break; 5427 } 5428 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5429 rules, rules_length, &status); 5430 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5431 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5432 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5433 break; 5434 } 5435 numNeeded = bytes_needed; 5436 } while (status == U_BUFFER_OVERFLOW_ERROR); 5437 ucol_close(myCollator); 5438 ucnv_close(utf8cnv); 5439 free(buff); 5440 } 5441 5442 #endif 5443 5444 static void TestJitterbug1293(){ 5445 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5446 char target[256]; 5447 UErrorCode status = U_ZERO_ERROR; 5448 UConverter* conv=NULL; 5449 int32_t target_cap, bytes_needed, numNeeded = 0; 5450 conv = ucnv_open("shift-jis",&status); 5451 if(U_FAILURE(status)){ 5452 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5453 return; 5454 } 5455 5456 do{ 5457 target_cap =0; 5458 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5459 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5460 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5461 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5462 } 5463 numNeeded = bytes_needed; 5464 } while (status == U_BUFFER_OVERFLOW_ERROR); 5465 if(U_FAILURE(status)){ 5466 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); 5467 return; 5468 } 5469 ucnv_close(conv); 5470 } 5471 static void TestJB5275_1(){ 5472 5473 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5474 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5475 /* Switch script: */ 5476 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5477 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5478 "\xEF\x40\x3B\xB3\x0A"; 5479 static const UChar expected[] ={ 5480 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5481 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5482 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5483 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5484 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5485 }; 5486 5487 UErrorCode status = U_ZERO_ERROR; 5488 UConverter* conv = ucnv_open("iscii-gur", &status); 5489 UChar dest[100] = {'\0'}; 5490 UChar* target = dest; 5491 UChar* targetLimit = dest+100; 5492 const char* source = data; 5493 const char* sourceLimit = data+strlen(data); 5494 const UChar* exp = expected; 5495 5496 if (U_FAILURE(status)) { 5497 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5498 return; 5499 } 5500 5501 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5502 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5503 if(U_FAILURE(status)){ 5504 log_err("conversion failed: %s \n", u_errorName(status)); 5505 } 5506 targetLimit = target; 5507 target = dest; 5508 printUSeq(target, targetLimit-target); 5509 while(target<targetLimit){ 5510 if(*exp!=*target){ 5511 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5512 } 5513 target++; 5514 exp++; 5515 } 5516 ucnv_close(conv); 5517 } 5518 5519 static void TestJB5275(){ 5520 static const char* data = 5521 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5522 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5523 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5524 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5525 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5526 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5527 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5528 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5529 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5530 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5531 static const UChar expected[] ={ 5532 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5533 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5534 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5535 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5536 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5537 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5538 }; 5539 5540 UErrorCode status = U_ZERO_ERROR; 5541 UConverter* conv = ucnv_open("iscii", &status); 5542 UChar dest[100] = {'\0'}; 5543 UChar* target = dest; 5544 UChar* targetLimit = dest+100; 5545 const char* source = data; 5546 const char* sourceLimit = data+strlen(data); 5547 const UChar* exp = expected; 5548 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5549 if(U_FAILURE(status)){ 5550 log_err("conversion failed: %s \n", u_errorName(status)); 5551 } 5552 targetLimit = target; 5553 target = dest; 5554 5555 printUSeq(target, targetLimit-target); 5556 5557 while(target<targetLimit){ 5558 if(*exp!=*target){ 5559 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5560 } 5561 target++; 5562 exp++; 5563 } 5564 ucnv_close(conv); 5565 } 5566 5567 static void 5568 TestIsFixedWidth() { 5569 UErrorCode status = U_ZERO_ERROR; 5570 UConverter *cnv = NULL; 5571 int32_t i; 5572 5573 const char *fixedWidth[] = { 5574 "US-ASCII", 5575 "UTF32", 5576 "ibm-5478_P100-1995" 5577 }; 5578 5579 const char *notFixedWidth[] = { 5580 "GB18030", 5581 "UTF8", 5582 "windows-949-2000", 5583 "UTF16" 5584 }; 5585 5586 for (i = 0; i < LENGTHOF(fixedWidth); i++) { 5587 cnv = ucnv_open(fixedWidth[i], &status); 5588 if (cnv == NULL || U_FAILURE(status)) { 5589 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status)); 5590 continue; 5591 } 5592 5593 if (!ucnv_isFixedWidth(cnv, &status)) { 5594 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]); 5595 } 5596 ucnv_close(cnv); 5597 } 5598 5599 for (i = 0; i < LENGTHOF(notFixedWidth); i++) { 5600 cnv = ucnv_open(notFixedWidth[i], &status); 5601 if (cnv == NULL || U_FAILURE(status)) { 5602 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status)); 5603 continue; 5604 } 5605 5606 if (ucnv_isFixedWidth(cnv, &status)) { 5607 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]); 5608 } 5609 ucnv_close(cnv); 5610 } 5611 } 5612