1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Steven R. Loomis 7/8/1999 Adding input buffer test 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "cstring.h" 17 #include "unicode/uloc.h" 18 #include "unicode/ucnv.h" 19 #include "unicode/ucnv_err.h" 20 #include "unicode/ucnv_cb.h" 21 #include "cintltst.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucol.h" 25 #include "unicode/utf16.h" 26 #include "cmemory.h" 27 #include "nucnvtst.h" 28 29 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 30 31 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 32 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 33 #if !UCONFIG_NO_COLLATION 34 static void TestJitterbug981(void); 35 #endif 36 #if !UCONFIG_NO_LEGACY_CONVERSION 37 static void TestJitterbug1293(void); 38 #endif 39 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 40 static void TestConverterTypesAndStarters(void); 41 static void TestAmbiguous(void); 42 static void TestSignatureDetection(void); 43 static void TestUTF7(void); 44 static void TestIMAP(void); 45 static void TestUTF8(void); 46 static void TestCESU8(void); 47 static void TestUTF16(void); 48 static void TestUTF16BE(void); 49 static void TestUTF16LE(void); 50 static void TestUTF32(void); 51 static void TestUTF32BE(void); 52 static void TestUTF32LE(void); 53 static void TestLATIN1(void); 54 55 #if !UCONFIG_NO_LEGACY_CONVERSION 56 static void TestSBCS(void); 57 static void TestDBCS(void); 58 static void TestMBCS(void); 59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 60 static void TestICCRunout(void); 61 #endif 62 63 #ifdef U_ENABLE_GENERIC_ISO_2022 64 static void TestISO_2022(void); 65 #endif 66 67 static void TestISO_2022_JP(void); 68 static void TestISO_2022_JP_1(void); 69 static void TestISO_2022_JP_2(void); 70 static void TestISO_2022_KR(void); 71 static void TestISO_2022_KR_1(void); 72 static void TestISO_2022_CN(void); 73 #if 0 74 /* 75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 76 */ 77 static void TestISO_2022_CN_EXT(void); 78 #endif 79 static void TestJIS(void); 80 static void TestHZ(void); 81 #endif 82 83 static void TestSCSU(void); 84 85 #if !UCONFIG_NO_LEGACY_CONVERSION 86 static void TestEBCDIC_STATEFUL(void); 87 static void TestGB18030(void); 88 static void TestLMBCS(void); 89 static void TestJitterbug255(void); 90 static void TestEBCDICUS4XML(void); 91 #if 0 92 /* 93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 94 */ 95 static void TestJitterbug915(void); 96 #endif 97 static void TestISCII(void); 98 99 static void TestCoverageMBCS(void); 100 static void TestJitterbug2346(void); 101 static void TestJitterbug2411(void); 102 static void TestJB5275(void); 103 static void TestJB5275_1(void); 104 static void TestJitterbug6175(void); 105 106 static void TestIsFixedWidth(void); 107 #endif 108 109 static void TestInBufSizes(void); 110 111 static void TestRoundTrippingAllUTF(void); 112 static void TestConv(const uint16_t in[], 113 int len, 114 const char* conv, 115 const char* lang, 116 char byteArr[], 117 int byteArrLen); 118 119 /* open a converter, using test data if it begins with '@' */ 120 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 121 122 123 #define NEW_MAX_BUFFER 999 124 125 static int32_t gInBufferSize = NEW_MAX_BUFFER; 126 static int32_t gOutBufferSize = NEW_MAX_BUFFER; 127 static char gNuConvTestName[1024]; 128 129 #define nct_min(x,y) ((x<y) ? x : y) 130 131 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 132 { 133 if(cnv && cnv[0] == '@') { 134 return ucnv_openPackage(loadTestData(err), cnv+1, err); 135 } else { 136 return ucnv_open(cnv, err); 137 } 138 } 139 140 static void printSeq(const unsigned char* a, int len) 141 { 142 int i=0; 143 log_verbose("{"); 144 while (i<len) 145 log_verbose("0x%02x ", a[i++]); 146 log_verbose("}\n"); 147 } 148 149 static void printUSeq(const UChar* a, int len) 150 { 151 int i=0; 152 log_verbose("{U+"); 153 while (i<len) log_verbose("0x%04x ", a[i++]); 154 log_verbose("}\n"); 155 } 156 157 static void printSeqErr(const unsigned char* a, int len) 158 { 159 int i=0; 160 fprintf(stderr, "{"); 161 while (i<len) 162 fprintf(stderr, "0x%02x ", a[i++]); 163 fprintf(stderr, "}\n"); 164 } 165 166 static void printUSeqErr(const UChar* a, int len) 167 { 168 int i=0; 169 fprintf(stderr, "{U+"); 170 while (i<len) 171 fprintf(stderr, "0x%04x ", a[i++]); 172 fprintf(stderr,"}\n"); 173 } 174 175 static void 176 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 177 { 178 const char* s0; 179 const char* s=(char*)source; 180 const int32_t *r=results; 181 UErrorCode errorCode=U_ZERO_ERROR; 182 UChar32 c; 183 184 while(s<limit) { 185 s0=s; 186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 187 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 188 break; /* no more significant input */ 189 } else if(U_FAILURE(errorCode)) { 190 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 191 break; 192 } else if( 193 /* test the expected number of input bytes only if >=0 */ 194 (*r>=0 && (int32_t)(s-s0)!=*r) || 195 c!=*(r+1) 196 ) { 197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 198 message, c, (s-s0), *(r+1), *r); 199 break; 200 } 201 r+=2; 202 } 203 } 204 205 static void 206 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) 207 { 208 const char* s=(char*)source; 209 UErrorCode errorCode=U_ZERO_ERROR; 210 uint32_t c; 211 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 212 if(errorCode != expected){ 213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 214 } 215 if(c != 0xFFFD && c != 0xffff){ 216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 217 } 218 219 } 220 221 static void TestInBufSizes(void) 222 { 223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 224 #if 1 225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 230 TestNewConvertWithBufferSizes(1,1); 231 TestNewConvertWithBufferSizes(2,3); 232 TestNewConvertWithBufferSizes(3,2); 233 #endif 234 } 235 236 static void TestOutBufSizes(void) 237 { 238 #if 1 239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 245 246 #endif 247 } 248 249 250 void addTestNewConvert(TestNode** root) 251 { 252 #if !UCONFIG_NO_FILE_IO 253 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 254 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 255 #endif 256 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 257 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 258 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); 259 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 260 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); 261 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 262 263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 264 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 265 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 266 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 267 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 268 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 269 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 270 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 271 272 #if !UCONFIG_NO_LEGACY_CONVERSION 273 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 274 #endif 275 276 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 277 278 #if !UCONFIG_NO_LEGACY_CONVERSION 279 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 280 #if !UCONFIG_NO_FILE_IO 281 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 282 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 283 #endif 284 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 285 286 #ifdef U_ENABLE_GENERIC_ISO_2022 287 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 288 #endif 289 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 290 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); 291 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 292 /* BEGIN android-changed: we don't have ISO_2022_JP_2 293 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 294 END android-changed */ 295 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 296 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 297 /* BEGIN android-changed: we don't have ISO-2022-CN. 298 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 299 END android-changed */ 300 /* 301 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 302 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 303 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 304 */ 305 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 306 #endif 307 308 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 309 310 #if !UCONFIG_NO_LEGACY_CONVERSION 311 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 312 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 313 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 314 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 315 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 316 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 317 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 318 #if !UCONFIG_NO_COLLATION 319 /* BEGIN android-removed 320 To save space, Android does not include the collation tailoring rules. 321 Skip the related tests. 322 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 323 END android-removed */ 324 #endif 325 326 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 327 #endif 328 329 330 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 331 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 332 #endif 333 334 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 335 336 #if !UCONFIG_NO_LEGACY_CONVERSION 337 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 338 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 339 /* BEGIN android-removed 340 To save space, Android does not build full ISO2022 CJK tables. 341 We turn off the tests here. 342 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 343 END android-removed */ 344 345 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); 346 #endif 347 } 348 349 350 /* Note that this test already makes use of statics, so it's not really 351 multithread safe. 352 This convenience function lets us make the error messages actually useful. 353 */ 354 355 static void setNuConvTestName(const char *codepage, const char *direction) 356 { 357 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 358 codepage, 359 direction, 360 (int)gInBufferSize, 361 (int)gOutBufferSize); 362 } 363 364 typedef enum 365 { 366 TC_OK = 0, /* test was OK */ 367 TC_MISMATCH = 1, /* Match failed - err was printed */ 368 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 369 } ETestConvertResult; 370 371 /* Note: This function uses global variables and it will not do offset 372 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 373 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 374 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 375 { 376 UErrorCode status = U_ZERO_ERROR; 377 UConverter *conv = 0; 378 char junkout[NEW_MAX_BUFFER]; /* FIX */ 379 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 380 char *p; 381 const UChar *src; 382 char *end; 383 char *targ; 384 int32_t *offs; 385 int i; 386 int32_t realBufferSize; 387 char *realBufferEnd; 388 const UChar *realSourceEnd; 389 const UChar *sourceLimit; 390 UBool checkOffsets = TRUE; 391 UBool doFlush; 392 393 for(i=0;i<NEW_MAX_BUFFER;i++) 394 junkout[i] = (char)0xF0; 395 for(i=0;i<NEW_MAX_BUFFER;i++) 396 junokout[i] = 0xFF; 397 398 setNuConvTestName(codepage, "FROM"); 399 400 log_verbose("\n========= %s\n", gNuConvTestName); 401 402 conv = my_ucnv_open(codepage, &status); 403 404 if(U_FAILURE(status)) 405 { 406 log_data_err("Couldn't open converter %s\n",codepage); 407 return TC_FAIL; 408 } 409 if(useFallback){ 410 ucnv_setFallback(conv,useFallback); 411 } 412 413 log_verbose("Converter opened..\n"); 414 415 src = source; 416 targ = junkout; 417 offs = junokout; 418 419 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 420 realBufferEnd = junkout + realBufferSize; 421 realSourceEnd = source + sourceLen; 422 423 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 424 checkOffsets = FALSE; 425 426 do 427 { 428 end = nct_min(targ + gOutBufferSize, realBufferEnd); 429 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 430 431 doFlush = (UBool)(sourceLimit == realSourceEnd); 432 433 if(targ == realBufferEnd) { 434 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 435 return TC_FAIL; 436 } 437 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 438 439 440 status = U_ZERO_ERROR; 441 442 ucnv_fromUnicode (conv, 443 &targ, 444 end, 445 &src, 446 sourceLimit, 447 checkOffsets ? offs : NULL, 448 doFlush, /* flush if we're at the end of the input data */ 449 &status); 450 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 451 452 if(U_FAILURE(status)) { 453 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 454 return TC_FAIL; 455 } 456 457 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 458 sourceLen, targ-junkout); 459 460 if(getTestOption(VERBOSITY_OPTION)) 461 { 462 char junk[9999]; 463 char offset_str[9999]; 464 char *ptr; 465 466 junk[0] = 0; 467 offset_str[0] = 0; 468 for(ptr = junkout;ptr<targ;ptr++) { 469 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 470 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 471 } 472 473 log_verbose(junk); 474 printSeq((const uint8_t *)expect, expectLen); 475 if ( checkOffsets ) { 476 log_verbose("\nOffsets:"); 477 log_verbose(offset_str); 478 } 479 log_verbose("\n"); 480 } 481 ucnv_close(conv); 482 483 if(expectLen != targ-junkout) { 484 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 485 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 486 fprintf(stderr, "Got:\n"); 487 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 488 fprintf(stderr, "Expected:\n"); 489 printSeqErr((const unsigned char*)expect, expectLen); 490 return TC_MISMATCH; 491 } 492 493 if (checkOffsets && (expectOffsets != 0) ) { 494 log_verbose("comparing %d offsets..\n", targ-junkout); 495 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 496 log_err("did not get the expected offsets. %s\n", gNuConvTestName); 497 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 498 log_err("\n"); 499 log_err("Got : "); 500 for(p=junkout;p<targ;p++) { 501 log_err("%d,", junokout[p-junkout]); 502 } 503 log_err("\n"); 504 log_err("Expected: "); 505 for(i=0; i<(targ-junkout); i++) { 506 log_err("%d,", expectOffsets[i]); 507 } 508 log_err("\n"); 509 } 510 } 511 512 log_verbose("comparing..\n"); 513 if(!memcmp(junkout, expect, expectLen)) { 514 log_verbose("Matches!\n"); 515 return TC_OK; 516 } else { 517 log_err("String does not match u->%s\n", gNuConvTestName); 518 printUSeqErr(source, sourceLen); 519 fprintf(stderr, "Got:\n"); 520 printSeqErr((const unsigned char *)junkout, expectLen); 521 fprintf(stderr, "Expected:\n"); 522 printSeqErr((const unsigned char *)expect, expectLen); 523 524 return TC_MISMATCH; 525 } 526 } 527 528 /* Note: This function uses global variables and it will not do offset 529 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 530 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 531 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 532 { 533 UErrorCode status = U_ZERO_ERROR; 534 UConverter *conv = 0; 535 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 536 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 537 const char *src; 538 const char *realSourceEnd; 539 const char *srcLimit; 540 UChar *p; 541 UChar *targ; 542 UChar *end; 543 int32_t *offs; 544 int i; 545 UBool checkOffsets = TRUE; 546 547 int32_t realBufferSize; 548 UChar *realBufferEnd; 549 550 551 for(i=0;i<NEW_MAX_BUFFER;i++) 552 junkout[i] = 0xFFFE; 553 554 for(i=0;i<NEW_MAX_BUFFER;i++) 555 junokout[i] = -1; 556 557 setNuConvTestName(codepage, "TO"); 558 559 log_verbose("\n========= %s\n", gNuConvTestName); 560 561 conv = my_ucnv_open(codepage, &status); 562 563 if(U_FAILURE(status)) 564 { 565 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 566 return TC_FAIL; 567 } 568 if(useFallback){ 569 ucnv_setFallback(conv,useFallback); 570 } 571 log_verbose("Converter opened..\n"); 572 573 src = (const char *)source; 574 targ = junkout; 575 offs = junokout; 576 577 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 578 realBufferEnd = junkout + realBufferSize; 579 realSourceEnd = src + sourcelen; 580 581 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 582 checkOffsets = FALSE; 583 584 do 585 { 586 end = nct_min( targ + gOutBufferSize, realBufferEnd); 587 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 588 589 if(targ == realBufferEnd) 590 { 591 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 592 return TC_FAIL; 593 } 594 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 595 596 /* oldTarg = targ; */ 597 598 status = U_ZERO_ERROR; 599 600 ucnv_toUnicode (conv, 601 &targ, 602 end, 603 &src, 604 srcLimit, 605 checkOffsets ? offs : NULL, 606 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 607 &status); 608 609 /* offs += (targ-oldTarg); */ 610 611 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 612 613 if(U_FAILURE(status)) 614 { 615 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 616 return TC_FAIL; 617 } 618 619 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 620 sourcelen, targ-junkout); 621 if(getTestOption(VERBOSITY_OPTION)) 622 { 623 char junk[9999]; 624 char offset_str[9999]; 625 UChar *ptr; 626 627 junk[0] = 0; 628 offset_str[0] = 0; 629 630 for(ptr = junkout;ptr<targ;ptr++) 631 { 632 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 633 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 634 } 635 636 log_verbose(junk); 637 printUSeq(expect, expectlen); 638 if ( checkOffsets ) 639 { 640 log_verbose("\nOffsets:"); 641 log_verbose(offset_str); 642 } 643 log_verbose("\n"); 644 } 645 ucnv_close(conv); 646 647 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 648 649 if (checkOffsets && (expectOffsets != 0)) 650 { 651 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 652 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 653 log_err("Got: "); 654 for(p=junkout;p<targ;p++) { 655 log_err("%d,", junokout[p-junkout]); 656 } 657 log_err("\n"); 658 log_err("Expected: "); 659 for(i=0; i<(targ-junkout); i++) { 660 log_err("%d,", expectOffsets[i]); 661 } 662 log_err("\n"); 663 log_err("output: "); 664 for(i=0; i<(targ-junkout); i++) { 665 log_err("%X,", junkout[i]); 666 } 667 log_err("\n"); 668 log_err("input: "); 669 for(i=0; i<(src-(const char *)source); i++) { 670 log_err("%X,", (unsigned char)source[i]); 671 } 672 log_err("\n"); 673 } 674 } 675 676 if(!memcmp(junkout, expect, expectlen*2)) 677 { 678 log_verbose("Matches!\n"); 679 return TC_OK; 680 } 681 else 682 { 683 log_err("String does not match. %s\n", gNuConvTestName); 684 log_verbose("String does not match. %s\n", gNuConvTestName); 685 printf("\nGot:"); 686 printUSeqErr(junkout, expectlen); 687 printf("\nExpected:"); 688 printUSeqErr(expect, expectlen); 689 return TC_MISMATCH; 690 } 691 } 692 693 694 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 695 { 696 /** test chars #1 */ 697 /* 1 2 3 1Han 2Han 3Han . */ 698 static const UChar sampleText[] = 699 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 700 static const UChar sampleTextRoundTripUnmappable[] = 701 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 702 703 704 static const uint8_t expectedUTF8[] = 705 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 706 static const int32_t toUTF8Offs[] = 707 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 708 static const int32_t fmUTF8Offs[] = 709 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 710 711 #ifdef U_ENABLE_GENERIC_ISO_2022 712 /* Same as UTF8, but with ^[%B preceeding */ 713 static const const uint8_t expectedISO2022[] = 714 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 715 static const int32_t toISO2022Offs[] = 716 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 717 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 718 static const int32_t fmISO2022Offs[] = 719 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 720 #endif 721 722 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ 723 static const uint8_t expectedIBM930[] = 724 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 725 static const int32_t toIBM930Offs[] = 726 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 727 static const int32_t fmIBM930Offs[] = 728 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 729 730 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 731 static const uint8_t expectedIBM943[] = 732 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 733 static const int32_t toIBM943Offs [] = 734 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 735 static const int32_t fmIBM943Offs[] = 736 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 737 738 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 739 static const uint8_t expectedIBM9027[] = 740 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 741 static const int32_t toIBM9027Offs [] = 742 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 743 744 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 745 static const uint8_t expectedIBM920[] = 746 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 747 static const int32_t toIBM920Offs [] = 748 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 749 750 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 751 static const uint8_t expectedISO88593[] = 752 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 753 static const int32_t toISO88593Offs[] = 754 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 755 756 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ 757 static const uint8_t expectedLATIN1[] = 758 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 759 static const int32_t toLATIN1Offs[] = 760 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 761 762 763 /* etc */ 764 static const uint8_t expectedUTF16BE[] = 765 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 766 static const int32_t toUTF16BEOffs[]= 767 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 768 static const int32_t fmUTF16BEOffs[] = 769 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 770 771 static const uint8_t expectedUTF16LE[] = 772 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 773 static const int32_t toUTF16LEOffs[]= 774 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 775 static const int32_t fmUTF16LEOffs[] = 776 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 777 778 static const uint8_t expectedUTF32BE[] = 779 { 0x00, 0x00, 0x00, 0x31, 780 0x00, 0x00, 0x00, 0x32, 781 0x00, 0x00, 0x00, 0x33, 782 0x00, 0x00, 0x00, 0x00, 783 0x00, 0x00, 0x4e, 0x00, 784 0x00, 0x00, 0x4e, 0x8c, 785 0x00, 0x00, 0x4e, 0x09, 786 0x00, 0x00, 0x00, 0x2e, 787 0x00, 0x02, 0x00, 0x21 }; 788 static const int32_t toUTF32BEOffs[]= 789 { 0x00, 0x00, 0x00, 0x00, 790 0x01, 0x01, 0x01, 0x01, 791 0x02, 0x02, 0x02, 0x02, 792 0x03, 0x03, 0x03, 0x03, 793 0x04, 0x04, 0x04, 0x04, 794 0x05, 0x05, 0x05, 0x05, 795 0x06, 0x06, 0x06, 0x06, 796 0x07, 0x07, 0x07, 0x07, 797 0x08, 0x08, 0x08, 0x08, 798 0x08, 0x08, 0x08, 0x08 }; 799 static const int32_t fmUTF32BEOffs[] = 800 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 801 802 static const uint8_t expectedUTF32LE[] = 803 { 0x31, 0x00, 0x00, 0x00, 804 0x32, 0x00, 0x00, 0x00, 805 0x33, 0x00, 0x00, 0x00, 806 0x00, 0x00, 0x00, 0x00, 807 0x00, 0x4e, 0x00, 0x00, 808 0x8c, 0x4e, 0x00, 0x00, 809 0x09, 0x4e, 0x00, 0x00, 810 0x2e, 0x00, 0x00, 0x00, 811 0x21, 0x00, 0x02, 0x00 }; 812 static const int32_t toUTF32LEOffs[]= 813 { 0x00, 0x00, 0x00, 0x00, 814 0x01, 0x01, 0x01, 0x01, 815 0x02, 0x02, 0x02, 0x02, 816 0x03, 0x03, 0x03, 0x03, 817 0x04, 0x04, 0x04, 0x04, 818 0x05, 0x05, 0x05, 0x05, 819 0x06, 0x06, 0x06, 0x06, 820 0x07, 0x07, 0x07, 0x07, 821 0x08, 0x08, 0x08, 0x08, 822 0x08, 0x08, 0x08, 0x08 }; 823 static const int32_t fmUTF32LEOffs[] = 824 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 825 826 827 828 829 /** Test chars #2 **/ 830 831 /* Sahha [health], slashed h's */ 832 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 833 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 834 835 /* LMBCS */ 836 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 837 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 838 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 839 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 840 /*********************************** START OF CODE finally *************/ 841 842 gInBufferSize = insize; 843 gOutBufferSize = outsize; 844 845 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 846 847 848 /*UTF-8*/ 849 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 850 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 851 852 log_verbose("Test surrogate behaviour for UTF8\n"); 853 { 854 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 855 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 856 0xf0, 0x90, 0x90, 0x81, 857 0xef, 0xbf, 0xbd 858 }; 859 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 860 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 861 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 862 863 864 } 865 866 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 867 /*ISO-2022*/ 868 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 869 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 870 #endif 871 872 /*UTF16 LE*/ 873 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 874 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 875 /*UTF16 BE*/ 876 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 877 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 878 /*UTF32 LE*/ 879 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 880 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 881 /*UTF32 BE*/ 882 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 883 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 884 885 /*LATIN_1*/ 886 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 887 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 888 889 #if !UCONFIG_NO_LEGACY_CONVERSION 890 /*EBCDIC_STATEFUL*/ 891 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 892 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 893 894 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 895 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 896 897 /*MBCS*/ 898 899 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 900 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); 901 /*DBCS*/ 902 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 903 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 904 /*SBCS*/ 905 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 906 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 907 /*SBCS*/ 908 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 909 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 910 #endif 911 912 913 /****/ 914 915 /*UTF-8*/ 916 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 917 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 918 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 919 /*ISO-2022*/ 920 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 921 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 922 #endif 923 924 /*UTF16 LE*/ 925 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 926 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 927 /*UTF16 BE*/ 928 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 929 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 930 /*UTF32 LE*/ 931 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 932 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 933 /*UTF32 BE*/ 934 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 935 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 936 937 #if !UCONFIG_NO_LEGACY_CONVERSION 938 /*EBCDIC_STATEFUL*/ 939 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 940 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 941 /*MBCS*/ 942 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 943 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 944 #endif 945 946 /* Try it again to make sure it still works */ 947 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 948 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 949 950 #if !UCONFIG_NO_LEGACY_CONVERSION 951 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 952 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 953 954 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 955 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 956 957 /*LMBCS*/ 958 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 959 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 960 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 961 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 962 #endif 963 964 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 965 { 966 /* encode directly set D and set O */ 967 static const uint8_t utf7[] = { 968 /* 969 Hi Mom -+Jjo--! 970 A+ImIDkQ. 971 +- 972 +ZeVnLIqe- 973 */ 974 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 975 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 976 0x2b, 0x2d, 977 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 978 }; 979 static const UChar unicode[] = { 980 /* 981 Hi Mom -<WHITE SMILING FACE>-! 982 A<NOT IDENTICAL TO><ALPHA>. 983 + 984 [Japanese word "nihongo"] 985 */ 986 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 987 0x41, 0x2262, 0x0391, 0x2e, 988 0x2b, 989 0x65e5, 0x672c, 0x8a9e 990 }; 991 static const int32_t toUnicodeOffsets[] = { 992 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 993 15, 17, 19, 23, 994 24, 995 27, 29, 32 996 }; 997 static const int32_t fromUnicodeOffsets[] = { 998 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 999 11, 12, 12, 12, 13, 13, 13, 13, 14, 1000 15, 15, 1001 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1002 }; 1003 1004 /* same but escaping set O (the exclamation mark) */ 1005 static const uint8_t utf7Restricted[] = { 1006 /* 1007 Hi Mom -+Jjo--+ACE- 1008 A+ImIDkQ. 1009 +- 1010 +ZeVnLIqe- 1011 */ 1012 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 1013 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 1014 0x2b, 0x2d, 1015 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 1016 }; 1017 static const int32_t toUnicodeOffsetsR[] = { 1018 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1019 19, 21, 23, 27, 1020 28, 1021 31, 33, 36 1022 }; 1023 static const int32_t fromUnicodeOffsetsR[] = { 1024 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1025 11, 12, 12, 12, 13, 13, 13, 13, 14, 1026 15, 15, 1027 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1028 }; 1029 1030 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1031 1032 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 1033 1034 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1035 1036 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1037 } 1038 1039 /* 1040 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 1041 * modified according to RFC 2060, 1042 * and supplemented with the one example in RFC 2060 itself. 1043 */ 1044 { 1045 static const uint8_t imap[] = { 1046 /* Hi Mom -&Jjo--! 1047 A&ImIDkQ-. 1048 &- 1049 &ZeVnLIqe- 1050 \ 1051 ~peter 1052 /mail 1053 /&ZeVnLIqe- 1054 /&U,BTFw- 1055 */ 1056 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1057 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1058 0x26, 0x2d, 1059 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1060 0x5c, 1061 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1062 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1063 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1064 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1065 }; 1066 static const UChar unicode[] = { 1067 /* Hi Mom -<WHITE SMILING FACE>-! 1068 A<NOT IDENTICAL TO><ALPHA>. 1069 & 1070 [Japanese word "nihongo"] 1071 \ 1072 ~peter 1073 /mail 1074 /<65e5, 672c, 8a9e> 1075 /<53f0, 5317> 1076 */ 1077 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1078 0x41, 0x2262, 0x0391, 0x2e, 1079 0x26, 1080 0x65e5, 0x672c, 0x8a9e, 1081 0x5c, 1082 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1083 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1084 0x2f, 0x65e5, 0x672c, 0x8a9e, 1085 0x2f, 0x53f0, 0x5317 1086 }; 1087 static const int32_t toUnicodeOffsets[] = { 1088 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1089 15, 17, 19, 24, 1090 25, 1091 28, 30, 33, 1092 37, 1093 38, 39, 40, 41, 42, 43, 1094 44, 45, 46, 47, 48, 1095 49, 51, 53, 56, 1096 60, 62, 64 1097 }; 1098 static const int32_t fromUnicodeOffsets[] = { 1099 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1100 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1101 15, 15, 1102 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1103 19, 1104 20, 21, 22, 23, 24, 25, 1105 26, 27, 28, 29, 30, 1106 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1107 35, 36, 36, 36, 37, 37, 37, 37, 37 1108 }; 1109 1110 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1111 1112 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1113 } 1114 1115 /* Test UTF-8 bad data handling*/ 1116 { 1117 static const uint8_t utf8[]={ 1118 0x61, 1119 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1120 0x00, 1121 0x62, 1122 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1123 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1124 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1125 0xdf, 0xbf, /* 7ff */ 1126 0xbf, /* truncated tail */ 1127 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1128 0x02 1129 }; 1130 1131 static const uint16_t utf8Expected[]={ 1132 0x0061, 1133 0xfffd, 1134 0x0000, 1135 0x0062, 1136 0xfffd, 1137 0xfffd, 1138 0xdbff, 0xdfff, 1139 0x07ff, 1140 0xfffd, 1141 0xfffd, 1142 0x0002 1143 }; 1144 1145 static const int32_t utf8Offsets[]={ 1146 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1147 }; 1148 testConvertToU(utf8, sizeof(utf8), 1149 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1150 1151 } 1152 1153 /* Test UTF-32BE bad data handling*/ 1154 { 1155 static const uint8_t utf32[]={ 1156 0x00, 0x00, 0x00, 0x61, 1157 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1158 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1159 0x00, 0x00, 0x00, 0x62, 1160 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1161 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1162 0x00, 0x00, 0x01, 0x62, 1163 0x00, 0x00, 0x02, 0x62 1164 }; 1165 static const uint16_t utf32Expected[]={ 1166 0x0061, 1167 0xfffd, /* 0x110000 out of range */ 1168 0xDBFF, /* 0x10FFFF in range */ 1169 0xDFFF, 1170 0x0062, 1171 0xfffd, /* 0xffffffff out of range */ 1172 0xfffd, /* 0x7fffffff out of range */ 1173 0x0162, 1174 0x0262 1175 }; 1176 static const int32_t utf32Offsets[]={ 1177 0, 4, 8, 8, 12, 16, 20, 24, 28 1178 }; 1179 static const uint8_t utf32ExpectedBack[]={ 1180 0x00, 0x00, 0x00, 0x61, 1181 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ 1182 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1183 0x00, 0x00, 0x00, 0x62, 1184 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1185 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1186 0x00, 0x00, 0x01, 0x62, 1187 0x00, 0x00, 0x02, 0x62 1188 }; 1189 static const int32_t utf32OffsetsBack[]={ 1190 0,0,0,0, 1191 1,1,1,1, 1192 2,2,2,2, 1193 4,4,4,4, 1194 5,5,5,5, 1195 6,6,6,6, 1196 7,7,7,7, 1197 8,8,8,8 1198 }; 1199 1200 testConvertToU(utf32, sizeof(utf32), 1201 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1202 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1203 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1204 } 1205 1206 /* Test UTF-32LE bad data handling*/ 1207 { 1208 static const uint8_t utf32[]={ 1209 0x61, 0x00, 0x00, 0x00, 1210 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1211 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1212 0x62, 0x00, 0x00, 0x00, 1213 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1214 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1215 0x62, 0x01, 0x00, 0x00, 1216 0x62, 0x02, 0x00, 0x00, 1217 }; 1218 1219 static const uint16_t utf32Expected[]={ 1220 0x0061, 1221 0xfffd, /* 0x110000 out of range */ 1222 0xDBFF, /* 0x10FFFF in range */ 1223 0xDFFF, 1224 0x0062, 1225 0xfffd, /* 0xffffffff out of range */ 1226 0xfffd, /* 0x7fffffff out of range */ 1227 0x0162, 1228 0x0262 1229 }; 1230 static const int32_t utf32Offsets[]={ 1231 0, 4, 8, 8, 12, 16, 20, 24, 28 1232 }; 1233 static const uint8_t utf32ExpectedBack[]={ 1234 0x61, 0x00, 0x00, 0x00, 1235 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1236 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1237 0x62, 0x00, 0x00, 0x00, 1238 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1239 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1240 0x62, 0x01, 0x00, 0x00, 1241 0x62, 0x02, 0x00, 0x00 1242 }; 1243 static const int32_t utf32OffsetsBack[]={ 1244 0,0,0,0, 1245 1,1,1,1, 1246 2,2,2,2, 1247 4,4,4,4, 1248 5,5,5,5, 1249 6,6,6,6, 1250 7,7,7,7, 1251 8,8,8,8 1252 }; 1253 testConvertToU(utf32, sizeof(utf32), 1254 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1255 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1256 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1257 } 1258 } 1259 1260 static void TestCoverageMBCS(){ 1261 #if 0 1262 UErrorCode status = U_ZERO_ERROR; 1263 const char *directory = loadTestData(&status); 1264 char* tdpath = NULL; 1265 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1266 int len = strlen(directory); 1267 char* index=NULL; 1268 1269 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1270 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1271 log_verbose("Retrieved data directory %s \n",saveDirectory); 1272 uprv_strcpy(tdpath,directory); 1273 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1274 1275 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1276 *(index+1)=0; 1277 } 1278 u_setDataDirectory(tdpath); 1279 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1280 #endif 1281 1282 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1283 which is test file for MBCS conversion with single-byte codepage data.*/ 1284 { 1285 1286 /* MBCS with single byte codepage data test1.ucm*/ 1287 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1288 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1289 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1290 1291 /*from Unicode*/ 1292 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1293 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1294 } 1295 1296 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm 1297 which is test file for MBCS conversion with three-byte codepage data.*/ 1298 { 1299 1300 /* MBCS with three byte codepage data test3.ucm*/ 1301 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1302 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1303 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1304 1305 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1306 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1307 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1308 1309 /*from Unicode*/ 1310 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1311 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1312 1313 /*to Unicode*/ 1314 testConvertToU(test3input, sizeof(test3input), 1315 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1316 1317 } 1318 1319 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm 1320 which is test file for MBCS conversion with four-byte codepage data.*/ 1321 { 1322 1323 /* MBCS with three byte codepage data test4.ucm*/ 1324 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1325 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1326 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1327 1328 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1329 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1330 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1331 1332 /*from Unicode*/ 1333 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1334 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1335 1336 /*to Unicode*/ 1337 testConvertToU(test4input, sizeof(test4input), 1338 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1339 1340 } 1341 #if 0 1342 free(tdpath); 1343 /* restore the original data directory */ 1344 log_verbose("Setting the data directory to %s \n", saveDirectory); 1345 u_setDataDirectory(saveDirectory); 1346 free(saveDirectory); 1347 #endif 1348 1349 } 1350 1351 static void TestConverterType(const char *convName, UConverterType convType) { 1352 UConverter* myConverter; 1353 UErrorCode err = U_ZERO_ERROR; 1354 1355 myConverter = my_ucnv_open(convName, &err); 1356 1357 if (U_FAILURE(err)) { 1358 log_data_err("Failed to create an %s converter\n", convName); 1359 return; 1360 } 1361 else 1362 { 1363 if (ucnv_getType(myConverter)!=convType) { 1364 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", 1365 convName, convType); 1366 } 1367 else { 1368 log_verbose("ucnv_getType %s ok\n", convName); 1369 } 1370 } 1371 ucnv_close(myConverter); 1372 } 1373 1374 static void TestConverterTypesAndStarters() 1375 { 1376 #if !UCONFIG_NO_LEGACY_CONVERSION 1377 UConverter* myConverter; 1378 UErrorCode err = U_ZERO_ERROR; 1379 UBool mystarters[256]; 1380 1381 /* const UBool expectedKSCstarters[256] = { 1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1394 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1395 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1396 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1397 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1398 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1399 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1401 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1406 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1407 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1408 1409 1410 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1411 1412 myConverter = ucnv_open("ksc", &err); 1413 if (U_FAILURE(err)) { 1414 log_data_err("Failed to create an ibm-ksc converter\n"); 1415 return; 1416 } 1417 else 1418 { 1419 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1420 log_err("ucnv_getType Failed for ibm-949\n"); 1421 else 1422 log_verbose("ucnv_getType ibm-949 ok\n"); 1423 1424 if(myConverter!=NULL) 1425 ucnv_getStarters(myConverter, mystarters, &err); 1426 1427 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1428 log_err("Failed ucnv_getStarters for ksc\n"); 1429 else 1430 log_verbose("ucnv_getStarters ok\n");*/ 1431 1432 } 1433 ucnv_close(myConverter); 1434 1435 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1436 TestConverterType("ibm-878", UCNV_SBCS); 1437 #endif 1438 1439 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1440 1441 TestConverterType("ibm-1208", UCNV_UTF8); 1442 1443 TestConverterType("utf-8", UCNV_UTF8); 1444 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1445 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1446 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1447 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1448 1449 #if !UCONFIG_NO_LEGACY_CONVERSION 1450 1451 #if defined(U_ENABLE_GENERIC_ISO_2022) 1452 TestConverterType("iso-2022", UCNV_ISO_2022); 1453 #endif 1454 1455 TestConverterType("hz", UCNV_HZ); 1456 #endif 1457 1458 TestConverterType("scsu", UCNV_SCSU); 1459 1460 #if !UCONFIG_NO_LEGACY_CONVERSION 1461 TestConverterType("x-iscii-de", UCNV_ISCII); 1462 #endif 1463 1464 TestConverterType("ascii", UCNV_US_ASCII); 1465 TestConverterType("utf-7", UCNV_UTF7); 1466 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1467 TestConverterType("bocu-1", UCNV_BOCU1); 1468 } 1469 1470 static void 1471 TestAmbiguousConverter(UConverter *cnv) { 1472 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1473 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1474 1475 const char *s; 1476 UChar *u; 1477 UErrorCode errorCode; 1478 UBool isAmbiguous; 1479 1480 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1481 errorCode=U_ZERO_ERROR; 1482 s=inBytes; 1483 u=outUnicode; 1484 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1485 if(U_FAILURE(errorCode)) { 1486 /* we do not care about general failures in this test; the input may just not be mappable */ 1487 return; 1488 } 1489 1490 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1491 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1492 /* There are some encodings that are partially ASCII based, 1493 like the ISO-7 and GSM series of codepages, which we ignore. */ 1494 return; 1495 } 1496 1497 isAmbiguous=ucnv_isAmbiguous(cnv); 1498 1499 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1500 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1501 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1502 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1503 return; 1504 } 1505 1506 if(outUnicode[2]!=0x5c) { 1507 /* needs fixup, fix it */ 1508 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1509 if(outUnicode[2]!=0x5c) { 1510 /* the fix failed */ 1511 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1512 return; 1513 } 1514 } 1515 } 1516 1517 static void TestAmbiguous() 1518 { 1519 UErrorCode status = U_ZERO_ERROR; 1520 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1521 static const char target[] = { 1522 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1523 0x5c, 0x75, 0x73, 0x72, 1524 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1525 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1526 0x5c, 0x64, 0x61, 0x74, 0x61, 1527 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1528 0 1529 }; 1530 UChar asciiResult[200], sjisResult[200]; 1531 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1532 const char *name; 1533 1534 /* enumerate all converters */ 1535 status=U_ZERO_ERROR; 1536 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1537 cnv=ucnv_open(name, &status); 1538 if(U_SUCCESS(status)) { 1539 /* BEGIN android-changed 1540 To save space, Android does not build full ISO-2022-CN CJK tables. */ 1541 const char* cnvName = ucnv_getName(cnv, &status); 1542 if (strlen(cnvName) < 8 || 1543 strncmp(cnvName, "ISO_2022_CN", 8) != 0) { 1544 TestAmbiguousConverter(cnv); 1545 } 1546 /* END android-changed */ 1547 ucnv_close(cnv); 1548 } else { 1549 log_err("error: unable to open available converter \"%s\"\n", name); 1550 status=U_ZERO_ERROR; 1551 } 1552 } 1553 1554 #if !UCONFIG_NO_LEGACY_CONVERSION 1555 sjis_cnv = ucnv_open("ibm-943", &status); 1556 if (U_FAILURE(status)) 1557 { 1558 log_data_err("Failed to create a SJIS converter\n"); 1559 return; 1560 } 1561 ascii_cnv = ucnv_open("LATIN-1", &status); 1562 if (U_FAILURE(status)) 1563 { 1564 log_data_err("Failed to create a LATIN-1 converter\n"); 1565 ucnv_close(sjis_cnv); 1566 return; 1567 } 1568 /* convert target from SJIS to Unicode */ 1569 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1570 if (U_FAILURE(status)) 1571 { 1572 log_err("Failed to convert the SJIS string.\n"); 1573 ucnv_close(sjis_cnv); 1574 ucnv_close(ascii_cnv); 1575 return; 1576 } 1577 /* convert target from Latin-1 to Unicode */ 1578 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1579 if (U_FAILURE(status)) 1580 { 1581 log_err("Failed to convert the Latin-1 string.\n"); 1582 ucnv_close(sjis_cnv); 1583 ucnv_close(ascii_cnv); 1584 return; 1585 } 1586 if (!ucnv_isAmbiguous(sjis_cnv)) 1587 { 1588 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1589 ucnv_close(sjis_cnv); 1590 ucnv_close(ascii_cnv); 1591 return; 1592 } 1593 if (u_strcmp(sjisResult, asciiResult) == 0) 1594 { 1595 log_err("File separators for SJIS don't need to be fixed.\n"); 1596 } 1597 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1598 if (u_strcmp(sjisResult, asciiResult) != 0) 1599 { 1600 log_err("Fixing file separator for SJIS failed.\n"); 1601 } 1602 ucnv_close(sjis_cnv); 1603 ucnv_close(ascii_cnv); 1604 #endif 1605 } 1606 1607 static void 1608 TestSignatureDetection(){ 1609 /* with null terminated strings */ 1610 { 1611 static const char* data[] = { 1612 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1613 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1614 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1615 "\x0E\xFE\xFF\x00", /* SCSU */ 1616 1617 "\xFE\xFF", /* UTF-16BE */ 1618 "\xFF\xFE", /* UTF-16LE */ 1619 "\xEF\xBB\xBF", /* UTF-8 */ 1620 "\x0E\xFE\xFF", /* SCSU */ 1621 1622 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1623 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1624 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1625 "\x0E\xFE\xFF\x41", /* SCSU */ 1626 1627 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1628 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1629 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1630 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1631 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1632 1633 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1634 }; 1635 static const char* expected[] = { 1636 "UTF-16BE", 1637 "UTF-16LE", 1638 "UTF-8", 1639 "SCSU", 1640 1641 "UTF-16BE", 1642 "UTF-16LE", 1643 "UTF-8", 1644 "SCSU", 1645 1646 "UTF-16BE", 1647 "UTF-16LE", 1648 "UTF-8", 1649 "SCSU", 1650 1651 "UTF-7", 1652 "UTF-7", 1653 "UTF-7", 1654 "UTF-7", 1655 "UTF-7", 1656 "UTF-EBCDIC" 1657 }; 1658 static const int32_t expectedLength[] ={ 1659 2, 1660 2, 1661 3, 1662 3, 1663 1664 2, 1665 2, 1666 3, 1667 3, 1668 1669 2, 1670 2, 1671 3, 1672 3, 1673 1674 5, 1675 4, 1676 4, 1677 4, 1678 4, 1679 4 1680 }; 1681 int i=0; 1682 UErrorCode err; 1683 int32_t signatureLength = -1; 1684 const char* source = NULL; 1685 const char* enc = NULL; 1686 for( ; i<sizeof(data)/sizeof(char*); i++){ 1687 err = U_ZERO_ERROR; 1688 source = data[i]; 1689 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1690 if(U_FAILURE(err)){ 1691 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1692 continue; 1693 } 1694 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1695 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1696 continue; 1697 } 1698 if(signatureLength != expectedLength[i]){ 1699 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1700 } 1701 } 1702 } 1703 { 1704 static const char* data[] = { 1705 "\xFE\xFF\x00", /* UTF-16BE */ 1706 "\xFF\xFE\x00", /* UTF-16LE */ 1707 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1708 "\x0E\xFE\xFF\x00", /* SCSU */ 1709 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1710 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1711 "\xFE\xFF", /* UTF-16BE */ 1712 "\xFF\xFE", /* UTF-16LE */ 1713 "\xEF\xBB\xBF", /* UTF-8 */ 1714 "\x0E\xFE\xFF", /* SCSU */ 1715 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1716 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1717 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1718 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1719 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1720 "\x0E\xFE\xFF\x41", /* SCSU */ 1721 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1722 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1723 "\xFB\xEE\x28", /* BOCU-1 */ 1724 "\xFF\x41\x42" /* NULL */ 1725 }; 1726 static const int len[] = { 1727 3, 1728 3, 1729 4, 1730 4, 1731 4, 1732 4, 1733 2, 1734 2, 1735 3, 1736 3, 1737 4, 1738 4, 1739 4, 1740 4, 1741 4, 1742 4, 1743 5, 1744 5, 1745 3, 1746 3 1747 }; 1748 1749 static const char* expected[] = { 1750 "UTF-16BE", 1751 "UTF-16LE", 1752 "UTF-8", 1753 "SCSU", 1754 "UTF-32BE", 1755 "UTF-32LE", 1756 "UTF-16BE", 1757 "UTF-16LE", 1758 "UTF-8", 1759 "SCSU", 1760 "UTF-32BE", 1761 "UTF-32LE", 1762 "UTF-16BE", 1763 "UTF-16LE", 1764 "UTF-8", 1765 "SCSU", 1766 "UTF-32BE", 1767 "UTF-32LE", 1768 "BOCU-1", 1769 NULL 1770 }; 1771 static const int32_t expectedLength[] ={ 1772 2, 1773 2, 1774 3, 1775 3, 1776 4, 1777 4, 1778 2, 1779 2, 1780 3, 1781 3, 1782 4, 1783 4, 1784 2, 1785 2, 1786 3, 1787 3, 1788 4, 1789 4, 1790 3, 1791 0 1792 }; 1793 int i=0; 1794 UErrorCode err; 1795 int32_t signatureLength = -1; 1796 int32_t sourceLength=-1; 1797 const char* source = NULL; 1798 const char* enc = NULL; 1799 for( ; i<sizeof(data)/sizeof(char*); i++){ 1800 err = U_ZERO_ERROR; 1801 source = data[i]; 1802 sourceLength = len[i]; 1803 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); 1804 if(U_FAILURE(err)){ 1805 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1806 continue; 1807 } 1808 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1809 if(expected[i] !=NULL){ 1810 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1811 continue; 1812 } 1813 } 1814 if(signatureLength != expectedLength[i]){ 1815 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1816 } 1817 } 1818 } 1819 } 1820 1821 static void TestUTF7() { 1822 /* test input */ 1823 static const uint8_t in[]={ 1824 /* H - +Jjo- - ! +- +2AHcAQ */ 1825 0x48, 1826 0x2d, 1827 0x2b, 0x4a, 0x6a, 0x6f, 1828 0x2d, 0x2d, 1829 0x21, 1830 0x2b, 0x2d, 1831 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1832 }; 1833 1834 /* expected test results */ 1835 static const int32_t results[]={ 1836 /* number of bytes read, code point */ 1837 1, 0x48, 1838 1, 0x2d, 1839 4, 0x263a, /* <WHITE SMILING FACE> */ 1840 2, 0x2d, 1841 1, 0x21, 1842 2, 0x2b, 1843 7, 0x10401 1844 }; 1845 1846 const char *cnvName; 1847 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1848 UErrorCode errorCode=U_ZERO_ERROR; 1849 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1850 if(U_FAILURE(errorCode)) { 1851 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1852 return; 1853 } 1854 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1855 /* Test the condition when source >= sourceLimit */ 1856 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1857 cnvName = ucnv_getName(cnv, &errorCode); 1858 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1859 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1860 } 1861 ucnv_close(cnv); 1862 } 1863 1864 static void TestIMAP() { 1865 /* test input */ 1866 static const uint8_t in[]={ 1867 /* H - &Jjo- - ! &- &2AHcAQ- \ */ 1868 0x48, 1869 0x2d, 1870 0x26, 0x4a, 0x6a, 0x6f, 1871 0x2d, 0x2d, 1872 0x21, 1873 0x26, 0x2d, 1874 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1875 }; 1876 1877 /* expected test results */ 1878 static const int32_t results[]={ 1879 /* number of bytes read, code point */ 1880 1, 0x48, 1881 1, 0x2d, 1882 4, 0x263a, /* <WHITE SMILING FACE> */ 1883 2, 0x2d, 1884 1, 0x21, 1885 2, 0x26, 1886 7, 0x10401 1887 }; 1888 1889 const char *cnvName; 1890 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1891 UErrorCode errorCode=U_ZERO_ERROR; 1892 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1893 if(U_FAILURE(errorCode)) { 1894 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1895 return; 1896 } 1897 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1898 /* Test the condition when source >= sourceLimit */ 1899 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1900 cnvName = ucnv_getName(cnv, &errorCode); 1901 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1902 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1903 } 1904 ucnv_close(cnv); 1905 } 1906 1907 static void TestUTF8() { 1908 /* test input */ 1909 static const uint8_t in[]={ 1910 0x61, 1911 0xc2, 0x80, 1912 0xe0, 0xa0, 0x80, 1913 0xf0, 0x90, 0x80, 0x80, 1914 0xf4, 0x84, 0x8c, 0xa1, 1915 0xf0, 0x90, 0x90, 0x81 1916 }; 1917 1918 /* expected test results */ 1919 static const int32_t results[]={ 1920 /* number of bytes read, code point */ 1921 1, 0x61, 1922 2, 0x80, 1923 3, 0x800, 1924 4, 0x10000, 1925 4, 0x104321, 1926 4, 0x10401 1927 }; 1928 1929 /* error test input */ 1930 static const uint8_t in2[]={ 1931 0x61, 1932 0xc0, 0x80, /* illegal non-shortest form */ 1933 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1934 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1935 0xc0, 0xc0, /* illegal trail byte */ 1936 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1937 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1938 0xfe, /* illegal byte altogether */ 1939 0x62 1940 }; 1941 1942 /* expected error test results */ 1943 static const int32_t results2[]={ 1944 /* number of bytes read, code point */ 1945 1, 0x61, 1946 22, 0x62 1947 }; 1948 1949 UConverterToUCallback cb; 1950 const void *p; 1951 1952 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1953 UErrorCode errorCode=U_ZERO_ERROR; 1954 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1955 if(U_FAILURE(errorCode)) { 1956 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1957 return; 1958 } 1959 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1960 /* Test the condition when source >= sourceLimit */ 1961 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1962 1963 /* test error behavior with a skip callback */ 1964 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1965 source=(const char *)in2; 1966 limit=(const char *)(in2+sizeof(in2)); 1967 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1968 1969 ucnv_close(cnv); 1970 } 1971 1972 static void TestCESU8() { 1973 /* test input */ 1974 static const uint8_t in[]={ 1975 0x61, 1976 0xc2, 0x80, 1977 0xe0, 0xa0, 0x80, 1978 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1979 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1980 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1981 0xef, 0xbf, 0xbc 1982 }; 1983 1984 /* expected test results */ 1985 static const int32_t results[]={ 1986 /* number of bytes read, code point */ 1987 1, 0x61, 1988 2, 0x80, 1989 3, 0x800, 1990 6, 0x10000, 1991 3, 0xdc01, 1992 -1,0xd802, /* may read 3 or 6 bytes */ 1993 -1,0x10ffff,/* may read 0 or 3 bytes */ 1994 3, 0xfffc 1995 }; 1996 1997 /* error test input */ 1998 static const uint8_t in2[]={ 1999 0x61, 2000 0xc0, 0x80, /* illegal non-shortest form */ 2001 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 2002 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 2003 0xc0, 0xc0, /* illegal trail byte */ 2004 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 2005 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 2006 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 2007 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 2008 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 2009 0xfe, /* illegal byte altogether */ 2010 0x62 2011 }; 2012 2013 /* expected error test results */ 2014 static const int32_t results2[]={ 2015 /* number of bytes read, code point */ 2016 1, 0x61, 2017 34, 0x62 2018 }; 2019 2020 UConverterToUCallback cb; 2021 const void *p; 2022 2023 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 2024 UErrorCode errorCode=U_ZERO_ERROR; 2025 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2026 if(U_FAILURE(errorCode)) { 2027 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2028 return; 2029 } 2030 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2031 /* Test the condition when source >= sourceLimit */ 2032 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2033 2034 /* test error behavior with a skip callback */ 2035 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2036 source=(const char *)in2; 2037 limit=(const char *)(in2+sizeof(in2)); 2038 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2039 2040 ucnv_close(cnv); 2041 } 2042 2043 static void TestUTF16() { 2044 /* test input */ 2045 static const uint8_t in1[]={ 2046 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2047 }; 2048 static const uint8_t in2[]={ 2049 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2050 }; 2051 static const uint8_t in3[]={ 2052 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2053 }; 2054 2055 /* expected test results */ 2056 static const int32_t results1[]={ 2057 /* number of bytes read, code point */ 2058 4, 0x4e00, 2059 2, 0xfeff 2060 }; 2061 static const int32_t results2[]={ 2062 /* number of bytes read, code point */ 2063 4, 0x004e, 2064 2, 0xfffe 2065 }; 2066 static const int32_t results3[]={ 2067 /* number of bytes read, code point */ 2068 2, 0xfefe, 2069 2, 0x4e00, 2070 2, 0xfeff, 2071 4, 0x20001 2072 }; 2073 2074 const char *source, *limit; 2075 2076 UErrorCode errorCode=U_ZERO_ERROR; 2077 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2078 if(U_FAILURE(errorCode)) { 2079 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2080 return; 2081 } 2082 2083 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2084 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2085 2086 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2087 ucnv_resetToUnicode(cnv); 2088 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2089 2090 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2091 ucnv_resetToUnicode(cnv); 2092 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2093 2094 /* Test the condition when source >= sourceLimit */ 2095 ucnv_resetToUnicode(cnv); 2096 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2097 2098 ucnv_close(cnv); 2099 } 2100 2101 static void TestUTF16BE() { 2102 /* test input */ 2103 static const uint8_t in[]={ 2104 0x00, 0x61, 2105 0x00, 0xc0, 2106 0x00, 0x31, 2107 0x00, 0xf4, 2108 0xce, 0xfe, 2109 0xd8, 0x01, 0xdc, 0x01 2110 }; 2111 2112 /* expected test results */ 2113 static const int32_t results[]={ 2114 /* number of bytes read, code point */ 2115 2, 0x61, 2116 2, 0xc0, 2117 2, 0x31, 2118 2, 0xf4, 2119 2, 0xcefe, 2120 4, 0x10401 2121 }; 2122 2123 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2124 UErrorCode errorCode=U_ZERO_ERROR; 2125 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2126 if(U_FAILURE(errorCode)) { 2127 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); 2128 return; 2129 } 2130 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2131 /* Test the condition when source >= sourceLimit */ 2132 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2133 /*Test for the condition where there is an invalid character*/ 2134 { 2135 static const uint8_t source2[]={0x61}; 2136 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2137 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2138 } 2139 #if 0 2140 /* 2141 * Test disabled because currently the UTF-16BE/LE converters are supposed 2142 * to not set errors for unpaired surrogates. 2143 * This may change with 2144 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2145 */ 2146 2147 /*Test for the condition where there is a surrogate pair*/ 2148 { 2149 const uint8_t source2[]={0xd8, 0x01}; 2150 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2151 } 2152 #endif 2153 ucnv_close(cnv); 2154 } 2155 2156 static void 2157 TestUTF16LE() { 2158 /* test input */ 2159 static const uint8_t in[]={ 2160 0x61, 0x00, 2161 0x31, 0x00, 2162 0x4e, 0x2e, 2163 0x4e, 0x00, 2164 0x01, 0xd8, 0x01, 0xdc 2165 }; 2166 2167 /* expected test results */ 2168 static const int32_t results[]={ 2169 /* number of bytes read, code point */ 2170 2, 0x61, 2171 2, 0x31, 2172 2, 0x2e4e, 2173 2, 0x4e, 2174 4, 0x10401 2175 }; 2176 2177 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2178 UErrorCode errorCode=U_ZERO_ERROR; 2179 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2180 if(U_FAILURE(errorCode)) { 2181 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2182 return; 2183 } 2184 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2185 /* Test the condition when source >= sourceLimit */ 2186 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2187 /*Test for the condition where there is an invalid character*/ 2188 { 2189 static const uint8_t source2[]={0x61}; 2190 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2191 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2192 } 2193 #if 0 2194 /* 2195 * Test disabled because currently the UTF-16BE/LE converters are supposed 2196 * to not set errors for unpaired surrogates. 2197 * This may change with 2198 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2199 */ 2200 2201 /*Test for the condition where there is a surrogate character*/ 2202 { 2203 static const uint8_t source2[]={0x01, 0xd8}; 2204 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2205 } 2206 #endif 2207 2208 ucnv_close(cnv); 2209 } 2210 2211 static void TestUTF32() { 2212 /* test input */ 2213 static const uint8_t in1[]={ 2214 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2215 }; 2216 static const uint8_t in2[]={ 2217 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2218 }; 2219 static const uint8_t in3[]={ 2220 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2221 }; 2222 2223 /* expected test results */ 2224 static const int32_t results1[]={ 2225 /* number of bytes read, code point */ 2226 8, 0x100f00, 2227 4, 0xfeff 2228 }; 2229 static const int32_t results2[]={ 2230 /* number of bytes read, code point */ 2231 8, 0x0f1000, 2232 4, 0xfffe 2233 }; 2234 static const int32_t results3[]={ 2235 /* number of bytes read, code point */ 2236 4, 0xfefe, 2237 4, 0x100f00, 2238 4, 0xfffd, /* unmatched surrogate */ 2239 4, 0xfffd /* unmatched surrogate */ 2240 }; 2241 2242 const char *source, *limit; 2243 2244 UErrorCode errorCode=U_ZERO_ERROR; 2245 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2246 if(U_FAILURE(errorCode)) { 2247 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2248 return; 2249 } 2250 2251 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2252 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2253 2254 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2255 ucnv_resetToUnicode(cnv); 2256 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2257 2258 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2259 ucnv_resetToUnicode(cnv); 2260 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2261 2262 /* Test the condition when source >= sourceLimit */ 2263 ucnv_resetToUnicode(cnv); 2264 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2265 2266 ucnv_close(cnv); 2267 } 2268 2269 static void 2270 TestUTF32BE() { 2271 /* test input */ 2272 static const uint8_t in[]={ 2273 0x00, 0x00, 0x00, 0x61, 2274 0x00, 0x00, 0x30, 0x61, 2275 0x00, 0x00, 0xdc, 0x00, 2276 0x00, 0x00, 0xd8, 0x00, 2277 0x00, 0x00, 0xdf, 0xff, 2278 0x00, 0x00, 0xff, 0xfe, 2279 0x00, 0x10, 0xab, 0xcd, 2280 0x00, 0x10, 0xff, 0xff 2281 }; 2282 2283 /* expected test results */ 2284 static const int32_t results[]={ 2285 /* number of bytes read, code point */ 2286 4, 0x61, 2287 4, 0x3061, 2288 4, 0xfffd, 2289 4, 0xfffd, 2290 4, 0xfffd, 2291 4, 0xfffe, 2292 4, 0x10abcd, 2293 4, 0x10ffff 2294 }; 2295 2296 /* error test input */ 2297 static const uint8_t in2[]={ 2298 0x00, 0x00, 0x00, 0x61, 2299 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2300 0x00, 0x00, 0x00, 0x62, 2301 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2302 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2303 0x00, 0x00, 0x01, 0x62, 2304 0x00, 0x00, 0x02, 0x62 2305 }; 2306 2307 /* expected error test results */ 2308 static const int32_t results2[]={ 2309 /* number of bytes read, code point */ 2310 4, 0x61, 2311 8, 0x62, 2312 12, 0x162, 2313 4, 0x262 2314 }; 2315 2316 UConverterToUCallback cb; 2317 const void *p; 2318 2319 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2320 UErrorCode errorCode=U_ZERO_ERROR; 2321 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2322 if(U_FAILURE(errorCode)) { 2323 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2324 return; 2325 } 2326 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2327 2328 /* Test the condition when source >= sourceLimit */ 2329 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2330 2331 /* test error behavior with a skip callback */ 2332 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2333 source=(const char *)in2; 2334 limit=(const char *)(in2+sizeof(in2)); 2335 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2336 2337 ucnv_close(cnv); 2338 } 2339 2340 static void 2341 TestUTF32LE() { 2342 /* test input */ 2343 static const uint8_t in[]={ 2344 0x61, 0x00, 0x00, 0x00, 2345 0x61, 0x30, 0x00, 0x00, 2346 0x00, 0xdc, 0x00, 0x00, 2347 0x00, 0xd8, 0x00, 0x00, 2348 0xff, 0xdf, 0x00, 0x00, 2349 0xfe, 0xff, 0x00, 0x00, 2350 0xcd, 0xab, 0x10, 0x00, 2351 0xff, 0xff, 0x10, 0x00 2352 }; 2353 2354 /* expected test results */ 2355 static const int32_t results[]={ 2356 /* number of bytes read, code point */ 2357 4, 0x61, 2358 4, 0x3061, 2359 4, 0xfffd, 2360 4, 0xfffd, 2361 4, 0xfffd, 2362 4, 0xfffe, 2363 4, 0x10abcd, 2364 4, 0x10ffff 2365 }; 2366 2367 /* error test input */ 2368 static const uint8_t in2[]={ 2369 0x61, 0x00, 0x00, 0x00, 2370 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2371 0x62, 0x00, 0x00, 0x00, 2372 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2373 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2374 0x62, 0x01, 0x00, 0x00, 2375 0x62, 0x02, 0x00, 0x00, 2376 }; 2377 2378 /* expected error test results */ 2379 static const int32_t results2[]={ 2380 /* number of bytes read, code point */ 2381 4, 0x61, 2382 8, 0x62, 2383 12, 0x162, 2384 4, 0x262, 2385 }; 2386 2387 UConverterToUCallback cb; 2388 const void *p; 2389 2390 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2391 UErrorCode errorCode=U_ZERO_ERROR; 2392 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2393 if(U_FAILURE(errorCode)) { 2394 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2395 return; 2396 } 2397 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2398 2399 /* Test the condition when source >= sourceLimit */ 2400 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2401 2402 /* test error behavior with a skip callback */ 2403 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2404 source=(const char *)in2; 2405 limit=(const char *)(in2+sizeof(in2)); 2406 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2407 2408 ucnv_close(cnv); 2409 } 2410 2411 static void 2412 TestLATIN1() { 2413 /* test input */ 2414 static const uint8_t in[]={ 2415 0x61, 2416 0x31, 2417 0x32, 2418 0xc0, 2419 0xf0, 2420 0xf4, 2421 }; 2422 2423 /* expected test results */ 2424 static const int32_t results[]={ 2425 /* number of bytes read, code point */ 2426 1, 0x61, 2427 1, 0x31, 2428 1, 0x32, 2429 1, 0xc0, 2430 1, 0xf0, 2431 1, 0xf4, 2432 }; 2433 static const uint16_t in1[] = { 2434 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2435 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2436 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2437 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2438 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2439 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2440 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2441 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2442 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2443 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2444 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2445 0xcb, 0x82 2446 }; 2447 static const uint8_t out1[] = { 2448 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2449 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2450 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2451 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2452 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2453 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2454 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2455 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2456 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2457 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2458 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2459 0xcb, 0x82 2460 }; 2461 static const uint16_t in2[]={ 2462 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2463 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2464 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2465 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2466 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2467 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2468 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2469 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2470 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2471 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2472 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2473 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2474 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2475 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2476 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2477 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2478 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2479 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2480 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2481 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2482 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2483 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2484 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2485 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2486 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2487 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2488 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2489 0x37, 0x20, 0x2A, 0x2F, 2490 }; 2491 static const unsigned char out2[]={ 2492 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2493 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2494 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2495 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2496 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2497 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2498 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2499 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2500 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2501 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2502 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2503 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2504 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2505 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2506 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2507 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2508 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2509 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2510 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2511 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2512 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2513 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2514 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2515 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2516 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2517 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2518 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2519 0x37, 0x20, 0x2A, 0x2F, 2520 }; 2521 const char *source=(const char *)in; 2522 const char *limit=(const char *)in+sizeof(in); 2523 2524 UErrorCode errorCode=U_ZERO_ERROR; 2525 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2526 if(U_FAILURE(errorCode)) { 2527 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2528 return; 2529 } 2530 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2531 /* Test the condition when source >= sourceLimit */ 2532 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2533 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2534 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2535 2536 ucnv_close(cnv); 2537 } 2538 2539 static void 2540 TestSBCS() { 2541 /* test input */ 2542 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2543 /* expected test results */ 2544 static const int32_t results[]={ 2545 /* number of bytes read, code point */ 2546 1, 0x61, 2547 1, 0xbf, 2548 1, 0xc4, 2549 1, 0x2021, 2550 1, 0xf8ff, 2551 1, 0x00d9 2552 }; 2553 2554 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2555 UErrorCode errorCode=U_ZERO_ERROR; 2556 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2557 if(U_FAILURE(errorCode)) { 2558 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2559 return; 2560 } 2561 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2562 /* Test the condition when source >= sourceLimit */ 2563 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2564 /*Test for Illegal character */ /* 2565 { 2566 static const uint8_t input1[]={ 0xA1 }; 2567 const char* illegalsource=(const char*)input1; 2568 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2569 } 2570 */ 2571 ucnv_close(cnv); 2572 } 2573 2574 static void 2575 TestDBCS() { 2576 /* test input */ 2577 static const uint8_t in[]={ 2578 0x44, 0x6a, 2579 0xc4, 0x9c, 2580 0x7a, 0x74, 2581 0x46, 0xab, 2582 0x42, 0x5b, 2583 2584 }; 2585 2586 /* expected test results */ 2587 static const int32_t results[]={ 2588 /* number of bytes read, code point */ 2589 2, 0x00a7, 2590 2, 0xe1d2, 2591 2, 0x6962, 2592 2, 0xf842, 2593 2, 0xffe5, 2594 }; 2595 2596 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2597 UErrorCode errorCode=U_ZERO_ERROR; 2598 2599 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2600 if(U_FAILURE(errorCode)) { 2601 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2602 return; 2603 } 2604 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2605 /* Test the condition when source >= sourceLimit */ 2606 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2607 /*Test for the condition where there is an invalid character*/ 2608 { 2609 static const uint8_t source2[]={0x1a, 0x1b}; 2610 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2611 } 2612 /*Test for the condition where we have a truncated char*/ 2613 { 2614 static const uint8_t source1[]={0xc4}; 2615 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2616 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2617 } 2618 ucnv_close(cnv); 2619 } 2620 2621 static void 2622 TestMBCS() { 2623 /* test input */ 2624 static const uint8_t in[]={ 2625 0x01, 2626 0xa6, 0xa3, 2627 0x00, 2628 0xa6, 0xa1, 2629 0x08, 2630 0xc2, 0x76, 2631 0xc2, 0x78, 2632 2633 }; 2634 2635 /* expected test results */ 2636 static const int32_t results[]={ 2637 /* number of bytes read, code point */ 2638 1, 0x0001, 2639 2, 0x250c, 2640 1, 0x0000, 2641 2, 0x2500, 2642 1, 0x0008, 2643 2, 0xd60c, 2644 2, 0xd60e, 2645 }; 2646 2647 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2648 UErrorCode errorCode=U_ZERO_ERROR; 2649 2650 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2651 if(U_FAILURE(errorCode)) { 2652 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2653 return; 2654 } 2655 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2656 /* Test the condition when source >= sourceLimit */ 2657 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2658 /*Test for the condition where there is an invalid character*/ 2659 { 2660 static const uint8_t source2[]={0xa1, 0x80}; 2661 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2662 } 2663 /*Test for the condition where we have a truncated char*/ 2664 { 2665 static const uint8_t source1[]={0xc4}; 2666 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2667 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2668 } 2669 ucnv_close(cnv); 2670 2671 } 2672 2673 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2674 static void 2675 TestICCRunout() { 2676 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2677 2678 const char *cnvName = "ibm-1363"; 2679 UErrorCode status = U_ZERO_ERROR; 2680 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; 2681 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ 2682 const char *source = sourceData; 2683 const char *sourceLim = sourceData+sizeof(sourceData); 2684 UChar c1, c2, c3; 2685 UConverter *cnv=ucnv_open(cnvName, &status); 2686 if(U_FAILURE(status)) { 2687 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2688 return; 2689 } 2690 2691 #if 0 2692 { 2693 UChar targetBuf[256]; 2694 UChar *target = targetBuf; 2695 UChar *targetLim = target+256; 2696 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2697 2698 log_info("After convert: target@%d, source@%d, status%s\n", 2699 target-targetBuf, source-sourceData, u_errorName(status)); 2700 2701 if(U_FAILURE(status)) { 2702 log_err("Failed to convert: %s\n", u_errorName(status)); 2703 } else { 2704 2705 } 2706 } 2707 #endif 2708 2709 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2710 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); 2711 2712 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2713 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); 2714 2715 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2716 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); 2717 2718 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { 2719 log_verbose("OK\n"); 2720 } else { 2721 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); 2722 } 2723 2724 ucnv_close(cnv); 2725 2726 } 2727 #endif 2728 2729 #ifdef U_ENABLE_GENERIC_ISO_2022 2730 2731 static void 2732 TestISO_2022() { 2733 /* test input */ 2734 static const uint8_t in[]={ 2735 0x1b, 0x25, 0x42, 2736 0x31, 2737 0x32, 2738 0x61, 2739 0xc2, 0x80, 2740 0xe0, 0xa0, 0x80, 2741 0xf0, 0x90, 0x80, 0x80 2742 }; 2743 2744 2745 2746 /* expected test results */ 2747 static const int32_t results[]={ 2748 /* number of bytes read, code point */ 2749 4, 0x0031, /* 4 bytes including the escape sequence */ 2750 1, 0x0032, 2751 1, 0x61, 2752 2, 0x80, 2753 3, 0x800, 2754 4, 0x10000 2755 }; 2756 2757 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2758 UErrorCode errorCode=U_ZERO_ERROR; 2759 UConverter *cnv; 2760 2761 cnv=ucnv_open("ISO_2022", &errorCode); 2762 if(U_FAILURE(errorCode)) { 2763 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2764 return; 2765 } 2766 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2767 2768 /* Test the condition when source >= sourceLimit */ 2769 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2770 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2771 /*Test for the condition where we have a truncated char*/ 2772 { 2773 static const uint8_t source1[]={0xc4}; 2774 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2775 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2776 } 2777 /*Test for the condition where there is an invalid character*/ 2778 { 2779 static const uint8_t source2[]={0xa1, 0x01}; 2780 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2781 } 2782 ucnv_close(cnv); 2783 } 2784 2785 #endif 2786 2787 static void 2788 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2789 const UChar* uSource; 2790 const UChar* uSourceLimit; 2791 const char* cSource; 2792 const char* cSourceLimit; 2793 UChar *uTargetLimit =NULL; 2794 UChar *uTarget; 2795 char *cTarget; 2796 const char *cTargetLimit; 2797 char *cBuf; 2798 UChar *uBuf; /*,*test;*/ 2799 int32_t uBufSize = 120; 2800 int len=0; 2801 int i=2; 2802 UErrorCode errorCode=U_ZERO_ERROR; 2803 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2804 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2805 ucnv_reset(cnv); 2806 for(;--i>0; ){ 2807 uSource = (UChar*) source; 2808 uSourceLimit=(const UChar*)sourceLimit; 2809 cTarget = cBuf; 2810 uTarget = uBuf; 2811 cSource = cBuf; 2812 cTargetLimit = cBuf; 2813 uTargetLimit = uBuf; 2814 2815 do{ 2816 2817 cTargetLimit = cTargetLimit+ i; 2818 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2819 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2820 errorCode=U_ZERO_ERROR; 2821 continue; 2822 } 2823 2824 if(U_FAILURE(errorCode)){ 2825 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2826 return; 2827 } 2828 2829 }while (uSource<uSourceLimit); 2830 2831 cSourceLimit =cTarget; 2832 do{ 2833 uTargetLimit=uTargetLimit+i; 2834 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2835 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2836 errorCode=U_ZERO_ERROR; 2837 continue; 2838 } 2839 if(U_FAILURE(errorCode)){ 2840 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2841 return; 2842 } 2843 }while(cSource<cSourceLimit); 2844 2845 uSource = source; 2846 /*test =uBuf;*/ 2847 for(len=0;len<(int)(source - sourceLimit);len++){ 2848 if(uBuf[len]!=uSource[len]){ 2849 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2850 } 2851 } 2852 } 2853 free(uBuf); 2854 free(cBuf); 2855 } 2856 /* Test for Jitterbug 778 */ 2857 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2858 const UChar* uSource; 2859 const UChar* uSourceLimit; 2860 const char* cSource; 2861 UChar *uTargetLimit =NULL; 2862 UChar *uTarget; 2863 char *cTarget; 2864 const char *cTargetLimit; 2865 char *cBuf; 2866 UChar *uBuf,*test; 2867 int32_t uBufSize = 120; 2868 int numCharsInTarget=0; 2869 UErrorCode errorCode=U_ZERO_ERROR; 2870 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2871 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2872 uSource = source; 2873 uSourceLimit=sourceLimit; 2874 cTarget = cBuf; 2875 cTargetLimit = cBuf +uBufSize*5; 2876 uTarget = uBuf; 2877 uTargetLimit = uBuf+ uBufSize*5; 2878 ucnv_reset(cnv); 2879 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2880 if(U_FAILURE(errorCode)){ 2881 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2882 return; 2883 } 2884 cSource = cBuf; 2885 test =uBuf; 2886 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2887 if(U_FAILURE(errorCode)){ 2888 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2889 return; 2890 } 2891 uSource = source; 2892 while(uSource<uSourceLimit){ 2893 if(*test!=*uSource){ 2894 2895 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 2896 } 2897 uSource++; 2898 test++; 2899 } 2900 free(uBuf); 2901 free(cBuf); 2902 } 2903 2904 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2905 const UChar* uSource; 2906 const UChar* uSourceLimit; 2907 const char* cSource; 2908 const char* cSourceLimit; 2909 UChar *uTargetLimit =NULL; 2910 UChar *uTarget; 2911 char *cTarget; 2912 const char *cTargetLimit; 2913 char *cBuf; 2914 UChar *uBuf; /*,*test;*/ 2915 int32_t uBufSize = 120; 2916 int len=0; 2917 int i=2; 2918 const UChar *temp = sourceLimit; 2919 UErrorCode errorCode=U_ZERO_ERROR; 2920 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2921 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2922 2923 ucnv_reset(cnv); 2924 for(;--i>0;){ 2925 uSource = (UChar*) source; 2926 cTarget = cBuf; 2927 uTarget = uBuf; 2928 cSource = cBuf; 2929 cTargetLimit = cBuf; 2930 uTargetLimit = uBuf+uBufSize*5; 2931 cTargetLimit = cTargetLimit+uBufSize*10; 2932 uSourceLimit=uSource; 2933 do{ 2934 2935 if (uSourceLimit < sourceLimit) { 2936 uSourceLimit = uSourceLimit+1; 2937 } 2938 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2939 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2940 errorCode=U_ZERO_ERROR; 2941 continue; 2942 } 2943 2944 if(U_FAILURE(errorCode)){ 2945 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2946 return; 2947 } 2948 2949 }while (uSource<temp); 2950 2951 cSourceLimit =cBuf; 2952 do{ 2953 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2954 cSourceLimit = cSourceLimit+1; 2955 } 2956 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2957 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2958 errorCode=U_ZERO_ERROR; 2959 continue; 2960 } 2961 if(U_FAILURE(errorCode)){ 2962 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2963 return; 2964 } 2965 }while(cSource<cTarget); 2966 2967 uSource = source; 2968 /*test =uBuf;*/ 2969 for(;len<(int)(source - sourceLimit);len++){ 2970 if(uBuf[len]!=uSource[len]){ 2971 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2972 } 2973 } 2974 } 2975 free(uBuf); 2976 free(cBuf); 2977 } 2978 static void 2979 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2980 const uint16_t results[], const char* message){ 2981 /* const char* s0; */ 2982 const char* s=(char*)source; 2983 const uint16_t *r=results; 2984 UErrorCode errorCode=U_ZERO_ERROR; 2985 uint32_t c,exC; 2986 ucnv_reset(cnv); 2987 while(s<limit) { 2988 /* s0=s; */ 2989 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2990 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2991 break; /* no more significant input */ 2992 } else if(U_FAILURE(errorCode)) { 2993 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2994 break; 2995 } else { 2996 if(U16_IS_LEAD(*r)){ 2997 int i =0, len = 2; 2998 U16_NEXT(r, i, len, exC); 2999 r++; 3000 }else{ 3001 exC = *r; 3002 } 3003 if(c!=(uint32_t)(exC)) 3004 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 3005 } 3006 r++; 3007 } 3008 } 3009 3010 static int TestJitterbug930(const char* enc){ 3011 UErrorCode err = U_ZERO_ERROR; 3012 UConverter*converter; 3013 char out[80]; 3014 char*target = out; 3015 UChar in[4]; 3016 const UChar*source = in; 3017 int32_t off[80]; 3018 int32_t* offsets = off; 3019 int numOffWritten=0; 3020 UBool flush = 0; 3021 converter = my_ucnv_open(enc, &err); 3022 3023 in[0] = 0x41; /* 0x4E00;*/ 3024 in[1] = 0x4E01; 3025 in[2] = 0x4E02; 3026 in[3] = 0x4E03; 3027 3028 memset(off, '*', sizeof(off)); 3029 3030 ucnv_fromUnicode (converter, 3031 &target, 3032 target+2, 3033 &source, 3034 source+3, 3035 offsets, 3036 flush, 3037 &err); 3038 3039 /* writes three bytes into the output buffer: 41 1B 24 3040 * but offsets contains 0 1 1 3041 */ 3042 while(*offsets< off[10]){ 3043 numOffWritten++; 3044 offsets++; 3045 } 3046 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3047 if(numOffWritten!= (int)(target-out)){ 3048 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3049 } 3050 3051 err = U_ZERO_ERROR; 3052 3053 memset(off,'*' , sizeof(off)); 3054 3055 flush = 1; 3056 offsets=off; 3057 ucnv_fromUnicode (converter, 3058 &target, 3059 target+4, 3060 &source, 3061 source, 3062 offsets, 3063 flush, 3064 &err); 3065 numOffWritten=0; 3066 while(*offsets< off[10]){ 3067 numOffWritten++; 3068 if(*offsets!= -1){ 3069 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3070 } 3071 offsets++; 3072 } 3073 3074 /* writes 42 43 7A into output buffer, 3075 * offsets contains -1 -1 -1 3076 */ 3077 ucnv_close(converter); 3078 return 0; 3079 } 3080 3081 static void 3082 TestHZ() { 3083 /* test input */ 3084 static const uint16_t in[]={ 3085 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3086 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3087 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3088 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3089 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3090 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3091 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3092 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3093 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3094 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 3095 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3096 0x005A, 0x005B, 0x005C, 0x000A 3097 }; 3098 const UChar* uSource; 3099 const UChar* uSourceLimit; 3100 const char* cSource; 3101 const char* cSourceLimit; 3102 UChar *uTargetLimit =NULL; 3103 UChar *uTarget; 3104 char *cTarget; 3105 const char *cTargetLimit; 3106 char *cBuf; 3107 UChar *uBuf,*test; 3108 int32_t uBufSize = 120; 3109 UErrorCode errorCode=U_ZERO_ERROR; 3110 UConverter *cnv; 3111 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3112 int32_t* myOff= offsets; 3113 cnv=ucnv_open("HZ", &errorCode); 3114 if(U_FAILURE(errorCode)) { 3115 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3116 return; 3117 } 3118 3119 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3120 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3121 uSource = (const UChar*)in; 3122 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3123 cTarget = cBuf; 3124 cTargetLimit = cBuf +uBufSize*5; 3125 uTarget = uBuf; 3126 uTargetLimit = uBuf+ uBufSize*5; 3127 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3128 if(U_FAILURE(errorCode)){ 3129 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3130 return; 3131 } 3132 cSource = cBuf; 3133 cSourceLimit =cTarget; 3134 test =uBuf; 3135 myOff=offsets; 3136 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3137 if(U_FAILURE(errorCode)){ 3138 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3139 return; 3140 } 3141 uSource = (const UChar*)in; 3142 while(uSource<uSourceLimit){ 3143 if(*test!=*uSource){ 3144 3145 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3146 } 3147 uSource++; 3148 test++; 3149 } 3150 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3151 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3152 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3153 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3154 TestJitterbug930("csISO2022JP"); 3155 ucnv_close(cnv); 3156 free(offsets); 3157 free(uBuf); 3158 free(cBuf); 3159 } 3160 3161 static void 3162 TestISCII(){ 3163 /* test input */ 3164 static const uint16_t in[]={ 3165 /* test full range of Devanagari */ 3166 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3167 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3168 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3169 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3170 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3171 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3172 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3173 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3174 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3175 0x096D,0x096E,0x096F, 3176 /* test Soft halant*/ 3177 0x0915,0x094d, 0x200D, 3178 /* test explicit halant */ 3179 0x0915,0x094d, 0x200c, 3180 /* test double danda */ 3181 0x965, 3182 /* test ASCII */ 3183 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3184 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3185 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3186 /* tests from Lotus */ 3187 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3188 0x0930,0x094D,0x200D, 3189 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3190 0x0915,0x0921,0x002B,0x095F, 3191 /* tamil range */ 3192 0x0B86, 0xB87, 0xB88, 3193 /* telugu range */ 3194 0x0C05, 0x0C02, 0x0C03,0x0c31, 3195 /* kannada range */ 3196 0x0C85, 0xC82, 0x0C83, 3197 /* test Abbr sign and Anudatta */ 3198 0x0970, 0x952, 3199 /* 0x0958, 3200 0x0959, 3201 0x095A, 3202 0x095B, 3203 0x095C, 3204 0x095D, 3205 0x095E, 3206 0x095F,*/ 3207 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3208 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3209 0x090C , 3210 0x0962, 3211 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3212 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3213 0x0950 /* OM Symbol 0xa1, 0xE9,*/, 3214 0x093D /* Avagraha 0xEA, 0xE9*/, 3215 0x0958, 3216 0x0959, 3217 0x095A, 3218 0x095B, 3219 0x095C, 3220 0x095D, 3221 0x095E, 3222 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3223 }; 3224 static const unsigned char byteArr[]={ 3225 3226 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3227 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3228 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3229 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3230 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3231 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3232 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3233 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3234 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3235 0xf8,0xf9,0xfa, 3236 /* test soft halant */ 3237 0xb3, 0xE8, 0xE9, 3238 /* test explicit halant */ 3239 0xb3, 0xE8, 0xE8, 3240 /* test double danda */ 3241 0xea, 0xea, 3242 /* test ASCII */ 3243 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3244 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3245 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3246 /* test ATR code */ 3247 3248 /* tests from Lotus */ 3249 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3250 0xEF,0x42,0xCF,0xE8,0xD9, 3251 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3252 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3253 /* tamil range */ 3254 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3255 /* telugu range */ 3256 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3257 /* kannada range */ 3258 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3259 /* anudatta and abbreviation sign */ 3260 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3261 3262 3263 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3264 3265 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3266 3267 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3268 3269 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3270 3271 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3272 3273 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3274 3275 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3276 3277 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ 3278 3279 0xB3, 0xE9, /* Ka + NUKTA */ 3280 3281 0xB4, 0xE9, /* Kha + NUKTA */ 3282 3283 0xB5, 0xE9, /* Ga + NUKTA */ 3284 3285 0xBA, 0xE9, 3286 3287 0xBF, 0xE9, 3288 3289 0xC0, 0xE9, 3290 3291 0xC9, 0xE9, 3292 /* INV halant RA */ 3293 0xD9, 0xE8, 0xCF, 3294 0x00, 0x00A0, 3295 /* just consume unhandled codepoints */ 3296 0xEF, 0x30, 3297 3298 }; 3299 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3300 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3301 3302 } 3303 3304 static void 3305 TestISO_2022_JP() { 3306 /* test input */ 3307 static const uint16_t in[]={ 3308 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3309 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3310 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3311 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3312 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3313 0x201D, 0x3014, 0x000D, 0x000A, 3314 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3315 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3316 }; 3317 const UChar* uSource; 3318 const UChar* uSourceLimit; 3319 const char* cSource; 3320 const char* cSourceLimit; 3321 UChar *uTargetLimit =NULL; 3322 UChar *uTarget; 3323 char *cTarget; 3324 const char *cTargetLimit; 3325 char *cBuf; 3326 UChar *uBuf,*test; 3327 int32_t uBufSize = 120; 3328 UErrorCode errorCode=U_ZERO_ERROR; 3329 UConverter *cnv; 3330 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3331 int32_t* myOff= offsets; 3332 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3333 if(U_FAILURE(errorCode)) { 3334 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3335 return; 3336 } 3337 3338 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3339 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3340 uSource = (const UChar*)in; 3341 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3342 cTarget = cBuf; 3343 cTargetLimit = cBuf +uBufSize*5; 3344 uTarget = uBuf; 3345 uTargetLimit = uBuf+ uBufSize*5; 3346 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3347 if(U_FAILURE(errorCode)){ 3348 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3349 return; 3350 } 3351 cSource = cBuf; 3352 cSourceLimit =cTarget; 3353 test =uBuf; 3354 myOff=offsets; 3355 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3356 if(U_FAILURE(errorCode)){ 3357 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3358 return; 3359 } 3360 3361 uSource = (const UChar*)in; 3362 while(uSource<uSourceLimit){ 3363 if(*test!=*uSource){ 3364 3365 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3366 } 3367 uSource++; 3368 test++; 3369 } 3370 3371 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3372 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3373 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3374 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3375 TestJitterbug930("csISO2022JP"); 3376 ucnv_close(cnv); 3377 free(uBuf); 3378 free(cBuf); 3379 free(offsets); 3380 } 3381 3382 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3383 const UChar* uSource; 3384 const UChar* uSourceLimit; 3385 const char* cSource; 3386 const char* cSourceLimit; 3387 UChar *uTargetLimit =NULL; 3388 UChar *uTarget; 3389 char *cTarget; 3390 const char *cTargetLimit; 3391 char *cBuf; 3392 UChar *uBuf,*test; 3393 int32_t uBufSize = 120*10; 3394 UErrorCode errorCode=U_ZERO_ERROR; 3395 UConverter *cnv; 3396 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3397 int32_t* myOff= offsets; 3398 cnv=my_ucnv_open(conv, &errorCode); 3399 if(U_FAILURE(errorCode)) { 3400 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3401 return; 3402 } 3403 3404 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3405 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3406 uSource = (const UChar*)in; 3407 uSourceLimit=uSource+len; 3408 cTarget = cBuf; 3409 cTargetLimit = cBuf +uBufSize; 3410 uTarget = uBuf; 3411 uTargetLimit = uBuf+ uBufSize; 3412 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3413 if(U_FAILURE(errorCode)){ 3414 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3415 return; 3416 } 3417 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3418 cSource = cBuf; 3419 cSourceLimit =cTarget; 3420 test =uBuf; 3421 myOff=offsets; 3422 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3423 if(U_FAILURE(errorCode)){ 3424 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3425 return; 3426 } 3427 3428 uSource = (const UChar*)in; 3429 while(uSource<uSourceLimit){ 3430 if(*test!=*uSource){ 3431 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3432 } 3433 uSource++; 3434 test++; 3435 } 3436 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3437 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3438 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3439 if(byteArr && byteArrLen!=0){ 3440 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3441 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3442 { 3443 cSource = byteArr; 3444 cSourceLimit = cSource+byteArrLen; 3445 test=uBuf; 3446 myOff = offsets; 3447 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3448 if(U_FAILURE(errorCode)){ 3449 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3450 return; 3451 } 3452 3453 uSource = (const UChar*)in; 3454 while(uSource<uSourceLimit){ 3455 if(*test!=*uSource){ 3456 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3457 } 3458 uSource++; 3459 test++; 3460 } 3461 } 3462 } 3463 3464 ucnv_close(cnv); 3465 free(uBuf); 3466 free(cBuf); 3467 free(offsets); 3468 } 3469 static UChar U_CALLCONV 3470 _charAt(int32_t offset, void *context) { 3471 return ((char*)context)[offset]; 3472 } 3473 3474 static int32_t 3475 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3476 int32_t srcIndex=0; 3477 int32_t dstIndex=0; 3478 if(U_FAILURE(*status)){ 3479 return 0; 3480 } 3481 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3482 *status = U_ILLEGAL_ARGUMENT_ERROR; 3483 return 0; 3484 } 3485 if(srcLen==-1){ 3486 srcLen = (int32_t)uprv_strlen(src); 3487 } 3488 3489 for (; srcIndex<srcLen; ) { 3490 UChar32 c = src[srcIndex++]; 3491 if (c == 0x005C /*'\\'*/) { 3492 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3493 if (c == (UChar32)0xFFFFFFFF) { 3494 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3495 break; /* invalid escape sequence */ 3496 } 3497 } 3498 if(dstIndex < dstLen){ 3499 if(c>0xFFFF){ 3500 dst[dstIndex++] = U16_LEAD(c); 3501 if(dstIndex<dstLen){ 3502 dst[dstIndex]=U16_TRAIL(c); 3503 }else{ 3504 *status=U_BUFFER_OVERFLOW_ERROR; 3505 } 3506 }else{ 3507 dst[dstIndex]=(UChar)c; 3508 } 3509 3510 }else{ 3511 *status = U_BUFFER_OVERFLOW_ERROR; 3512 } 3513 dstIndex++; /* for preflighting */ 3514 } 3515 return dstIndex; 3516 } 3517 3518 static void 3519 TestFullRoundtrip(const char* cp){ 3520 UChar usource[10] ={0}; 3521 UChar nsrc[10] = {0}; 3522 uint32_t i=1; 3523 int len=0, ulen; 3524 nsrc[0]=0x0061; 3525 /* Test codepoint 0 */ 3526 TestConv(usource,1,cp,"",NULL,0); 3527 TestConv(usource,2,cp,"",NULL,0); 3528 nsrc[2]=0x5555; 3529 TestConv(nsrc,3,cp,"",NULL,0); 3530 3531 for(;i<=0x10FFFF;i++){ 3532 if(i==0xD800){ 3533 i=0xDFFF; 3534 continue; 3535 } 3536 if(i<=0xFFFF){ 3537 usource[0] =(UChar) i; 3538 len=1; 3539 }else{ 3540 usource[0]=U16_LEAD(i); 3541 usource[1]=U16_TRAIL(i); 3542 len=2; 3543 } 3544 ulen=len; 3545 if(i==0x80) { 3546 usource[2]=0; 3547 } 3548 /* Test only single code points */ 3549 TestConv(usource,ulen,cp,"",NULL,0); 3550 /* Test codepoint repeated twice */ 3551 usource[ulen]=usource[0]; 3552 usource[ulen+1]=usource[1]; 3553 ulen+=len; 3554 TestConv(usource,ulen,cp,"",NULL,0); 3555 /* Test codepoint repeated 3 times */ 3556 usource[ulen]=usource[0]; 3557 usource[ulen+1]=usource[1]; 3558 ulen+=len; 3559 TestConv(usource,ulen,cp,"",NULL,0); 3560 /* Test codepoint in between 2 codepoints */ 3561 nsrc[1]=usource[0]; 3562 nsrc[2]=usource[1]; 3563 nsrc[len+1]=0x5555; 3564 TestConv(nsrc,len+2,cp,"",NULL,0); 3565 uprv_memset(usource,0,sizeof(UChar)*10); 3566 } 3567 } 3568 3569 static void 3570 TestRoundTrippingAllUTF(void){ 3571 if(!getTestOption(QUICK_OPTION)){ 3572 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3573 TestFullRoundtrip("BOCU-1"); 3574 log_verbose("Running exhaustive round trip test for SCSU\n"); 3575 TestFullRoundtrip("SCSU"); 3576 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3577 TestFullRoundtrip("UTF-8"); 3578 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3579 TestFullRoundtrip("CESU-8"); 3580 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3581 TestFullRoundtrip("UTF-16BE"); 3582 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3583 TestFullRoundtrip("UTF-16LE"); 3584 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3585 TestFullRoundtrip("UTF-16"); 3586 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3587 TestFullRoundtrip("UTF-32BE"); 3588 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3589 TestFullRoundtrip("UTF-32LE"); 3590 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3591 TestFullRoundtrip("UTF-32"); 3592 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3593 TestFullRoundtrip("UTF-7"); 3594 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3595 TestFullRoundtrip("UTF-7,version=1"); 3596 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3597 TestFullRoundtrip("IMAP-mailbox-name"); 3598 /* 3599 * 3600 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of 3601 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA). 3602 * The old mappings remain as fallbacks. 3603 * This test may be reintroduced at a later time. 3604 * 3605 * 110118 - mow 3606 */ 3607 /* 3608 log_verbose("Running exhaustive round trip test for GB18030\n"); 3609 TestFullRoundtrip("GB18030"); 3610 */ 3611 } 3612 } 3613 3614 static void 3615 TestSCSU() { 3616 3617 static const uint16_t germanUTF16[]={ 3618 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3619 }; 3620 3621 static const uint8_t germanSCSU[]={ 3622 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3623 }; 3624 3625 static const uint16_t russianUTF16[]={ 3626 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3627 }; 3628 3629 static const uint8_t russianSCSU[]={ 3630 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3631 }; 3632 3633 static const uint16_t japaneseUTF16[]={ 3634 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3635 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3636 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3637 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3638 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3639 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 3640 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3641 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3642 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3643 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3644 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3645 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3646 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3647 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3648 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3649 }; 3650 3651 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3652 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3653 static const uint8_t japaneseSCSU[]={ 3654 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3655 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3656 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3657 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3658 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3659 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3660 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3661 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3662 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3663 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3664 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3665 0xcb, 0x82 3666 }; 3667 3668 static const uint16_t allFeaturesUTF16[]={ 3669 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3670 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3671 0x01df, 0xf000, 0xdbff, 0xdfff 3672 }; 3673 3674 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3675 * result here (34B vs. 35B) 3676 */ 3677 static const uint8_t allFeaturesSCSU[]={ 3678 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3679 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3680 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3681 0xdf, 0x14, 0x80, 0x15, 0xff 3682 }; 3683 static const uint16_t monkeyIn[]={ 3684 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3685 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3686 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3687 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3688 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3689 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3690 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3691 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3692 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3693 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3694 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3695 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3696 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3697 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3698 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3699 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3700 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3701 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3702 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3703 /* test non-BMP code points */ 3704 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3705 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3706 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3707 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3708 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3709 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3710 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3711 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3712 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3713 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3714 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3715 3716 3717 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3718 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3719 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3720 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3721 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3722 }; 3723 static const char *fTestCases [] = { 3724 "\\ud800\\udc00", /* smallest surrogate*/ 3725 "\\ud8ff\\udcff", 3726 "\\udBff\\udFff", /* largest surrogate pair*/ 3727 "\\ud834\\udc00", 3728 "\\U0010FFFF", 3729 "Hello \\u9292 \\u9192 World!", 3730 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3731 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3732 3733 "\\u0648\\u06c8", /* catch missing reset*/ 3734 "\\u0648\\u06c8", 3735 3736 "\\u4444\\uE001", /* lowest quotable*/ 3737 "\\u4444\\uf2FF", /* highest quotable*/ 3738 "\\u4444\\uf188\\u4444", 3739 "\\u4444\\uf188\\uf288", 3740 "\\u4444\\uf188abc\\u0429\\uf288", 3741 "\\u9292\\u2222", 3742 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3743 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3744 "Hello World!123456", 3745 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3746 3747 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3748 "abc\\u4411d", /* uses SQU*/ 3749 "abc\\u4411\\u4412d",/* uses SCU*/ 3750 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3751 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3752 "\\u9292\\u2222", 3753 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3754 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3755 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3756 3757 "", /* empty input*/ 3758 "\\u0000", /* smallest BMP character*/ 3759 "\\uFFFF", /* largest BMP character*/ 3760 3761 /* regression tests*/ 3762 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3763 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3764 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3765 "\\u0041\\u00df\\u0401\\u015f", 3766 "\\u9066\\u2123abc", 3767 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3768 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3769 }; 3770 int i=0; 3771 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3772 const char* cSrc = fTestCases[i]; 3773 UErrorCode status = U_ZERO_ERROR; 3774 int32_t cSrcLen,srcLen; 3775 UChar* src; 3776 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3777 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3778 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3779 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3780 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3781 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3782 free(src); 3783 } 3784 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3785 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3786 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3787 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3788 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3789 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3790 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3791 } 3792 3793 #if !UCONFIG_NO_LEGACY_CONVERSION 3794 static void TestJitterbug2346(){ 3795 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3796 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3797 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3798 3799 UChar uTarget[500]={'\0'}; 3800 UChar* utarget=uTarget; 3801 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3802 3803 char cTarget[500]={'\0'}; 3804 char* ctarget=cTarget; 3805 char* ctargetLimit=cTarget+sizeof(cTarget); 3806 const char* csource=source; 3807 UChar* temp = expected; 3808 UErrorCode err=U_ZERO_ERROR; 3809 3810 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3811 if(U_FAILURE(err)) { 3812 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3813 return; 3814 } 3815 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3816 if(U_FAILURE(err)) { 3817 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3818 return; 3819 } 3820 utargetLimit=utarget; 3821 utarget = uTarget; 3822 while(utarget<utargetLimit){ 3823 if(*temp!=*utarget){ 3824 3825 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3826 } 3827 utarget++; 3828 temp++; 3829 } 3830 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3831 if(U_FAILURE(err)) { 3832 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3833 return; 3834 } 3835 ctargetLimit=ctarget; 3836 ctarget =cTarget; 3837 ucnv_close(conv); 3838 3839 3840 } 3841 3842 static void 3843 TestISO_2022_JP_1() { 3844 /* test input */ 3845 static const uint16_t in[]={ 3846 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3847 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3848 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3849 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3850 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3851 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3852 0x201D, 0x000D, 0x000A, 3853 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3854 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3855 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3856 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3857 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3858 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3859 }; 3860 const UChar* uSource; 3861 const UChar* uSourceLimit; 3862 const char* cSource; 3863 const char* cSourceLimit; 3864 UChar *uTargetLimit =NULL; 3865 UChar *uTarget; 3866 char *cTarget; 3867 const char *cTargetLimit; 3868 char *cBuf; 3869 UChar *uBuf,*test; 3870 int32_t uBufSize = 120; 3871 UErrorCode errorCode=U_ZERO_ERROR; 3872 UConverter *cnv; 3873 3874 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3875 if(U_FAILURE(errorCode)) { 3876 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3877 return; 3878 } 3879 3880 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3881 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3882 uSource = (const UChar*)in; 3883 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3884 cTarget = cBuf; 3885 cTargetLimit = cBuf +uBufSize*5; 3886 uTarget = uBuf; 3887 uTargetLimit = uBuf+ uBufSize*5; 3888 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3889 if(U_FAILURE(errorCode)){ 3890 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3891 return; 3892 } 3893 cSource = cBuf; 3894 cSourceLimit =cTarget; 3895 test =uBuf; 3896 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3897 if(U_FAILURE(errorCode)){ 3898 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3899 return; 3900 } 3901 uSource = (const UChar*)in; 3902 while(uSource<uSourceLimit){ 3903 if(*test!=*uSource){ 3904 3905 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3906 } 3907 uSource++; 3908 test++; 3909 } 3910 /*ucnv_close(cnv); 3911 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3912 /*Test for the condition where there is an invalid character*/ 3913 ucnv_reset(cnv); 3914 { 3915 static const uint8_t source2[]={0x0e,0x24,0x053}; 3916 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3917 } 3918 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3919 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3920 ucnv_close(cnv); 3921 free(uBuf); 3922 free(cBuf); 3923 } 3924 3925 static void 3926 TestISO_2022_JP_2() { 3927 /* test input */ 3928 static const uint16_t in[]={ 3929 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3930 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3931 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3932 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3933 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3934 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3935 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3936 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3937 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3938 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3939 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3940 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3941 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3942 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3943 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3944 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3945 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3946 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3947 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3948 }; 3949 const UChar* uSource; 3950 const UChar* uSourceLimit; 3951 const char* cSource; 3952 const char* cSourceLimit; 3953 UChar *uTargetLimit =NULL; 3954 UChar *uTarget; 3955 char *cTarget; 3956 const char *cTargetLimit; 3957 char *cBuf; 3958 UChar *uBuf,*test; 3959 int32_t uBufSize = 120; 3960 UErrorCode errorCode=U_ZERO_ERROR; 3961 UConverter *cnv; 3962 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3963 int32_t* myOff= offsets; 3964 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3965 if(U_FAILURE(errorCode)) { 3966 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3967 return; 3968 } 3969 3970 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3971 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3972 uSource = (const UChar*)in; 3973 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3974 cTarget = cBuf; 3975 cTargetLimit = cBuf +uBufSize*5; 3976 uTarget = uBuf; 3977 uTargetLimit = uBuf+ uBufSize*5; 3978 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3979 if(U_FAILURE(errorCode)){ 3980 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3981 return; 3982 } 3983 cSource = cBuf; 3984 cSourceLimit =cTarget; 3985 test =uBuf; 3986 myOff=offsets; 3987 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3988 if(U_FAILURE(errorCode)){ 3989 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3990 return; 3991 } 3992 uSource = (const UChar*)in; 3993 while(uSource<uSourceLimit){ 3994 if(*test!=*uSource){ 3995 3996 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3997 } 3998 uSource++; 3999 test++; 4000 } 4001 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4002 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4003 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4004 /*Test for the condition where there is an invalid character*/ 4005 ucnv_reset(cnv); 4006 { 4007 static const uint8_t source2[]={0x0e,0x24,0x053}; 4008 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 4009 } 4010 ucnv_close(cnv); 4011 free(uBuf); 4012 free(cBuf); 4013 free(offsets); 4014 } 4015 4016 static void 4017 TestISO_2022_KR() { 4018 /* test input */ 4019 static const uint16_t in[]={ 4020 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4021 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4022 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4023 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4024 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4025 ,0x53E3,0x53E4,0x000A,0x000D}; 4026 const UChar* uSource; 4027 const UChar* uSourceLimit; 4028 const char* cSource; 4029 const char* cSourceLimit; 4030 UChar *uTargetLimit =NULL; 4031 UChar *uTarget; 4032 char *cTarget; 4033 const char *cTargetLimit; 4034 char *cBuf; 4035 UChar *uBuf,*test; 4036 int32_t uBufSize = 120; 4037 UErrorCode errorCode=U_ZERO_ERROR; 4038 UConverter *cnv; 4039 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4040 int32_t* myOff= offsets; 4041 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 4042 if(U_FAILURE(errorCode)) { 4043 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4044 return; 4045 } 4046 4047 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4048 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4049 uSource = (const UChar*)in; 4050 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4051 cTarget = cBuf; 4052 cTargetLimit = cBuf +uBufSize*5; 4053 uTarget = uBuf; 4054 uTargetLimit = uBuf+ uBufSize*5; 4055 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4056 if(U_FAILURE(errorCode)){ 4057 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4058 return; 4059 } 4060 cSource = cBuf; 4061 cSourceLimit =cTarget; 4062 test =uBuf; 4063 myOff=offsets; 4064 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4065 if(U_FAILURE(errorCode)){ 4066 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4067 return; 4068 } 4069 uSource = (const UChar*)in; 4070 while(uSource<uSourceLimit){ 4071 if(*test!=*uSource){ 4072 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4073 } 4074 uSource++; 4075 test++; 4076 } 4077 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4078 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4079 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4080 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4081 TestJitterbug930("csISO2022KR"); 4082 /*Test for the condition where there is an invalid character*/ 4083 ucnv_reset(cnv); 4084 { 4085 static const uint8_t source2[]={0x1b,0x24,0x053}; 4086 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4087 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4088 } 4089 ucnv_close(cnv); 4090 free(uBuf); 4091 free(cBuf); 4092 free(offsets); 4093 } 4094 4095 static void 4096 TestISO_2022_KR_1() { 4097 /* test input */ 4098 static const uint16_t in[]={ 4099 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4100 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4101 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4102 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4103 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4104 ,0x53E3,0x53E4,0x000A,0x000D}; 4105 const UChar* uSource; 4106 const UChar* uSourceLimit; 4107 const char* cSource; 4108 const char* cSourceLimit; 4109 UChar *uTargetLimit =NULL; 4110 UChar *uTarget; 4111 char *cTarget; 4112 const char *cTargetLimit; 4113 char *cBuf; 4114 UChar *uBuf,*test; 4115 int32_t uBufSize = 120; 4116 UErrorCode errorCode=U_ZERO_ERROR; 4117 UConverter *cnv; 4118 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4119 int32_t* myOff= offsets; 4120 cnv=ucnv_open("ibm-25546", &errorCode); 4121 if(U_FAILURE(errorCode)) { 4122 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4123 return; 4124 } 4125 4126 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4127 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4128 uSource = (const UChar*)in; 4129 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4130 cTarget = cBuf; 4131 cTargetLimit = cBuf +uBufSize*5; 4132 uTarget = uBuf; 4133 uTargetLimit = uBuf+ uBufSize*5; 4134 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4135 if(U_FAILURE(errorCode)){ 4136 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4137 return; 4138 } 4139 cSource = cBuf; 4140 cSourceLimit =cTarget; 4141 test =uBuf; 4142 myOff=offsets; 4143 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4144 if(U_FAILURE(errorCode)){ 4145 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4146 return; 4147 } 4148 uSource = (const UChar*)in; 4149 while(uSource<uSourceLimit){ 4150 if(*test!=*uSource){ 4151 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4152 } 4153 uSource++; 4154 test++; 4155 } 4156 ucnv_reset(cnv); 4157 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4158 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4159 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4160 ucnv_reset(cnv); 4161 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4162 /*Test for the condition where there is an invalid character*/ 4163 ucnv_reset(cnv); 4164 { 4165 static const uint8_t source2[]={0x1b,0x24,0x053}; 4166 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4167 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4168 } 4169 ucnv_close(cnv); 4170 free(uBuf); 4171 free(cBuf); 4172 free(offsets); 4173 } 4174 4175 static void TestJitterbug2411(){ 4176 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4177 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4178 UConverter* kr=NULL, *kr1=NULL; 4179 UErrorCode errorCode = U_ZERO_ERROR; 4180 UChar tgt[100]={'\0'}; 4181 UChar* target = tgt; 4182 UChar* targetLimit = target+100; 4183 kr=ucnv_open("iso-2022-kr", &errorCode); 4184 if(U_FAILURE(errorCode)) { 4185 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4186 return; 4187 } 4188 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4189 if(U_FAILURE(errorCode)) { 4190 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4191 return; 4192 } 4193 kr1 = ucnv_open("ibm-25546", &errorCode); 4194 if(U_FAILURE(errorCode)) { 4195 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4196 return; 4197 } 4198 target = tgt; 4199 targetLimit = target+100; 4200 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4201 4202 if(U_FAILURE(errorCode)) { 4203 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4204 return; 4205 } 4206 4207 ucnv_close(kr); 4208 ucnv_close(kr1); 4209 4210 } 4211 4212 static void 4213 TestJIS(){ 4214 /* From Unicode moved to testdata/conversion.txt */ 4215 /*To Unicode*/ 4216 { 4217 static const uint8_t sampleTextJIS[] = { 4218 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4219 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4220 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4221 }; 4222 static const uint16_t expectedISO2022JIS[] = { 4223 0x0041, 0x0042, 4224 0xFF81, 0xFF82, 4225 0x3000 4226 }; 4227 static const int32_t toISO2022JISOffs[]={ 4228 3,4, 4229 8,9, 4230 16 4231 }; 4232 4233 static const uint8_t sampleTextJIS7[] = { 4234 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4235 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4236 0x1b,0x24,0x42,0x21,0x21, 4237 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4238 0x21,0x22, 4239 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4240 }; 4241 static const uint16_t expectedISO2022JIS7[] = { 4242 0x0041, 0x0042, 4243 0xFF81, 0xFF82, 4244 0x3000, 4245 0xFF81, 0xFF82, 4246 0x3001, 4247 0x3000 4248 }; 4249 static const int32_t toISO2022JIS7Offs[]={ 4250 3,4, 4251 8,9, 4252 13,16, 4253 17, 4254 19,27 4255 }; 4256 static const uint8_t sampleTextJIS8[] = { 4257 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4258 0xa1,0xc8,0xd9,/*Katakana Set*/ 4259 0x1b,0x28,0x42, 4260 0x41,0x42, 4261 0xb1,0xc3, /*Katakana Set*/ 4262 0x1b,0x24,0x42,0x21,0x21 4263 }; 4264 static const uint16_t expectedISO2022JIS8[] = { 4265 0x0041, 0x0042, 4266 0xff61, 0xff88, 0xff99, 4267 0x0041, 0x0042, 4268 0xff71, 0xff83, 4269 0x3000 4270 }; 4271 static const int32_t toISO2022JIS8Offs[]={ 4272 3, 4, 5, 6, 4273 7, 11, 12, 13, 4274 14, 18, 4275 }; 4276 4277 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4278 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4279 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4280 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4281 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4282 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4283 } 4284 4285 } 4286 4287 4288 #if 0 4289 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 4290 4291 static void TestJitterbug915(){ 4292 /* tests for roundtripping of the below sequence 4293 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4294 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4295 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4296 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4297 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4298 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4299 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4300 */ 4301 static const char cSource[]={ 4302 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4303 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4304 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4305 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4306 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 4307 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4308 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4309 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4310 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4311 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4312 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4313 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4314 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4315 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4316 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4317 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4318 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4319 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4320 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4321 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4322 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4323 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4324 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4325 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4326 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4327 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4328 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4329 0x37, 0x20, 0x2A, 0x2F 4330 }; 4331 UChar uTarget[500]={'\0'}; 4332 UChar* utarget=uTarget; 4333 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4334 4335 char cTarget[500]={'\0'}; 4336 char* ctarget=cTarget; 4337 char* ctargetLimit=cTarget+sizeof(cTarget); 4338 const char* csource=cSource; 4339 const char* tempSrc = cSource; 4340 UErrorCode err=U_ZERO_ERROR; 4341 4342 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4343 if(U_FAILURE(err)) { 4344 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4345 return; 4346 } 4347 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4348 if(U_FAILURE(err)) { 4349 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4350 return; 4351 } 4352 utargetLimit=utarget; 4353 utarget = uTarget; 4354 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4355 if(U_FAILURE(err)) { 4356 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4357 return; 4358 } 4359 ctargetLimit=ctarget; 4360 ctarget =cTarget; 4361 while(ctarget<ctargetLimit){ 4362 if(*ctarget != *tempSrc){ 4363 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4364 } 4365 ++ctarget; 4366 ++tempSrc; 4367 } 4368 4369 ucnv_close(conv); 4370 } 4371 4372 static void 4373 TestISO_2022_CN_EXT() { 4374 /* test input */ 4375 static const uint16_t in[]={ 4376 /* test Non-BMP code points */ 4377 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4378 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4379 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4380 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4381 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4382 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4383 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4384 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4385 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4386 0xD869, 0xDED5, 4387 4388 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4389 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4390 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 4391 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4392 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4393 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4394 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4395 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4396 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4397 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4398 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4399 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4400 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4401 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4402 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4403 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4404 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4405 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4406 4407 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4408 4409 }; 4410 4411 const UChar* uSource; 4412 const UChar* uSourceLimit; 4413 const char* cSource; 4414 const char* cSourceLimit; 4415 UChar *uTargetLimit =NULL; 4416 UChar *uTarget; 4417 char *cTarget; 4418 const char *cTargetLimit; 4419 char *cBuf; 4420 UChar *uBuf,*test; 4421 int32_t uBufSize = 180; 4422 UErrorCode errorCode=U_ZERO_ERROR; 4423 UConverter *cnv; 4424 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4425 int32_t* myOff= offsets; 4426 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4427 if(U_FAILURE(errorCode)) { 4428 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4429 return; 4430 } 4431 4432 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4433 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4434 uSource = (const UChar*)in; 4435 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4436 cTarget = cBuf; 4437 cTargetLimit = cBuf +uBufSize*5; 4438 uTarget = uBuf; 4439 uTargetLimit = uBuf+ uBufSize*5; 4440 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4441 if(U_FAILURE(errorCode)){ 4442 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4443 return; 4444 } 4445 cSource = cBuf; 4446 cSourceLimit =cTarget; 4447 test =uBuf; 4448 myOff=offsets; 4449 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4450 if(U_FAILURE(errorCode)){ 4451 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4452 return; 4453 } 4454 uSource = (const UChar*)in; 4455 while(uSource<uSourceLimit){ 4456 if(*test!=*uSource){ 4457 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4458 } 4459 else{ 4460 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4461 } 4462 uSource++; 4463 test++; 4464 } 4465 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4466 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4467 /*Test for the condition where there is an invalid character*/ 4468 ucnv_reset(cnv); 4469 { 4470 static const uint8_t source2[]={0x0e,0x24,0x053}; 4471 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4472 } 4473 ucnv_close(cnv); 4474 free(uBuf); 4475 free(cBuf); 4476 free(offsets); 4477 } 4478 #endif 4479 4480 static void 4481 TestISO_2022_CN() { 4482 /* test input */ 4483 static const uint16_t in[]={ 4484 /* jitterbug 951 */ 4485 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4486 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4487 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4488 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4489 0x0020, 0x0045, 0x004e, 0x0044, 4490 /**/ 4491 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4492 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4493 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4494 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4495 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4496 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4497 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4498 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4499 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4500 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4501 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4502 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4503 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4504 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4505 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4506 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4507 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4508 4509 }; 4510 const UChar* uSource; 4511 const UChar* uSourceLimit; 4512 const char* cSource; 4513 const char* cSourceLimit; 4514 UChar *uTargetLimit =NULL; 4515 UChar *uTarget; 4516 char *cTarget; 4517 const char *cTargetLimit; 4518 char *cBuf; 4519 UChar *uBuf,*test; 4520 int32_t uBufSize = 180; 4521 UErrorCode errorCode=U_ZERO_ERROR; 4522 UConverter *cnv; 4523 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4524 int32_t* myOff= offsets; 4525 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4526 if(U_FAILURE(errorCode)) { 4527 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4528 return; 4529 } 4530 4531 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4532 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4533 uSource = (const UChar*)in; 4534 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4535 cTarget = cBuf; 4536 cTargetLimit = cBuf +uBufSize*5; 4537 uTarget = uBuf; 4538 uTargetLimit = uBuf+ uBufSize*5; 4539 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4540 if(U_FAILURE(errorCode)){ 4541 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4542 return; 4543 } 4544 cSource = cBuf; 4545 cSourceLimit =cTarget; 4546 test =uBuf; 4547 myOff=offsets; 4548 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4549 if(U_FAILURE(errorCode)){ 4550 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4551 return; 4552 } 4553 uSource = (const UChar*)in; 4554 while(uSource<uSourceLimit){ 4555 if(*test!=*uSource){ 4556 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4557 } 4558 else{ 4559 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4560 } 4561 uSource++; 4562 test++; 4563 } 4564 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4565 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4566 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4567 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4568 TestJitterbug930("csISO2022CN"); 4569 /*Test for the condition where there is an invalid character*/ 4570 ucnv_reset(cnv); 4571 { 4572 static const uint8_t source2[]={0x0e,0x24,0x053}; 4573 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4574 } 4575 4576 ucnv_close(cnv); 4577 free(uBuf); 4578 free(cBuf); 4579 free(offsets); 4580 } 4581 4582 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4583 typedef struct { 4584 const char * converterName; 4585 const char * inputText; 4586 int inputTextLength; 4587 } EmptySegmentTest; 4588 4589 /* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4590 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4591 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4592 if (reason > UCNV_IRREGULAR) { 4593 return; 4594 } 4595 if (reason != UCNV_IRREGULAR) { 4596 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4597 } 4598 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4599 *err = U_ZERO_ERROR; 4600 ucnv_cbToUWriteSub(toArgs,0,err); 4601 } 4602 4603 enum { kEmptySegmentToUCharsMax = 64 }; 4604 static void TestJitterbug6175(void) { 4605 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4606 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4607 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4608 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4609 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4610 static const EmptySegmentTest emptySegmentTests[] = { 4611 /* converterName inputText inputTextLength */ 4612 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, 4613 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4614 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4615 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4616 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, 4617 /* terminator: */ 4618 { NULL, NULL, 0, } 4619 }; 4620 const EmptySegmentTest * testPtr; 4621 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4622 UErrorCode err = U_ZERO_ERROR; 4623 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4624 if (U_FAILURE(err)) { 4625 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4626 return; 4627 } 4628 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4629 if (U_FAILURE(err)) { 4630 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4631 ucnv_close(cnv); 4632 return; 4633 } 4634 { 4635 UChar toUChars[kEmptySegmentToUCharsMax]; 4636 UChar * toUCharsPtr = toUChars; 4637 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4638 const char * inCharsPtr = testPtr->inputText; 4639 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4640 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4641 } 4642 ucnv_close(cnv); 4643 } 4644 } 4645 4646 static void 4647 TestEBCDIC_STATEFUL() { 4648 /* test input */ 4649 static const uint8_t in[]={ 4650 0x61, 4651 0x1a, 4652 0x0f, 0x4b, 4653 0x42, 4654 0x40, 4655 0x36, 4656 }; 4657 4658 /* expected test results */ 4659 static const int32_t results[]={ 4660 /* number of bytes read, code point */ 4661 1, 0x002f, 4662 1, 0x0092, 4663 2, 0x002e, 4664 1, 0xff62, 4665 1, 0x0020, 4666 1, 0x0096, 4667 4668 }; 4669 static const uint8_t in2[]={ 4670 0x0f, 4671 0xa1, 4672 0x01 4673 }; 4674 4675 /* expected test results */ 4676 static const int32_t results2[]={ 4677 /* number of bytes read, code point */ 4678 2, 0x203E, 4679 1, 0x0001, 4680 }; 4681 4682 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4683 UErrorCode errorCode=U_ZERO_ERROR; 4684 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4685 if(U_FAILURE(errorCode)) { 4686 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4687 return; 4688 } 4689 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4690 ucnv_reset(cnv); 4691 /* Test the condition when source >= sourceLimit */ 4692 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4693 ucnv_reset(cnv); 4694 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4695 { 4696 static const uint8_t source1[]={0x0f}; 4697 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4698 } 4699 /*Test for the condition where there is an invalid character*/ 4700 ucnv_reset(cnv); 4701 { 4702 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4703 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4704 } 4705 ucnv_reset(cnv); 4706 source=(const char*)in2; 4707 limit=(const char*)in2+sizeof(in2); 4708 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4709 ucnv_close(cnv); 4710 4711 } 4712 4713 static void 4714 TestGB18030() { 4715 /* test input */ 4716 static const uint8_t in[]={ 4717 0x24, 4718 0x7f, 4719 0x81, 0x30, 0x81, 0x30, 4720 0xa8, 0xbf, 4721 0xa2, 0xe3, 4722 0xd2, 0xbb, 4723 0x82, 0x35, 0x8f, 0x33, 4724 0x84, 0x31, 0xa4, 0x39, 4725 0x90, 0x30, 0x81, 0x30, 4726 0xe3, 0x32, 0x9a, 0x35 4727 #if 0 4728 /* 4729 * Feature removed markus 2000-oct-26 4730 * Only some codepages must match surrogate pairs into supplementary code points - 4731 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . 4732 * GB 18030 provides direct encodings for supplementary code points, therefore 4733 * it must not combine two single-encoded surrogates into one code point. 4734 */ 4735 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4736 #endif 4737 }; 4738 4739 /* expected test results */ 4740 static const int32_t results[]={ 4741 /* number of bytes read, code point */ 4742 1, 0x24, 4743 1, 0x7f, 4744 4, 0x80, 4745 2, 0x1f9, 4746 2, 0x20ac, 4747 2, 0x4e00, 4748 4, 0x9fa6, 4749 4, 0xffff, 4750 4, 0x10000, 4751 4, 0x10ffff 4752 #if 0 4753 /* Feature removed. See comment above. */ 4754 8, 0x10000 4755 #endif 4756 }; 4757 4758 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4759 UErrorCode errorCode=U_ZERO_ERROR; 4760 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4761 if(U_FAILURE(errorCode)) { 4762 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4763 return; 4764 } 4765 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4766 ucnv_close(cnv); 4767 } 4768 4769 static void 4770 TestLMBCS() { 4771 /* LMBCS-1 string */ 4772 static const uint8_t pszLMBCS[]={ 4773 0x61, 4774 0x01, 0x29, 4775 0x81, 4776 0xA0, 4777 0x0F, 0x27, 4778 0x0F, 0x91, 4779 0x14, 0x0a, 0x74, 4780 0x14, 0xF6, 0x02, 4781 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4782 0x10, 0x88, 0xA0, 4783 }; 4784 4785 /* Unicode UChar32 equivalents */ 4786 static const UChar32 pszUnicode32[]={ 4787 /* code point */ 4788 0x00000061, 4789 0x00002013, 4790 0x000000FC, 4791 0x000000E1, 4792 0x00000007, 4793 0x00000091, 4794 0x00000a74, 4795 0x00000200, 4796 0x00023456, /* code point for surrogate pair */ 4797 0x00005516 4798 }; 4799 4800 /* Unicode UChar equivalents */ 4801 static const UChar pszUnicode[]={ 4802 /* code point */ 4803 0x0061, 4804 0x2013, 4805 0x00FC, 4806 0x00E1, 4807 0x0007, 4808 0x0091, 4809 0x0a74, 4810 0x0200, 4811 0xD84D, /* low surrogate */ 4812 0xDC56, /* high surrogate */ 4813 0x5516 4814 }; 4815 4816 /* expected test results */ 4817 static const int offsets32[]={ 4818 /* number of bytes read, code point */ 4819 0, 4820 1, 4821 3, 4822 4, 4823 5, 4824 7, 4825 9, 4826 12, 4827 15, 4828 21, 4829 24 4830 }; 4831 4832 /* expected test results */ 4833 static const int offsets[]={ 4834 /* number of bytes read, code point */ 4835 0, 4836 1, 4837 3, 4838 4, 4839 5, 4840 7, 4841 9, 4842 12, 4843 15, 4844 18, 4845 21, 4846 24 4847 }; 4848 4849 4850 UConverter *cnv; 4851 4852 #define NAME_LMBCS_1 "LMBCS-1" 4853 #define NAME_LMBCS_2 "LMBCS-2" 4854 4855 4856 /* Some basic open/close/property tests on some LMBCS converters */ 4857 { 4858 4859 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4860 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4861 char get_subchars [1]; 4862 const char * get_name; 4863 UConverter *cnv1; 4864 UConverter *cnv2; 4865 4866 int8_t len = sizeof(get_subchars); 4867 4868 UErrorCode errorCode=U_ZERO_ERROR; 4869 4870 /* Open */ 4871 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4872 if(U_FAILURE(errorCode)) { 4873 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4874 return; 4875 } 4876 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4877 if(U_FAILURE(errorCode)) { 4878 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4879 return; 4880 } 4881 4882 /* Name */ 4883 get_name = ucnv_getName (cnv1, &errorCode); 4884 if (strcmp(NAME_LMBCS_1,get_name)){ 4885 log_err("Unexpected converter name: %s\n", get_name); 4886 } 4887 get_name = ucnv_getName (cnv2, &errorCode); 4888 if (strcmp(NAME_LMBCS_2,get_name)){ 4889 log_err("Unexpected converter name: %s\n", get_name); 4890 } 4891 4892 /* substitution chars */ 4893 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4894 if(U_FAILURE(errorCode)) { 4895 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4896 } 4897 if (len!=1){ 4898 log_err("Unexpected length of sub chars\n"); 4899 } 4900 if (get_subchars[0] != expected_subchars[0]){ 4901 log_err("Unexpected value of sub chars\n"); 4902 } 4903 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4904 if(U_FAILURE(errorCode)) { 4905 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4906 } 4907 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4908 if(U_FAILURE(errorCode)) { 4909 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4910 } 4911 if (len!=1){ 4912 log_err("Unexpected length of sub chars\n"); 4913 } 4914 if (get_subchars[0] != new_subchars[0]){ 4915 log_err("Unexpected value of sub chars\n"); 4916 } 4917 ucnv_close(cnv1); 4918 ucnv_close(cnv2); 4919 4920 } 4921 4922 /* LMBCS to Unicode - offsets */ 4923 { 4924 UErrorCode errorCode=U_ZERO_ERROR; 4925 4926 const char * pSource = (const char *)pszLMBCS; 4927 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4928 4929 UChar Out [sizeof(pszUnicode) + 1]; 4930 UChar * pOut = Out; 4931 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4932 4933 int32_t off [sizeof(offsets)]; 4934 4935 /* last 'offset' in expected results is just the final size. 4936 (Makes other tests easier). Compensate here: */ 4937 4938 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4939 4940 4941 4942 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4943 if(U_FAILURE(errorCode)) { 4944 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4945 return; 4946 } 4947 4948 4949 4950 ucnv_toUnicode (cnv, 4951 &pOut, 4952 OutLimit, 4953 &pSource, 4954 sourceLimit, 4955 off, 4956 TRUE, 4957 &errorCode); 4958 4959 4960 if (memcmp(off,offsets,sizeof(offsets))) 4961 { 4962 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4963 } 4964 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4965 { 4966 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4967 } 4968 ucnv_close(cnv); 4969 } 4970 { 4971 /* LMBCS to Unicode - getNextUChar */ 4972 const char * sourceStart; 4973 const char *source=(const char *)pszLMBCS; 4974 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4975 const UChar32 *results= pszUnicode32; 4976 const int *off = offsets32; 4977 4978 UErrorCode errorCode=U_ZERO_ERROR; 4979 UChar32 uniChar; 4980 4981 cnv=ucnv_open("LMBCS-1", &errorCode); 4982 if(U_FAILURE(errorCode)) { 4983 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4984 return; 4985 } 4986 else 4987 { 4988 4989 while(source<limit) { 4990 sourceStart=source; 4991 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4992 if(U_FAILURE(errorCode)) { 4993 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4994 break; 4995 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4996 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4997 uniChar, (source-sourceStart), *results, *off); 4998 break; 4999 } 5000 results++; 5001 off++; 5002 } 5003 } 5004 ucnv_close(cnv); 5005 } 5006 { /* test locale & optimization group operations: Unicode to LMBCS */ 5007 5008 UErrorCode errorCode=U_ZERO_ERROR; 5009 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 5010 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 5011 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 5012 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 5013 const UChar * pUniOut = uniString; 5014 UChar * pUniIn = uniString; 5015 uint8_t lmbcsString [4]; 5016 const char * pLMBCSOut = (const char *)lmbcsString; 5017 char * pLMBCSIn = (char *)lmbcsString; 5018 5019 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 5020 ucnv_fromUnicode (cnv16he, 5021 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5022 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5023 NULL, 1, &errorCode); 5024 5025 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 5026 { 5027 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 5028 } 5029 5030 pLMBCSIn= (char *)lmbcsString; 5031 pUniOut = uniString; 5032 ucnv_fromUnicode (cnv01us, 5033 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5034 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5035 NULL, 1, &errorCode); 5036 5037 if (lmbcsString[0] != 0x9F) 5038 { 5039 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5040 } 5041 5042 /* single byte char from mbcs char set */ 5043 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5044 pLMBCSOut = (const char *)lmbcsString; 5045 pUniIn = uniString; 5046 ucnv_toUnicode (cnv16jp, 5047 &pUniIn, pUniIn + 1, 5048 &pLMBCSOut, (pLMBCSOut + 1), 5049 NULL, 1, &errorCode); 5050 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5051 { 5052 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5053 } 5054 /* convert to group 1: should be 3 bytes */ 5055 pLMBCSIn = (char *)lmbcsString; 5056 pUniOut = uniString; 5057 ucnv_fromUnicode (cnv01us, 5058 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5059 &pUniOut, pUniOut + 1, 5060 NULL, 1, &errorCode); 5061 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5062 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5063 { 5064 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5065 } 5066 pLMBCSOut = (const char *)lmbcsString; 5067 pUniIn = uniString; 5068 ucnv_toUnicode (cnv01us, 5069 &pUniIn, pUniIn + 1, 5070 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5071 NULL, 1, &errorCode); 5072 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5073 { 5074 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5075 } 5076 pLMBCSIn = (char *)lmbcsString; 5077 pUniOut = uniString; 5078 ucnv_fromUnicode (cnv16jp, 5079 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5080 &pUniOut, pUniOut + 1, 5081 NULL, 1, &errorCode); 5082 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5083 { 5084 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5085 } 5086 ucnv_close(cnv16he); 5087 ucnv_close(cnv16jp); 5088 ucnv_close(cnv01us); 5089 } 5090 { 5091 /* Small source buffer testing, LMBCS -> Unicode */ 5092 5093 UErrorCode errorCode=U_ZERO_ERROR; 5094 5095 const char * pSource = (const char *)pszLMBCS; 5096 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5097 int codepointCount = 0; 5098 5099 UChar Out [sizeof(pszUnicode) + 1]; 5100 UChar * pOut = Out; 5101 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 5102 5103 5104 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5105 if(U_FAILURE(errorCode)) { 5106 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5107 return; 5108 } 5109 5110 5111 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 5112 { 5113 ucnv_toUnicode (cnv, 5114 &pOut, 5115 OutLimit, 5116 &pSource, 5117 (pSource+1), /* claim that this is a 1- byte buffer */ 5118 NULL, 5119 FALSE, /* FALSE means there might be more chars in the next buffer */ 5120 &errorCode); 5121 5122 if (U_SUCCESS (errorCode)) 5123 { 5124 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5125 { 5126 /* we are on to the next code point: check value */ 5127 5128 if (Out[0] != pszUnicode[codepointCount]){ 5129 log_err("LMBCS->Uni result %lx should have been %lx \n", 5130 Out[0], pszUnicode[codepointCount]); 5131 } 5132 5133 pOut = Out; /* reset for accumulating next code point */ 5134 codepointCount++; 5135 } 5136 } 5137 else 5138 { 5139 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5140 } 5141 } 5142 { 5143 /* limits & surrogate error testing */ 5144 char LIn [sizeof(pszLMBCS)]; 5145 const char * pLIn = LIn; 5146 5147 char LOut [sizeof(pszLMBCS)]; 5148 char * pLOut = LOut; 5149 5150 UChar UOut [sizeof(pszUnicode)]; 5151 UChar * pUOut = UOut; 5152 5153 UChar UIn [sizeof(pszUnicode)]; 5154 const UChar * pUIn = UIn; 5155 5156 int32_t off [sizeof(offsets)]; 5157 UChar32 uniChar; 5158 5159 errorCode=U_ZERO_ERROR; 5160 5161 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5162 pUIn++; 5163 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); 5164 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5165 { 5166 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5167 } 5168 pUIn--; 5169 5170 errorCode=U_ZERO_ERROR; 5171 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5172 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5173 { 5174 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5175 } 5176 errorCode=U_ZERO_ERROR; 5177 5178 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); 5179 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5180 { 5181 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5182 } 5183 errorCode=U_ZERO_ERROR; 5184 5185 /* 0 byte source request - no error, no pointer movement */ 5186 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5187 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5188 if(U_FAILURE(errorCode)) { 5189 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5190 } 5191 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5192 { 5193 log_err("Unexpected pointer move in 0 byte source request \n"); 5194 } 5195 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5196 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5197 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5198 { 5199 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5200 } 5201 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5202 { 5203 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5204 } 5205 errorCode = U_ZERO_ERROR; 5206 5207 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5208 5209 pUIn = pszUnicode; 5210 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5211 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5212 { 5213 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5214 } 5215 5216 errorCode = U_ZERO_ERROR; 5217 5218 pLIn = (const char *)pszLMBCS; 5219 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5220 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) 5221 { 5222 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5223 } 5224 5225 /* unpaired or chopped LMBCS surrogates */ 5226 5227 /* OK high surrogate, Low surrogate is chopped */ 5228 LIn [0] = (char)0x14; 5229 LIn [1] = (char)0xD8; 5230 LIn [2] = (char)0x01; 5231 LIn [3] = (char)0x14; 5232 LIn [4] = (char)0xDC; 5233 pLIn = LIn; 5234 errorCode = U_ZERO_ERROR; 5235 pUOut = UOut; 5236 5237 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5238 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5239 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5240 { 5241 log_err("Unexpected results on chopped low surrogate\n"); 5242 } 5243 5244 /* chopped at surrogate boundary */ 5245 LIn [0] = (char)0x14; 5246 LIn [1] = (char)0xD8; 5247 LIn [2] = (char)0x01; 5248 pLIn = LIn; 5249 errorCode = U_ZERO_ERROR; 5250 pUOut = UOut; 5251 5252 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5253 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5254 { 5255 log_err("Unexpected results on chopped at surrogate boundary \n"); 5256 } 5257 5258 /* unpaired surrogate plus valid Unichar */ 5259 LIn [0] = (char)0x14; 5260 LIn [1] = (char)0xD8; 5261 LIn [2] = (char)0x01; 5262 LIn [3] = (char)0x14; 5263 LIn [4] = (char)0xC9; 5264 LIn [5] = (char)0xD0; 5265 pLIn = LIn; 5266 errorCode = U_ZERO_ERROR; 5267 pUOut = UOut; 5268 5269 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5270 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5271 { 5272 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5273 } 5274 5275 /* unpaired surrogate plus chopped Unichar */ 5276 LIn [0] = (char)0x14; 5277 LIn [1] = (char)0xD8; 5278 LIn [2] = (char)0x01; 5279 LIn [3] = (char)0x14; 5280 LIn [4] = (char)0xC9; 5281 5282 pLIn = LIn; 5283 errorCode = U_ZERO_ERROR; 5284 pUOut = UOut; 5285 5286 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5287 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5288 { 5289 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5290 } 5291 5292 /* unpaired surrogate plus valid non-Unichar */ 5293 LIn [0] = (char)0x14; 5294 LIn [1] = (char)0xD8; 5295 LIn [2] = (char)0x01; 5296 LIn [3] = (char)0x0F; 5297 LIn [4] = (char)0x3B; 5298 5299 pLIn = LIn; 5300 errorCode = U_ZERO_ERROR; 5301 pUOut = UOut; 5302 5303 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5304 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5305 { 5306 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5307 } 5308 5309 /* unpaired surrogate plus chopped non-Unichar */ 5310 LIn [0] = (char)0x14; 5311 LIn [1] = (char)0xD8; 5312 LIn [2] = (char)0x01; 5313 LIn [3] = (char)0x0F; 5314 5315 pLIn = LIn; 5316 errorCode = U_ZERO_ERROR; 5317 pUOut = UOut; 5318 5319 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5320 5321 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5322 { 5323 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5324 } 5325 } 5326 } 5327 ucnv_close(cnv); /* final cleanup */ 5328 } 5329 5330 5331 static void TestJitterbug255() 5332 { 5333 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 5334 const char *testBuffer = (const char *)testBytes; 5335 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5336 UErrorCode status = U_ZERO_ERROR; 5337 /*UChar32 result;*/ 5338 UConverter *cnv = 0; 5339 5340 cnv = ucnv_open("shift-jis", &status); 5341 if (U_FAILURE(status) || cnv == 0) { 5342 log_data_err("Failed to open the converter for SJIS.\n"); 5343 return; 5344 } 5345 while (testBuffer != testEnd) 5346 { 5347 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5348 if (U_FAILURE(status)) 5349 { 5350 log_err("Failed to convert the next UChar for SJIS.\n"); 5351 break; 5352 } 5353 } 5354 ucnv_close(cnv); 5355 } 5356 5357 static void TestEBCDICUS4XML() 5358 { 5359 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5360 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5361 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5362 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5363 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5364 UChar *unicodes = unicodes_x; 5365 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5366 char *target = target_x; 5367 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5368 UErrorCode status = U_ZERO_ERROR; 5369 UConverter *cnv = 0; 5370 5371 cnv = ucnv_open("ebcdic-xml-us", &status); 5372 if (U_FAILURE(status) || cnv == 0) { 5373 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5374 return; 5375 } 5376 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5377 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5378 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", 5379 u_errorName(status)); 5380 printUSeqErr(unicodes_x, 3); 5381 printUSeqErr(toUnicodeMaps, 3); 5382 } 5383 status = U_ZERO_ERROR; 5384 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5385 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5386 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5387 u_errorName(status)); 5388 printSeqErr((const unsigned char*)target_x, 3); 5389 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5390 } 5391 ucnv_close(cnv); 5392 } 5393 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5394 5395 #if !UCONFIG_NO_COLLATION 5396 5397 static void TestJitterbug981(){ 5398 const UChar* rules; 5399 int32_t rules_length, target_cap, bytes_needed, buff_size; 5400 UErrorCode status = U_ZERO_ERROR; 5401 UConverter *utf8cnv; 5402 UCollator* myCollator; 5403 char *buff; 5404 int numNeeded=0; 5405 utf8cnv = ucnv_open ("utf8", &status); 5406 if(U_FAILURE(status)){ 5407 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5408 return; 5409 } 5410 myCollator = ucol_open("zh", &status); 5411 if(U_FAILURE(status)){ 5412 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5413 ucnv_close(utf8cnv); 5414 return; 5415 } 5416 5417 rules = ucol_getRules(myCollator, &rules_length); 5418 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5419 buff = malloc(buff_size); 5420 5421 target_cap = 0; 5422 do { 5423 ucnv_reset(utf8cnv); 5424 status = U_ZERO_ERROR; 5425 if(target_cap >= buff_size) { 5426 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5427 break; 5428 } 5429 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5430 rules, rules_length, &status); 5431 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5432 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5433 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5434 break; 5435 } 5436 numNeeded = bytes_needed; 5437 } while (status == U_BUFFER_OVERFLOW_ERROR); 5438 ucol_close(myCollator); 5439 ucnv_close(utf8cnv); 5440 free(buff); 5441 } 5442 5443 #endif 5444 5445 #if !UCONFIG_NO_LEGACY_CONVERSION 5446 static void TestJitterbug1293(){ 5447 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5448 char target[256]; 5449 UErrorCode status = U_ZERO_ERROR; 5450 UConverter* conv=NULL; 5451 int32_t target_cap, bytes_needed, numNeeded = 0; 5452 conv = ucnv_open("shift-jis",&status); 5453 if(U_FAILURE(status)){ 5454 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5455 return; 5456 } 5457 5458 do{ 5459 target_cap =0; 5460 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5461 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5462 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5463 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5464 } 5465 numNeeded = bytes_needed; 5466 } while (status == U_BUFFER_OVERFLOW_ERROR); 5467 if(U_FAILURE(status)){ 5468 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); 5469 return; 5470 } 5471 ucnv_close(conv); 5472 } 5473 #endif 5474 5475 static void TestJB5275_1(){ 5476 5477 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5478 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5479 /* Switch script: */ 5480 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5481 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5482 "\xEF\x40\x3B\xB3\x0A"; 5483 static const UChar expected[] ={ 5484 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5485 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5486 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5487 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5488 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5489 }; 5490 5491 UErrorCode status = U_ZERO_ERROR; 5492 UConverter* conv = ucnv_open("iscii-gur", &status); 5493 UChar dest[100] = {'\0'}; 5494 UChar* target = dest; 5495 UChar* targetLimit = dest+100; 5496 const char* source = data; 5497 const char* sourceLimit = data+strlen(data); 5498 const UChar* exp = expected; 5499 5500 if (U_FAILURE(status)) { 5501 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5502 return; 5503 } 5504 5505 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5506 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5507 if(U_FAILURE(status)){ 5508 log_err("conversion failed: %s \n", u_errorName(status)); 5509 } 5510 targetLimit = target; 5511 target = dest; 5512 printUSeq(target, targetLimit-target); 5513 while(target<targetLimit){ 5514 if(*exp!=*target){ 5515 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5516 } 5517 target++; 5518 exp++; 5519 } 5520 ucnv_close(conv); 5521 } 5522 5523 static void TestJB5275(){ 5524 static const char* data = 5525 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5526 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5527 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5528 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5529 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5530 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5531 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5532 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5533 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5534 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5535 static const UChar expected[] ={ 5536 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5537 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5538 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5539 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5540 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5541 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5542 }; 5543 5544 UErrorCode status = U_ZERO_ERROR; 5545 UConverter* conv = ucnv_open("iscii", &status); 5546 UChar dest[100] = {'\0'}; 5547 UChar* target = dest; 5548 UChar* targetLimit = dest+100; 5549 const char* source = data; 5550 const char* sourceLimit = data+strlen(data); 5551 const UChar* exp = expected; 5552 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5553 if(U_FAILURE(status)){ 5554 log_err("conversion failed: %s \n", u_errorName(status)); 5555 } 5556 targetLimit = target; 5557 target = dest; 5558 5559 printUSeq(target, targetLimit-target); 5560 5561 while(target<targetLimit){ 5562 if(*exp!=*target){ 5563 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5564 } 5565 target++; 5566 exp++; 5567 } 5568 ucnv_close(conv); 5569 } 5570 5571 static void 5572 TestIsFixedWidth() { 5573 UErrorCode status = U_ZERO_ERROR; 5574 UConverter *cnv = NULL; 5575 int32_t i; 5576 5577 const char *fixedWidth[] = { 5578 "US-ASCII", 5579 "UTF32", 5580 "ibm-5478_P100-1995" 5581 }; 5582 5583 const char *notFixedWidth[] = { 5584 "GB18030", 5585 "UTF8", 5586 "windows-949-2000", 5587 "UTF16" 5588 }; 5589 5590 for (i = 0; i < LENGTHOF(fixedWidth); i++) { 5591 cnv = ucnv_open(fixedWidth[i], &status); 5592 if (cnv == NULL || U_FAILURE(status)) { 5593 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status)); 5594 continue; 5595 } 5596 5597 if (!ucnv_isFixedWidth(cnv, &status)) { 5598 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]); 5599 } 5600 ucnv_close(cnv); 5601 } 5602 5603 for (i = 0; i < LENGTHOF(notFixedWidth); i++) { 5604 cnv = ucnv_open(notFixedWidth[i], &status); 5605 if (cnv == NULL || U_FAILURE(status)) { 5606 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status)); 5607 continue; 5608 } 5609 5610 if (ucnv_isFixedWidth(cnv, &status)) { 5611 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]); 5612 } 5613 ucnv_close(cnv); 5614 } 5615 } 5616