1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Steven R. Loomis 7/8/1999 Adding input buffer test 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "cstring.h" 17 #include "unicode/uloc.h" 18 #include "unicode/ucnv.h" 19 #include "unicode/ucnv_err.h" 20 #include "unicode/ucnv_cb.h" 21 #include "cintltst.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucol.h" 25 #include "unicode/utf16.h" 26 #include "cmemory.h" 27 #include "nucnvtst.h" 28 29 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0])) 30 31 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 32 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 33 #if !UCONFIG_NO_COLLATION 34 static void TestJitterbug981(void); 35 #endif 36 #if !UCONFIG_NO_LEGACY_CONVERSION 37 static void TestJitterbug1293(void); 38 #endif 39 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 40 static void TestConverterTypesAndStarters(void); 41 static void TestAmbiguous(void); 42 static void TestSignatureDetection(void); 43 static void TestUTF7(void); 44 static void TestIMAP(void); 45 static void TestUTF8(void); 46 static void TestCESU8(void); 47 static void TestUTF16(void); 48 static void TestUTF16BE(void); 49 static void TestUTF16LE(void); 50 static void TestUTF32(void); 51 static void TestUTF32BE(void); 52 static void TestUTF32LE(void); 53 static void 
TestLATIN1(void); 54 55 #if !UCONFIG_NO_LEGACY_CONVERSION 56 static void TestSBCS(void); 57 static void TestDBCS(void); 58 static void TestMBCS(void); 59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 60 static void TestICCRunout(void); 61 #endif 62 63 #ifdef U_ENABLE_GENERIC_ISO_2022 64 static void TestISO_2022(void); 65 #endif 66 67 static void TestISO_2022_JP(void); 68 static void TestISO_2022_JP_1(void); 69 static void TestISO_2022_JP_2(void); 70 static void TestISO_2022_KR(void); 71 static void TestISO_2022_KR_1(void); 72 static void TestISO_2022_CN(void); 73 #if 0 74 /* 75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 76 */ 77 static void TestISO_2022_CN_EXT(void); 78 #endif 79 static void TestJIS(void); 80 static void TestHZ(void); 81 #endif 82 83 static void TestSCSU(void); 84 85 #if !UCONFIG_NO_LEGACY_CONVERSION 86 static void TestEBCDIC_STATEFUL(void); 87 static void TestGB18030(void); 88 static void TestLMBCS(void); 89 static void TestJitterbug255(void); 90 static void TestEBCDICUS4XML(void); 91 #if 0 92 /* 93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 94 */ 95 static void TestJitterbug915(void); 96 #endif 97 static void TestISCII(void); 98 99 static void TestCoverageMBCS(void); 100 static void TestJitterbug2346(void); 101 static void TestJitterbug2411(void); 102 static void TestJB5275(void); 103 static void TestJB5275_1(void); 104 static void TestJitterbug6175(void); 105 106 static void TestIsFixedWidth(void); 107 #endif 108 109 static void TestInBufSizes(void); 110 111 static void TestRoundTrippingAllUTF(void); 112 static void TestConv(const uint16_t in[], 113 int len, 114 const char* conv, 115 const char* lang, 116 char byteArr[], 117 int byteArrLen); 118 119 /* open a converter, using test data if it begins with '@' */ 120 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 121 122 123 #define NEW_MAX_BUFFER 999 124 125 static int32_t gInBufferSize = NEW_MAX_BUFFER; 126 static 
int32_t gOutBufferSize = NEW_MAX_BUFFER; 127 static char gNuConvTestName[1024]; 128 129 #define nct_min(x,y) ((x<y) ? x : y) 130 131 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 132 { 133 if(cnv && cnv[0] == '@') { 134 return ucnv_openPackage(loadTestData(err), cnv+1, err); 135 } else { 136 return ucnv_open(cnv, err); 137 } 138 } 139 140 static void printSeq(const unsigned char* a, int len) 141 { 142 int i=0; 143 log_verbose("{"); 144 while (i<len) 145 log_verbose("0x%02x ", a[i++]); 146 log_verbose("}\n"); 147 } 148 149 static void printUSeq(const UChar* a, int len) 150 { 151 int i=0; 152 log_verbose("{U+"); 153 while (i<len) log_verbose("0x%04x ", a[i++]); 154 log_verbose("}\n"); 155 } 156 157 static void printSeqErr(const unsigned char* a, int len) 158 { 159 int i=0; 160 fprintf(stderr, "{"); 161 while (i<len) 162 fprintf(stderr, "0x%02x ", a[i++]); 163 fprintf(stderr, "}\n"); 164 } 165 166 static void printUSeqErr(const UChar* a, int len) 167 { 168 int i=0; 169 fprintf(stderr, "{U+"); 170 while (i<len) 171 fprintf(stderr, "0x%04x ", a[i++]); 172 fprintf(stderr,"}\n"); 173 } 174 175 static void 176 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 177 { 178 const char* s0; 179 const char* s=(char*)source; 180 const int32_t *r=results; 181 UErrorCode errorCode=U_ZERO_ERROR; 182 UChar32 c; 183 184 while(s<limit) { 185 s0=s; 186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 187 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 188 break; /* no more significant input */ 189 } else if(U_FAILURE(errorCode)) { 190 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 191 break; 192 } else if( 193 /* test the expected number of input bytes only if >=0 */ 194 (*r>=0 && (int32_t)(s-s0)!=*r) || 195 c!=*(r+1) 196 ) { 197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 198 message, c, (s-s0), *(r+1), *r); 199 break; 
200 } 201 r+=2; 202 } 203 } 204 205 static void 206 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) 207 { 208 const char* s=(char*)source; 209 UErrorCode errorCode=U_ZERO_ERROR; 210 uint32_t c; 211 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 212 if(errorCode != expected){ 213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 214 } 215 if(c != 0xFFFD && c != 0xffff){ 216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 217 } 218 219 } 220 221 static void TestInBufSizes(void) 222 { 223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 224 #if 1 225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 230 TestNewConvertWithBufferSizes(1,1); 231 TestNewConvertWithBufferSizes(2,3); 232 TestNewConvertWithBufferSizes(3,2); 233 #endif 234 } 235 236 static void TestOutBufSizes(void) 237 { 238 #if 1 239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 245 246 #endif 247 } 248 249 250 void addTestNewConvert(TestNode** root) 251 { 252 #if !UCONFIG_NO_FILE_IO 253 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 254 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 255 #endif 256 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 257 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 258 addTest(root, &TestSignatureDetection, 
"tsconv/nucnvtst/TestSignatureDetection"); 259 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 260 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); 261 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 262 263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 264 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 265 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 266 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 267 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 268 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 269 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 270 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 271 272 #if !UCONFIG_NO_LEGACY_CONVERSION 273 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 274 #endif 275 276 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 277 278 #if !UCONFIG_NO_LEGACY_CONVERSION 279 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 280 #if !UCONFIG_NO_FILE_IO 281 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 282 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 283 #endif 284 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 285 286 #ifdef U_ENABLE_GENERIC_ISO_2022 287 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 288 #endif 289 290 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 291 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); 292 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 293 // android-changed (no have ISO_2022_JP_2) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 294 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 295 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 296 // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 297 /* 298 * ICU 4.4 
(ticket #7314) removes mappings for CNS 11643 planes 3..7 299 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 300 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 301 */ 302 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 303 #endif 304 305 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 306 307 #if !UCONFIG_NO_LEGACY_CONVERSION 308 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 309 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 310 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 311 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 312 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 313 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 314 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 315 #if !UCONFIG_NO_COLLATION 316 // android-removed (no collation tailoring rules) -- addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 317 #endif 318 319 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 320 #endif 321 322 323 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 324 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 325 #endif 326 327 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 328 329 #if !UCONFIG_NO_LEGACY_CONVERSION 330 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 331 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 332 // android-removed (no full ISO2022 CJK tables) -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 333 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); 334 #endif 335 } 336 337 338 /* Note that this test already makes use of statics, so it's not really 339 multithread safe. 340 This convenience function lets us make the error messages actually useful. 
341 */ 342 343 static void setNuConvTestName(const char *codepage, const char *direction) 344 { 345 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 346 codepage, 347 direction, 348 (int)gInBufferSize, 349 (int)gOutBufferSize); 350 } 351 352 typedef enum 353 { 354 TC_OK = 0, /* test was OK */ 355 TC_MISMATCH = 1, /* Match failed - err was printed */ 356 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 357 } ETestConvertResult; 358 359 /* Note: This function uses global variables and it will not do offset 360 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 361 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 362 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 363 { 364 UErrorCode status = U_ZERO_ERROR; 365 UConverter *conv = 0; 366 char junkout[NEW_MAX_BUFFER]; /* FIX */ 367 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 368 char *p; 369 const UChar *src; 370 char *end; 371 char *targ; 372 int32_t *offs; 373 int i; 374 int32_t realBufferSize; 375 char *realBufferEnd; 376 const UChar *realSourceEnd; 377 const UChar *sourceLimit; 378 UBool checkOffsets = TRUE; 379 UBool doFlush; 380 381 for(i=0;i<NEW_MAX_BUFFER;i++) 382 junkout[i] = (char)0xF0; 383 for(i=0;i<NEW_MAX_BUFFER;i++) 384 junokout[i] = 0xFF; 385 386 setNuConvTestName(codepage, "FROM"); 387 388 log_verbose("\n========= %s\n", gNuConvTestName); 389 390 conv = my_ucnv_open(codepage, &status); 391 392 if(U_FAILURE(status)) 393 { 394 log_data_err("Couldn't open converter %s\n",codepage); 395 return TC_FAIL; 396 } 397 if(useFallback){ 398 ucnv_setFallback(conv,useFallback); 399 } 400 401 log_verbose("Converter opened..\n"); 402 403 src = source; 404 targ = junkout; 405 offs = junokout; 406 407 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 408 realBufferEnd = junkout + realBufferSize; 409 realSourceEnd = source + sourceLen; 
410 411 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 412 checkOffsets = FALSE; 413 414 do 415 { 416 end = nct_min(targ + gOutBufferSize, realBufferEnd); 417 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 418 419 doFlush = (UBool)(sourceLimit == realSourceEnd); 420 421 if(targ == realBufferEnd) { 422 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 423 return TC_FAIL; 424 } 425 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 426 427 428 status = U_ZERO_ERROR; 429 430 ucnv_fromUnicode (conv, 431 &targ, 432 end, 433 &src, 434 sourceLimit, 435 checkOffsets ? offs : NULL, 436 doFlush, /* flush if we're at the end of the input data */ 437 &status); 438 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 439 440 if(U_FAILURE(status)) { 441 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 442 return TC_FAIL; 443 } 444 445 log_verbose("\nConversion done [%d uchars in -> %d chars out]. 
\nResult :", 446 sourceLen, targ-junkout); 447 448 if(getTestOption(VERBOSITY_OPTION)) 449 { 450 char junk[9999]; 451 char offset_str[9999]; 452 char *ptr; 453 454 junk[0] = 0; 455 offset_str[0] = 0; 456 for(ptr = junkout;ptr<targ;ptr++) { 457 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 458 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 459 } 460 461 log_verbose(junk); 462 printSeq((const uint8_t *)expect, expectLen); 463 if ( checkOffsets ) { 464 log_verbose("\nOffsets:"); 465 log_verbose(offset_str); 466 } 467 log_verbose("\n"); 468 } 469 ucnv_close(conv); 470 471 if(expectLen != targ-junkout) { 472 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 473 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 474 fprintf(stderr, "Got:\n"); 475 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 476 fprintf(stderr, "Expected:\n"); 477 printSeqErr((const unsigned char*)expect, expectLen); 478 return TC_MISMATCH; 479 } 480 481 if (checkOffsets && (expectOffsets != 0) ) { 482 log_verbose("comparing %d offsets..\n", targ-junkout); 483 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 484 log_err("did not get the expected offsets. 
%s\n", gNuConvTestName); 485 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 486 log_err("\n"); 487 log_err("Got : "); 488 for(p=junkout;p<targ;p++) { 489 log_err("%d,", junokout[p-junkout]); 490 } 491 log_err("\n"); 492 log_err("Expected: "); 493 for(i=0; i<(targ-junkout); i++) { 494 log_err("%d,", expectOffsets[i]); 495 } 496 log_err("\n"); 497 } 498 } 499 500 log_verbose("comparing..\n"); 501 if(!memcmp(junkout, expect, expectLen)) { 502 log_verbose("Matches!\n"); 503 return TC_OK; 504 } else { 505 log_err("String does not match u->%s\n", gNuConvTestName); 506 printUSeqErr(source, sourceLen); 507 fprintf(stderr, "Got:\n"); 508 printSeqErr((const unsigned char *)junkout, expectLen); 509 fprintf(stderr, "Expected:\n"); 510 printSeqErr((const unsigned char *)expect, expectLen); 511 512 return TC_MISMATCH; 513 } 514 } 515 516 /* Note: This function uses global variables and it will not do offset 517 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 518 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 519 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 520 { 521 UErrorCode status = U_ZERO_ERROR; 522 UConverter *conv = 0; 523 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 524 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 525 const char *src; 526 const char *realSourceEnd; 527 const char *srcLimit; 528 UChar *p; 529 UChar *targ; 530 UChar *end; 531 int32_t *offs; 532 int i; 533 UBool checkOffsets = TRUE; 534 535 int32_t realBufferSize; 536 UChar *realBufferEnd; 537 538 539 for(i=0;i<NEW_MAX_BUFFER;i++) 540 junkout[i] = 0xFFFE; 541 542 for(i=0;i<NEW_MAX_BUFFER;i++) 543 junokout[i] = -1; 544 545 setNuConvTestName(codepage, "TO"); 546 547 log_verbose("\n========= %s\n", gNuConvTestName); 548 549 conv = my_ucnv_open(codepage, &status); 550 551 if(U_FAILURE(status)) 552 { 553 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 554 return 
TC_FAIL; 555 } 556 if(useFallback){ 557 ucnv_setFallback(conv,useFallback); 558 } 559 log_verbose("Converter opened..\n"); 560 561 src = (const char *)source; 562 targ = junkout; 563 offs = junokout; 564 565 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 566 realBufferEnd = junkout + realBufferSize; 567 realSourceEnd = src + sourcelen; 568 569 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 570 checkOffsets = FALSE; 571 572 do 573 { 574 end = nct_min( targ + gOutBufferSize, realBufferEnd); 575 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 576 577 if(targ == realBufferEnd) 578 { 579 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 580 return TC_FAIL; 581 } 582 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 583 584 /* oldTarg = targ; */ 585 586 status = U_ZERO_ERROR; 587 588 ucnv_toUnicode (conv, 589 &targ, 590 end, 591 &src, 592 srcLimit, 593 checkOffsets ? offs : NULL, 594 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 595 &status); 596 597 /* offs += (targ-oldTarg); */ 598 599 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 600 601 if(U_FAILURE(status)) 602 { 603 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 604 return TC_FAIL; 605 } 606 607 log_verbose("\nConversion done. 
%d bytes -> %d chars.\nResult :", 608 sourcelen, targ-junkout); 609 if(getTestOption(VERBOSITY_OPTION)) 610 { 611 char junk[9999]; 612 char offset_str[9999]; 613 UChar *ptr; 614 615 junk[0] = 0; 616 offset_str[0] = 0; 617 618 for(ptr = junkout;ptr<targ;ptr++) 619 { 620 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 621 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 622 } 623 624 log_verbose(junk); 625 printUSeq(expect, expectlen); 626 if ( checkOffsets ) 627 { 628 log_verbose("\nOffsets:"); 629 log_verbose(offset_str); 630 } 631 log_verbose("\n"); 632 } 633 ucnv_close(conv); 634 635 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 636 637 if (checkOffsets && (expectOffsets != 0)) 638 { 639 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 640 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 641 log_err("Got: "); 642 for(p=junkout;p<targ;p++) { 643 log_err("%d,", junokout[p-junkout]); 644 } 645 log_err("\n"); 646 log_err("Expected: "); 647 for(i=0; i<(targ-junkout); i++) { 648 log_err("%d,", expectOffsets[i]); 649 } 650 log_err("\n"); 651 log_err("output: "); 652 for(i=0; i<(targ-junkout); i++) { 653 log_err("%X,", junkout[i]); 654 } 655 log_err("\n"); 656 log_err("input: "); 657 for(i=0; i<(src-(const char *)source); i++) { 658 log_err("%X,", (unsigned char)source[i]); 659 } 660 log_err("\n"); 661 } 662 } 663 664 if(!memcmp(junkout, expect, expectlen*2)) 665 { 666 log_verbose("Matches!\n"); 667 return TC_OK; 668 } 669 else 670 { 671 log_err("String does not match. %s\n", gNuConvTestName); 672 log_verbose("String does not match. 
%s\n", gNuConvTestName); 673 printf("\nGot:"); 674 printUSeqErr(junkout, expectlen); 675 printf("\nExpected:"); 676 printUSeqErr(expect, expectlen); 677 return TC_MISMATCH; 678 } 679 } 680 681 682 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 683 { 684 /** test chars #1 */ 685 /* 1 2 3 1Han 2Han 3Han . */ 686 static const UChar sampleText[] = 687 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 688 static const UChar sampleTextRoundTripUnmappable[] = 689 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 690 691 692 static const uint8_t expectedUTF8[] = 693 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 694 static const int32_t toUTF8Offs[] = 695 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 696 static const int32_t fmUTF8Offs[] = 697 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 698 699 #ifdef U_ENABLE_GENERIC_ISO_2022 700 /* Same as UTF8, but with ^[%B preceeding */ 701 static const const uint8_t expectedISO2022[] = 702 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 703 static const int32_t toISO2022Offs[] = 704 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 705 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 706 static const int32_t fmISO2022Offs[] = 707 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 708 #endif 709 710 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . 
EBCDIC_STATEFUL */ 711 static const uint8_t expectedIBM930[] = 712 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 713 static const int32_t toIBM930Offs[] = 714 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 715 static const int32_t fmIBM930Offs[] = 716 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 717 718 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 719 static const uint8_t expectedIBM943[] = 720 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 721 static const int32_t toIBM943Offs [] = 722 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 723 static const int32_t fmIBM943Offs[] = 724 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 725 726 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 727 static const uint8_t expectedIBM9027[] = 728 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 729 static const int32_t toIBM9027Offs [] = 730 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 731 732 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 733 static const uint8_t expectedIBM920[] = 734 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 735 static const int32_t toIBM920Offs [] = 736 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 737 738 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 739 static const uint8_t expectedISO88593[] = 740 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 741 static const int32_t toISO88593Offs[] = 742 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 743 744 /* 1 2 3 0 <?> <?> <?> . 
<?> LATIN_1*/ 745 static const uint8_t expectedLATIN1[] = 746 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 747 static const int32_t toLATIN1Offs[] = 748 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 749 750 751 /* etc */ 752 static const uint8_t expectedUTF16BE[] = 753 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 754 static const int32_t toUTF16BEOffs[]= 755 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 756 static const int32_t fmUTF16BEOffs[] = 757 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 758 759 static const uint8_t expectedUTF16LE[] = 760 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 761 static const int32_t toUTF16LEOffs[]= 762 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 763 static const int32_t fmUTF16LEOffs[] = 764 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 765 766 static const uint8_t expectedUTF32BE[] = 767 { 0x00, 0x00, 0x00, 0x31, 768 0x00, 0x00, 0x00, 0x32, 769 0x00, 0x00, 0x00, 0x33, 770 0x00, 0x00, 0x00, 0x00, 771 0x00, 0x00, 0x4e, 0x00, 772 0x00, 0x00, 0x4e, 0x8c, 773 0x00, 0x00, 0x4e, 0x09, 774 0x00, 0x00, 0x00, 0x2e, 775 0x00, 0x02, 0x00, 0x21 }; 776 static const int32_t toUTF32BEOffs[]= 777 { 0x00, 0x00, 0x00, 0x00, 778 0x01, 0x01, 0x01, 0x01, 779 0x02, 0x02, 0x02, 0x02, 780 0x03, 0x03, 0x03, 0x03, 781 0x04, 0x04, 0x04, 0x04, 782 0x05, 0x05, 0x05, 0x05, 783 0x06, 0x06, 0x06, 0x06, 784 0x07, 0x07, 0x07, 0x07, 785 0x08, 0x08, 0x08, 0x08, 786 0x08, 0x08, 0x08, 0x08 }; 787 static const int32_t fmUTF32BEOffs[] = 788 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 789 790 static const uint8_t 
expectedUTF32LE[] = 791 { 0x31, 0x00, 0x00, 0x00, 792 0x32, 0x00, 0x00, 0x00, 793 0x33, 0x00, 0x00, 0x00, 794 0x00, 0x00, 0x00, 0x00, 795 0x00, 0x4e, 0x00, 0x00, 796 0x8c, 0x4e, 0x00, 0x00, 797 0x09, 0x4e, 0x00, 0x00, 798 0x2e, 0x00, 0x00, 0x00, 799 0x21, 0x00, 0x02, 0x00 }; 800 static const int32_t toUTF32LEOffs[]= 801 { 0x00, 0x00, 0x00, 0x00, 802 0x01, 0x01, 0x01, 0x01, 803 0x02, 0x02, 0x02, 0x02, 804 0x03, 0x03, 0x03, 0x03, 805 0x04, 0x04, 0x04, 0x04, 806 0x05, 0x05, 0x05, 0x05, 807 0x06, 0x06, 0x06, 0x06, 808 0x07, 0x07, 0x07, 0x07, 809 0x08, 0x08, 0x08, 0x08, 810 0x08, 0x08, 0x08, 0x08 }; 811 static const int32_t fmUTF32LEOffs[] = 812 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 813 814 815 816 817 /** Test chars #2 **/ 818 819 /* Sahha [health], slashed h's */ 820 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 821 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 822 823 /* LMBCS */ 824 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 825 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 826 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 827 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 828 /*********************************** START OF CODE finally *************/ 829 830 gInBufferSize = insize; 831 gOutBufferSize = outsize; 832 833 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 834 835 836 /*UTF-8*/ 837 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 838 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 839 840 log_verbose("Test surrogate behaviour for UTF8\n"); 841 { 842 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 843 static 
const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 844 0xf0, 0x90, 0x90, 0x81, 845 0xef, 0xbf, 0xbd 846 }; 847 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 848 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 849 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 850 851 852 } 853 854 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 855 /*ISO-2022*/ 856 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 857 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 858 #endif 859 860 /*UTF16 LE*/ 861 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 862 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 863 /*UTF16 BE*/ 864 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 865 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 866 /*UTF32 LE*/ 867 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 868 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 869 /*UTF32 BE*/ 870 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 871 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 872 873 /*LATIN_1*/ 874 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 875 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 876 877 #if !UCONFIG_NO_LEGACY_CONVERSION 878 /*EBCDIC_STATEFUL*/ 879 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 880 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 881 882 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 883 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 884 885 /*MBCS*/ 886 887 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 888 expectedIBM943, sizeof(expectedIBM943), 
"ibm-943", toIBM943Offs,FALSE ); 889 /*DBCS*/ 890 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 891 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 892 /*SBCS*/ 893 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 894 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 895 /*SBCS*/ 896 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 897 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 898 #endif 899 900 901 /****/ 902 903 /*UTF-8*/ 904 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 905 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 906 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 907 /*ISO-2022*/ 908 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 909 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 910 #endif 911 912 /*UTF16 LE*/ 913 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 914 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 915 /*UTF16 BE*/ 916 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 917 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 918 /*UTF32 LE*/ 919 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 920 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 921 /*UTF32 BE*/ 922 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 923 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 924 925 #if !UCONFIG_NO_LEGACY_CONVERSION 926 /*EBCDIC_STATEFUL*/ 927 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 928 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 929 /*MBCS*/ 930 testConvertToU(expectedIBM943, 
sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 931 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 932 #endif 933 934 /* Try it again to make sure it still works */ 935 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 936 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 937 938 #if !UCONFIG_NO_LEGACY_CONVERSION 939 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 940 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 941 942 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 943 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 944 945 /*LMBCS*/ 946 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 947 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 948 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 949 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 950 #endif 951 952 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 953 { 954 /* encode directly set D and set O */ 955 static const uint8_t utf7[] = { 956 /* 957 Hi Mom -+Jjo--! 958 A+ImIDkQ. 959 +- 960 +ZeVnLIqe- 961 */ 962 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 963 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 964 0x2b, 0x2d, 965 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 966 }; 967 static const UChar unicode[] = { 968 /* 969 Hi Mom -<WHITE SMILING FACE>-! 970 A<NOT IDENTICAL TO><ALPHA>. 
971 + 972 [Japanese word "nihongo"] 973 */ 974 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 975 0x41, 0x2262, 0x0391, 0x2e, 976 0x2b, 977 0x65e5, 0x672c, 0x8a9e 978 }; 979 static const int32_t toUnicodeOffsets[] = { 980 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 981 15, 17, 19, 23, 982 24, 983 27, 29, 32 984 }; 985 static const int32_t fromUnicodeOffsets[] = { 986 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 987 11, 12, 12, 12, 13, 13, 13, 13, 14, 988 15, 15, 989 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 990 }; 991 992 /* same but escaping set O (the exclamation mark) */ 993 static const uint8_t utf7Restricted[] = { 994 /* 995 Hi Mom -+Jjo--+ACE- 996 A+ImIDkQ. 997 +- 998 +ZeVnLIqe- 999 */ 1000 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 1001 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 1002 0x2b, 0x2d, 1003 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 1004 }; 1005 static const int32_t toUnicodeOffsetsR[] = { 1006 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1007 19, 21, 23, 27, 1008 28, 1009 31, 33, 36 1010 }; 1011 static const int32_t fromUnicodeOffsetsR[] = { 1012 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1013 11, 12, 12, 12, 13, 13, 13, 13, 14, 1014 15, 15, 1015 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1016 }; 1017 1018 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1019 1020 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 1021 1022 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1023 1024 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1025 } 1026 1027 /* 1028 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 1029 
* modified according to RFC 2060, 1030 * and supplemented with the one example in RFC 2060 itself. 1031 */ 1032 { 1033 static const uint8_t imap[] = { 1034 /* Hi Mom -&Jjo--! 1035 A&ImIDkQ-. 1036 &- 1037 &ZeVnLIqe- 1038 \ 1039 ~peter 1040 /mail 1041 /&ZeVnLIqe- 1042 /&U,BTFw- 1043 */ 1044 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1045 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1046 0x26, 0x2d, 1047 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1048 0x5c, 1049 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1050 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1051 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1052 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1053 }; 1054 static const UChar unicode[] = { 1055 /* Hi Mom -<WHITE SMILING FACE>-! 1056 A<NOT IDENTICAL TO><ALPHA>. 1057 & 1058 [Japanese word "nihongo"] 1059 \ 1060 ~peter 1061 /mail 1062 /<65e5, 672c, 8a9e> 1063 /<53f0, 5317> 1064 */ 1065 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1066 0x41, 0x2262, 0x0391, 0x2e, 1067 0x26, 1068 0x65e5, 0x672c, 0x8a9e, 1069 0x5c, 1070 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1071 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1072 0x2f, 0x65e5, 0x672c, 0x8a9e, 1073 0x2f, 0x53f0, 0x5317 1074 }; 1075 static const int32_t toUnicodeOffsets[] = { 1076 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1077 15, 17, 19, 24, 1078 25, 1079 28, 30, 33, 1080 37, 1081 38, 39, 40, 41, 42, 43, 1082 44, 45, 46, 47, 48, 1083 49, 51, 53, 56, 1084 60, 62, 64 1085 }; 1086 static const int32_t fromUnicodeOffsets[] = { 1087 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1088 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1089 15, 15, 1090 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1091 19, 1092 20, 21, 22, 23, 24, 25, 1093 26, 27, 28, 29, 30, 1094 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1095 35, 36, 36, 36, 37, 37, 37, 37, 37 1096 }; 1097 1098 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), 
"IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1099 1100 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1101 } 1102 1103 /* Test UTF-8 bad data handling*/ 1104 { 1105 static const uint8_t utf8[]={ 1106 0x61, 1107 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1108 0x00, 1109 0x62, 1110 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1112 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1113 0xdf, 0xbf, /* 7ff */ 1114 0xbf, /* truncated tail */ 1115 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1116 0x02 1117 }; 1118 1119 static const uint16_t utf8Expected[]={ 1120 0x0061, 1121 0xfffd, 1122 0x0000, 1123 0x0062, 1124 0xfffd, 1125 0xfffd, 1126 0xdbff, 0xdfff, 1127 0x07ff, 1128 0xfffd, 1129 0xfffd, 1130 0x0002 1131 }; 1132 1133 static const int32_t utf8Offsets[]={ 1134 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1135 }; 1136 testConvertToU(utf8, sizeof(utf8), 1137 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1138 1139 } 1140 1141 /* Test UTF-32BE bad data handling*/ 1142 { 1143 static const uint8_t utf32[]={ 1144 0x00, 0x00, 0x00, 0x61, 1145 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1146 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1147 0x00, 0x00, 0x00, 0x62, 1148 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1149 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1150 0x00, 0x00, 0x01, 0x62, 1151 0x00, 0x00, 0x02, 0x62 1152 }; 1153 static const uint16_t utf32Expected[]={ 1154 0x0061, 1155 0xfffd, /* 0x110000 out of range */ 1156 0xDBFF, /* 0x10FFFF in range */ 1157 0xDFFF, 1158 0x0062, 1159 0xfffd, /* 0xffffffff out of range */ 1160 0xfffd, /* 0x7fffffff out of range */ 1161 0x0162, 1162 0x0262 1163 }; 1164 static const int32_t utf32Offsets[]={ 1165 0, 4, 8, 8, 12, 16, 20, 24, 28 1166 }; 1167 static const uint8_t utf32ExpectedBack[]={ 1168 0x00, 0x00, 0x00, 0x61, 1169 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out 
of range */ 1170 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1171 0x00, 0x00, 0x00, 0x62, 1172 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1173 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1174 0x00, 0x00, 0x01, 0x62, 1175 0x00, 0x00, 0x02, 0x62 1176 }; 1177 static const int32_t utf32OffsetsBack[]={ 1178 0,0,0,0, 1179 1,1,1,1, 1180 2,2,2,2, 1181 4,4,4,4, 1182 5,5,5,5, 1183 6,6,6,6, 1184 7,7,7,7, 1185 8,8,8,8 1186 }; 1187 1188 testConvertToU(utf32, sizeof(utf32), 1189 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1190 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1191 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1192 } 1193 1194 /* Test UTF-32LE bad data handling*/ 1195 { 1196 static const uint8_t utf32[]={ 1197 0x61, 0x00, 0x00, 0x00, 1198 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1199 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1200 0x62, 0x00, 0x00, 0x00, 1201 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1202 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1203 0x62, 0x01, 0x00, 0x00, 1204 0x62, 0x02, 0x00, 0x00, 1205 }; 1206 1207 static const uint16_t utf32Expected[]={ 1208 0x0061, 1209 0xfffd, /* 0x110000 out of range */ 1210 0xDBFF, /* 0x10FFFF in range */ 1211 0xDFFF, 1212 0x0062, 1213 0xfffd, /* 0xffffffff out of range */ 1214 0xfffd, /* 0x7fffffff out of range */ 1215 0x0162, 1216 0x0262 1217 }; 1218 static const int32_t utf32Offsets[]={ 1219 0, 4, 8, 8, 12, 16, 20, 24, 28 1220 }; 1221 static const uint8_t utf32ExpectedBack[]={ 1222 0x61, 0x00, 0x00, 0x00, 1223 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1224 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1225 0x62, 0x00, 0x00, 0x00, 1226 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1227 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1228 0x62, 0x01, 0x00, 0x00, 1229 0x62, 0x02, 0x00, 0x00 1230 }; 1231 static 
const int32_t utf32OffsetsBack[]={ 1232 0,0,0,0, 1233 1,1,1,1, 1234 2,2,2,2, 1235 4,4,4,4, 1236 5,5,5,5, 1237 6,6,6,6, 1238 7,7,7,7, 1239 8,8,8,8 1240 }; 1241 testConvertToU(utf32, sizeof(utf32), 1242 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1243 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1244 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1245 } 1246 } 1247 1248 static void TestCoverageMBCS(){ 1249 #if 0 1250 UErrorCode status = U_ZERO_ERROR; 1251 const char *directory = loadTestData(&status); 1252 char* tdpath = NULL; 1253 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1254 int len = strlen(directory); 1255 char* index=NULL; 1256 1257 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1258 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1259 log_verbose("Retrieved data directory %s \n",saveDirectory); 1260 uprv_strcpy(tdpath,directory); 1261 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1262 1263 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1264 *(index+1)=0; 1265 } 1266 u_setDataDirectory(tdpath); 1267 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1268 #endif 1269 1270 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1271 which is test file for MBCS conversion with single-byte codepage data.*/ 1272 { 1273 1274 /* MBCS with single byte codepage data test1.ucm*/ 1275 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1276 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1277 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1278 1279 /*from Unicode*/ 1280 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1281 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1282 } 1283 1284 /*some more test to increase the code coverage in MBCS. 
Create an test converter from test3.ucm 1285 which is test file for MBCS conversion with three-byte codepage data.*/ 1286 { 1287 1288 /* MBCS with three byte codepage data test3.ucm*/ 1289 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1290 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1291 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1292 1293 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1294 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1295 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1296 1297 /*from Unicode*/ 1298 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1299 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1300 1301 /*to Unicode*/ 1302 testConvertToU(test3input, sizeof(test3input), 1303 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1304 1305 } 1306 1307 /*some more test to increase the code coverage in MBCS. 
Create an test converter from test4.ucm 1308 which is test file for MBCS conversion with four-byte codepage data.*/ 1309 { 1310 1311 /* MBCS with three byte codepage data test4.ucm*/ 1312 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1313 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1314 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1315 1316 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1317 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1318 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1319 1320 /*from Unicode*/ 1321 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1322 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1323 1324 /*to Unicode*/ 1325 testConvertToU(test4input, sizeof(test4input), 1326 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1327 1328 } 1329 #if 0 1330 free(tdpath); 1331 /* restore the original data directory */ 1332 log_verbose("Setting the data directory to %s \n", saveDirectory); 1333 u_setDataDirectory(saveDirectory); 1334 free(saveDirectory); 1335 #endif 1336 1337 } 1338 1339 static void TestConverterType(const char *convName, UConverterType convType) { 1340 UConverter* myConverter; 1341 UErrorCode err = U_ZERO_ERROR; 1342 1343 myConverter = my_ucnv_open(convName, &err); 1344 1345 if (U_FAILURE(err)) { 1346 log_data_err("Failed to create an %s converter\n", convName); 1347 return; 1348 } 1349 else 1350 { 1351 if (ucnv_getType(myConverter)!=convType) { 1352 log_err("ucnv_getType Failed for %s. 
Got enum value 0x%X\n", 1353 convName, convType); 1354 } 1355 else { 1356 log_verbose("ucnv_getType %s ok\n", convName); 1357 } 1358 } 1359 ucnv_close(myConverter); 1360 } 1361 1362 static void TestConverterTypesAndStarters() 1363 { 1364 #if !UCONFIG_NO_LEGACY_CONVERSION 1365 UConverter* myConverter; 1366 UErrorCode err = U_ZERO_ERROR; 1367 UBool mystarters[256]; 1368 1369 /* const UBool expectedKSCstarters[256] = { 1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1384 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1385 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1386 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1387 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1389 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1392 
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1393 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1394 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1395 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1396 1397 1398 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1399 1400 myConverter = ucnv_open("ksc", &err); 1401 if (U_FAILURE(err)) { 1402 log_data_err("Failed to create an ibm-ksc converter\n"); 1403 return; 1404 } 1405 else 1406 { 1407 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1408 log_err("ucnv_getType Failed for ibm-949\n"); 1409 else 1410 log_verbose("ucnv_getType ibm-949 ok\n"); 1411 1412 if(myConverter!=NULL) 1413 ucnv_getStarters(myConverter, mystarters, &err); 1414 1415 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1416 log_err("Failed ucnv_getStarters for ksc\n"); 1417 else 1418 log_verbose("ucnv_getStarters ok\n");*/ 1419 1420 } 1421 ucnv_close(myConverter); 1422 1423 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1424 TestConverterType("ibm-878", UCNV_SBCS); 1425 #endif 1426 1427 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1428 1429 TestConverterType("ibm-1208", UCNV_UTF8); 1430 1431 TestConverterType("utf-8", UCNV_UTF8); 1432 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1433 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1434 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1435 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1436 1437 #if !UCONFIG_NO_LEGACY_CONVERSION 1438 1439 #if defined(U_ENABLE_GENERIC_ISO_2022) 1440 TestConverterType("iso-2022", UCNV_ISO_2022); 1441 #endif 1442 1443 TestConverterType("hz", UCNV_HZ); 1444 #endif 1445 1446 TestConverterType("scsu", UCNV_SCSU); 1447 1448 #if !UCONFIG_NO_LEGACY_CONVERSION 1449 TestConverterType("x-iscii-de", UCNV_ISCII); 1450 #endif 1451 1452 TestConverterType("ascii", UCNV_US_ASCII); 1453 TestConverterType("utf-7", UCNV_UTF7); 1454 
TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1455 TestConverterType("bocu-1", UCNV_BOCU1); 1456 } 1457 1458 static void 1459 TestAmbiguousConverter(UConverter *cnv) { 1460 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1461 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1462 1463 const char *s; 1464 UChar *u; 1465 UErrorCode errorCode; 1466 UBool isAmbiguous; 1467 1468 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1469 errorCode=U_ZERO_ERROR; 1470 s=inBytes; 1471 u=outUnicode; 1472 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1473 if(U_FAILURE(errorCode)) { 1474 /* we do not care about general failures in this test; the input may just not be mappable */ 1475 return; 1476 } 1477 1478 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1479 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1480 /* There are some encodings that are partially ASCII based, 1481 like the ISO-7 and GSM series of codepages, which we ignore. 
*/ 1482 return; 1483 } 1484 1485 isAmbiguous=ucnv_isAmbiguous(cnv); 1486 1487 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1488 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1489 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1490 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1491 return; 1492 } 1493 1494 if(outUnicode[2]!=0x5c) { 1495 /* needs fixup, fix it */ 1496 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1497 if(outUnicode[2]!=0x5c) { 1498 /* the fix failed */ 1499 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1500 return; 1501 } 1502 } 1503 } 1504 1505 static void TestAmbiguous() 1506 { 1507 UErrorCode status = U_ZERO_ERROR; 1508 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1509 static const char target[] = { 1510 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1511 0x5c, 0x75, 0x73, 0x72, 1512 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1513 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1514 0x5c, 0x64, 0x61, 0x74, 0x61, 1515 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1516 0 1517 }; 1518 UChar asciiResult[200], sjisResult[200]; 1519 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1520 const char *name; 1521 1522 /* enumerate all converters */ 1523 status=U_ZERO_ERROR; 1524 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1525 cnv=ucnv_open(name, &status); 1526 if(U_SUCCESS(status)) { 1527 /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. 
*/ 1528 const char* cnvName = ucnv_getName(cnv, &status); 1529 if (strlen(cnvName) < 8 || 1530 strncmp(cnvName, "ISO_2022_CN", 8) != 0) { 1531 TestAmbiguousConverter(cnv); 1532 } 1533 /* END android-changed */ 1534 ucnv_close(cnv); 1535 } else { 1536 log_err("error: unable to open available converter \"%s\"\n", name); 1537 status=U_ZERO_ERROR; 1538 } 1539 } 1540 1541 #if !UCONFIG_NO_LEGACY_CONVERSION 1542 sjis_cnv = ucnv_open("ibm-943", &status); 1543 if (U_FAILURE(status)) 1544 { 1545 log_data_err("Failed to create a SJIS converter\n"); 1546 return; 1547 } 1548 ascii_cnv = ucnv_open("LATIN-1", &status); 1549 if (U_FAILURE(status)) 1550 { 1551 log_data_err("Failed to create a LATIN-1 converter\n"); 1552 ucnv_close(sjis_cnv); 1553 return; 1554 } 1555 /* convert target from SJIS to Unicode */ 1556 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1557 if (U_FAILURE(status)) 1558 { 1559 log_err("Failed to convert the SJIS string.\n"); 1560 ucnv_close(sjis_cnv); 1561 ucnv_close(ascii_cnv); 1562 return; 1563 } 1564 /* convert target from Latin-1 to Unicode */ 1565 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1566 if (U_FAILURE(status)) 1567 { 1568 log_err("Failed to convert the Latin-1 string.\n"); 1569 ucnv_close(sjis_cnv); 1570 ucnv_close(ascii_cnv); 1571 return; 1572 } 1573 if (!ucnv_isAmbiguous(sjis_cnv)) 1574 { 1575 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1576 ucnv_close(sjis_cnv); 1577 ucnv_close(ascii_cnv); 1578 return; 1579 } 1580 if (u_strcmp(sjisResult, asciiResult) == 0) 1581 { 1582 log_err("File separators for SJIS don't need to be fixed.\n"); 1583 } 1584 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1585 if (u_strcmp(sjisResult, asciiResult) != 0) 1586 { 1587 log_err("Fixing file separator for SJIS failed.\n"); 1588 } 1589 ucnv_close(sjis_cnv); 1590 
ucnv_close(ascii_cnv); 1591 #endif 1592 } 1593 1594 static void 1595 TestSignatureDetection(){ 1596 /* with null terminated strings */ 1597 { 1598 static const char* data[] = { 1599 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1600 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1601 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1602 "\x0E\xFE\xFF\x00", /* SCSU */ 1603 1604 "\xFE\xFF", /* UTF-16BE */ 1605 "\xFF\xFE", /* UTF-16LE */ 1606 "\xEF\xBB\xBF", /* UTF-8 */ 1607 "\x0E\xFE\xFF", /* SCSU */ 1608 1609 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1610 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1611 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1612 "\x0E\xFE\xFF\x41", /* SCSU */ 1613 1614 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1615 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1616 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1617 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1618 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1619 1620 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1621 }; 1622 static const char* expected[] = { 1623 "UTF-16BE", 1624 "UTF-16LE", 1625 "UTF-8", 1626 "SCSU", 1627 1628 "UTF-16BE", 1629 "UTF-16LE", 1630 "UTF-8", 1631 "SCSU", 1632 1633 "UTF-16BE", 1634 "UTF-16LE", 1635 "UTF-8", 1636 "SCSU", 1637 1638 "UTF-7", 1639 "UTF-7", 1640 "UTF-7", 1641 "UTF-7", 1642 "UTF-7", 1643 "UTF-EBCDIC" 1644 }; 1645 static const int32_t expectedLength[] ={ 1646 2, 1647 2, 1648 3, 1649 3, 1650 1651 2, 1652 2, 1653 3, 1654 3, 1655 1656 2, 1657 2, 1658 3, 1659 3, 1660 1661 5, 1662 4, 1663 4, 1664 4, 1665 4, 1666 4 1667 }; 1668 int i=0; 1669 UErrorCode err; 1670 int32_t signatureLength = -1; 1671 const char* source = NULL; 1672 const char* enc = NULL; 1673 for( ; i<sizeof(data)/sizeof(char*); i++){ 1674 err = U_ZERO_ERROR; 1675 source = data[i]; 1676 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1677 if(U_FAILURE(err)){ 1678 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. 
Error: %s\n", source,i,u_errorName(err)); 1679 continue; 1680 } 1681 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1682 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1683 continue; 1684 } 1685 if(signatureLength != expectedLength[i]){ 1686 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1687 } 1688 } 1689 } 1690 { 1691 static const char* data[] = { 1692 "\xFE\xFF\x00", /* UTF-16BE */ 1693 "\xFF\xFE\x00", /* UTF-16LE */ 1694 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1695 "\x0E\xFE\xFF\x00", /* SCSU */ 1696 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1697 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1698 "\xFE\xFF", /* UTF-16BE */ 1699 "\xFF\xFE", /* UTF-16LE */ 1700 "\xEF\xBB\xBF", /* UTF-8 */ 1701 "\x0E\xFE\xFF", /* SCSU */ 1702 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1703 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1704 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1705 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1706 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1707 "\x0E\xFE\xFF\x41", /* SCSU */ 1708 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1709 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1710 "\xFB\xEE\x28", /* BOCU-1 */ 1711 "\xFF\x41\x42" /* NULL */ 1712 }; 1713 static const int len[] = { 1714 3, 1715 3, 1716 4, 1717 4, 1718 4, 1719 4, 1720 2, 1721 2, 1722 3, 1723 3, 1724 4, 1725 4, 1726 4, 1727 4, 1728 4, 1729 4, 1730 5, 1731 5, 1732 3, 1733 3 1734 }; 1735 1736 static const char* expected[] = { 1737 "UTF-16BE", 1738 "UTF-16LE", 1739 "UTF-8", 1740 "SCSU", 1741 "UTF-32BE", 1742 "UTF-32LE", 1743 "UTF-16BE", 1744 "UTF-16LE", 1745 "UTF-8", 1746 "SCSU", 1747 "UTF-32BE", 1748 "UTF-32LE", 1749 "UTF-16BE", 1750 "UTF-16LE", 1751 "UTF-8", 1752 "SCSU", 1753 "UTF-32BE", 1754 "UTF-32LE", 1755 "BOCU-1", 1756 NULL 1757 }; 1758 static const int32_t expectedLength[] ={ 1759 2, 1760 2, 1761 3, 1762 3, 1763 4, 1764 4, 1765 2, 1766 2, 1767 3, 1768 3, 1769 4, 1770 4, 1771 
2, 1772 2, 1773 3, 1774 3, 1775 4, 1776 4, 1777 3, 1778 0 1779 }; 1780 int i=0; 1781 UErrorCode err; 1782 int32_t signatureLength = -1; 1783 int32_t sourceLength=-1; 1784 const char* source = NULL; 1785 const char* enc = NULL; 1786 for( ; i<sizeof(data)/sizeof(char*); i++){ 1787 err = U_ZERO_ERROR; 1788 source = data[i]; 1789 sourceLength = len[i]; 1790 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); 1791 if(U_FAILURE(err)){ 1792 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1793 continue; 1794 } 1795 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1796 if(expected[i] !=NULL){ 1797 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1798 continue; 1799 } 1800 } 1801 if(signatureLength != expectedLength[i]){ 1802 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1803 } 1804 } 1805 } 1806 } 1807 1808 static void TestUTF7() { 1809 /* test input */ 1810 static const uint8_t in[]={ 1811 /* H - +Jjo- - ! 
+- +2AHcAQ */ 1812 0x48, 1813 0x2d, 1814 0x2b, 0x4a, 0x6a, 0x6f, 1815 0x2d, 0x2d, 1816 0x21, 1817 0x2b, 0x2d, 1818 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1819 }; 1820 1821 /* expected test results */ 1822 static const int32_t results[]={ 1823 /* number of bytes read, code point */ 1824 1, 0x48, 1825 1, 0x2d, 1826 4, 0x263a, /* <WHITE SMILING FACE> */ 1827 2, 0x2d, 1828 1, 0x21, 1829 2, 0x2b, 1830 7, 0x10401 1831 }; 1832 1833 const char *cnvName; 1834 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1835 UErrorCode errorCode=U_ZERO_ERROR; 1836 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1837 if(U_FAILURE(errorCode)) { 1838 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1839 return; 1840 } 1841 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1842 /* Test the condition when source >= sourceLimit */ 1843 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1844 cnvName = ucnv_getName(cnv, &errorCode); 1845 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1846 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1847 } 1848 ucnv_close(cnv); 1849 } 1850 1851 static void TestIMAP() { 1852 /* test input */ 1853 static const uint8_t in[]={ 1854 /* H - &Jjo- - ! 
&- &2AHcAQ- \ */ 1855 0x48, 1856 0x2d, 1857 0x26, 0x4a, 0x6a, 0x6f, 1858 0x2d, 0x2d, 1859 0x21, 1860 0x26, 0x2d, 1861 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1862 }; 1863 1864 /* expected test results */ 1865 static const int32_t results[]={ 1866 /* number of bytes read, code point */ 1867 1, 0x48, 1868 1, 0x2d, 1869 4, 0x263a, /* <WHITE SMILING FACE> */ 1870 2, 0x2d, 1871 1, 0x21, 1872 2, 0x26, 1873 7, 0x10401 1874 }; 1875 1876 const char *cnvName; 1877 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1878 UErrorCode errorCode=U_ZERO_ERROR; 1879 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1880 if(U_FAILURE(errorCode)) { 1881 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1882 return; 1883 } 1884 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1885 /* Test the condition when source >= sourceLimit */ 1886 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1887 cnvName = ucnv_getName(cnv, &errorCode); 1888 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1889 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1890 } 1891 ucnv_close(cnv); 1892 } 1893 1894 static void TestUTF8() { 1895 /* test input */ 1896 static const uint8_t in[]={ 1897 0x61, 1898 0xc2, 0x80, 1899 0xe0, 0xa0, 0x80, 1900 0xf0, 0x90, 0x80, 0x80, 1901 0xf4, 0x84, 0x8c, 0xa1, 1902 0xf0, 0x90, 0x90, 0x81 1903 }; 1904 1905 /* expected test results */ 1906 static const int32_t results[]={ 1907 /* number of bytes read, code point */ 1908 1, 0x61, 1909 2, 0x80, 1910 3, 0x800, 1911 4, 0x10000, 1912 4, 0x104321, 1913 4, 0x10401 1914 }; 1915 1916 /* error test input */ 1917 static const uint8_t in2[]={ 1918 0x61, 1919 0xc0, 0x80, /* illegal non-shortest form */ 1920 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1921 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest 
form */ 1922 0xc0, 0xc0, /* illegal trail byte */ 1923 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1924 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1925 0xfe, /* illegal byte altogether */ 1926 0x62 1927 }; 1928 1929 /* expected error test results */ 1930 static const int32_t results2[]={ 1931 /* number of bytes read, code point */ 1932 1, 0x61, 1933 22, 0x62 1934 }; 1935 1936 UConverterToUCallback cb; 1937 const void *p; 1938 1939 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1940 UErrorCode errorCode=U_ZERO_ERROR; 1941 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1942 if(U_FAILURE(errorCode)) { 1943 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1944 return; 1945 } 1946 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1947 /* Test the condition when source >= sourceLimit */ 1948 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1949 1950 /* test error behavior with a skip callback */ 1951 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1952 source=(const char *)in2; 1953 limit=(const char *)(in2+sizeof(in2)); 1954 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1955 1956 ucnv_close(cnv); 1957 } 1958 1959 static void TestCESU8() { 1960 /* test input */ 1961 static const uint8_t in[]={ 1962 0x61, 1963 0xc2, 0x80, 1964 0xe0, 0xa0, 0x80, 1965 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1966 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1967 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1968 0xef, 0xbf, 0xbc 1969 }; 1970 1971 /* expected test results */ 1972 static const int32_t results[]={ 1973 /* number of bytes read, code point */ 1974 1, 0x61, 1975 2, 0x80, 1976 3, 0x800, 1977 6, 0x10000, 1978 3, 0xdc01, 1979 -1,0xd802, /* may read 3 or 6 bytes */ 1980 -1,0x10ffff,/* may read 0 or 3 bytes */ 1981 3, 0xfffc 1982 }; 1983 1984 /* error test input */ 1985 static const uint8_t in2[]={ 1986 0x61, 1987 0xc0, 0x80, /* illegal non-shortest form */ 1988 
0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1989 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1990 0xc0, 0xc0, /* illegal trail byte */ 1991 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 1992 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 1993 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 1994 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1995 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1996 0xfe, /* illegal byte altogether */ 1997 0x62 1998 }; 1999 2000 /* expected error test results */ 2001 static const int32_t results2[]={ 2002 /* number of bytes read, code point */ 2003 1, 0x61, 2004 34, 0x62 2005 }; 2006 2007 UConverterToUCallback cb; 2008 const void *p; 2009 2010 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 2011 UErrorCode errorCode=U_ZERO_ERROR; 2012 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2013 if(U_FAILURE(errorCode)) { 2014 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2015 return; 2016 } 2017 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2018 /* Test the condition when source >= sourceLimit */ 2019 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2020 2021 /* test error behavior with a skip callback */ 2022 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2023 source=(const char *)in2; 2024 limit=(const char *)(in2+sizeof(in2)); 2025 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2026 2027 ucnv_close(cnv); 2028 } 2029 2030 static void TestUTF16() { 2031 /* test input */ 2032 static const uint8_t in1[]={ 2033 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2034 }; 2035 static const uint8_t in2[]={ 2036 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2037 }; 2038 static const uint8_t in3[]={ 2039 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2040 }; 2041 2042 /* expected test results */ 2043 static const int32_t results1[]={ 
2044 /* number of bytes read, code point */ 2045 4, 0x4e00, 2046 2, 0xfeff 2047 }; 2048 static const int32_t results2[]={ 2049 /* number of bytes read, code point */ 2050 4, 0x004e, 2051 2, 0xfffe 2052 }; 2053 static const int32_t results3[]={ 2054 /* number of bytes read, code point */ 2055 2, 0xfefe, 2056 2, 0x4e00, 2057 2, 0xfeff, 2058 4, 0x20001 2059 }; 2060 2061 const char *source, *limit; 2062 2063 UErrorCode errorCode=U_ZERO_ERROR; 2064 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2065 if(U_FAILURE(errorCode)) { 2066 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2067 return; 2068 } 2069 2070 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2071 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2072 2073 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2074 ucnv_resetToUnicode(cnv); 2075 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2076 2077 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2078 ucnv_resetToUnicode(cnv); 2079 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2080 2081 /* Test the condition when source >= sourceLimit */ 2082 ucnv_resetToUnicode(cnv); 2083 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2084 2085 ucnv_close(cnv); 2086 } 2087 2088 static void TestUTF16BE() { 2089 /* test input */ 2090 static const uint8_t in[]={ 2091 0x00, 0x61, 2092 0x00, 0xc0, 2093 0x00, 0x31, 2094 0x00, 0xf4, 2095 0xce, 0xfe, 2096 0xd8, 0x01, 0xdc, 0x01 2097 }; 2098 2099 /* expected test results */ 2100 static const int32_t results[]={ 2101 /* number of bytes read, code point */ 2102 2, 0x61, 2103 2, 0xc0, 2104 2, 0x31, 2105 2, 0xf4, 2106 2, 0xcefe, 2107 4, 0x10401 2108 }; 2109 2110 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2111 UErrorCode errorCode=U_ZERO_ERROR; 2112 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2113 if(U_FAILURE(errorCode)) { 2114 log_err("Unable to open a UTF16-BE 
converter: %s\n", u_errorName(errorCode)); 2115 return; 2116 } 2117 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2118 /* Test the condition when source >= sourceLimit */ 2119 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2120 /*Test for the condition where there is an invalid character*/ 2121 { 2122 static const uint8_t source2[]={0x61}; 2123 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2124 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2125 } 2126 #if 0 2127 /* 2128 * Test disabled because currently the UTF-16BE/LE converters are supposed 2129 * to not set errors for unpaired surrogates. 2130 * This may change with 2131 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2132 */ 2133 2134 /*Test for the condition where there is a surrogate pair*/ 2135 { 2136 const uint8_t source2[]={0xd8, 0x01}; 2137 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2138 } 2139 #endif 2140 ucnv_close(cnv); 2141 } 2142 2143 static void 2144 TestUTF16LE() { 2145 /* test input */ 2146 static const uint8_t in[]={ 2147 0x61, 0x00, 2148 0x31, 0x00, 2149 0x4e, 0x2e, 2150 0x4e, 0x00, 2151 0x01, 0xd8, 0x01, 0xdc 2152 }; 2153 2154 /* expected test results */ 2155 static const int32_t results[]={ 2156 /* number of bytes read, code point */ 2157 2, 0x61, 2158 2, 0x31, 2159 2, 0x2e4e, 2160 2, 0x4e, 2161 4, 0x10401 2162 }; 2163 2164 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2165 UErrorCode errorCode=U_ZERO_ERROR; 2166 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2167 if(U_FAILURE(errorCode)) { 2168 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2169 return; 2170 } 2171 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2172 /* Test 
the condition when source >= sourceLimit */ 2173 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2174 /*Test for the condition where there is an invalid character*/ 2175 { 2176 static const uint8_t source2[]={0x61}; 2177 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2178 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2179 } 2180 #if 0 2181 /* 2182 * Test disabled because currently the UTF-16BE/LE converters are supposed 2183 * to not set errors for unpaired surrogates. 2184 * This may change with 2185 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2186 */ 2187 2188 /*Test for the condition where there is a surrogate character*/ 2189 { 2190 static const uint8_t source2[]={0x01, 0xd8}; 2191 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2192 } 2193 #endif 2194 2195 ucnv_close(cnv); 2196 } 2197 2198 static void TestUTF32() { 2199 /* test input */ 2200 static const uint8_t in1[]={ 2201 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2202 }; 2203 static const uint8_t in2[]={ 2204 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2205 }; 2206 static const uint8_t in3[]={ 2207 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2208 }; 2209 2210 /* expected test results */ 2211 static const int32_t results1[]={ 2212 /* number of bytes read, code point */ 2213 8, 0x100f00, 2214 4, 0xfeff 2215 }; 2216 static const int32_t results2[]={ 2217 /* number of bytes read, code point */ 2218 8, 0x0f1000, 2219 4, 0xfffe 2220 }; 2221 static const int32_t results3[]={ 2222 /* number of bytes read, code point */ 2223 4, 0xfefe, 2224 4, 0x100f00, 2225 4, 0xfffd, /* unmatched surrogate */ 2226 4, 0xfffd /* unmatched 
surrogate */ 2227 }; 2228 2229 const char *source, *limit; 2230 2231 UErrorCode errorCode=U_ZERO_ERROR; 2232 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2233 if(U_FAILURE(errorCode)) { 2234 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2235 return; 2236 } 2237 2238 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2239 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2240 2241 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2242 ucnv_resetToUnicode(cnv); 2243 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2244 2245 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2246 ucnv_resetToUnicode(cnv); 2247 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2248 2249 /* Test the condition when source >= sourceLimit */ 2250 ucnv_resetToUnicode(cnv); 2251 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2252 2253 ucnv_close(cnv); 2254 } 2255 2256 static void 2257 TestUTF32BE() { 2258 /* test input */ 2259 static const uint8_t in[]={ 2260 0x00, 0x00, 0x00, 0x61, 2261 0x00, 0x00, 0x30, 0x61, 2262 0x00, 0x00, 0xdc, 0x00, 2263 0x00, 0x00, 0xd8, 0x00, 2264 0x00, 0x00, 0xdf, 0xff, 2265 0x00, 0x00, 0xff, 0xfe, 2266 0x00, 0x10, 0xab, 0xcd, 2267 0x00, 0x10, 0xff, 0xff 2268 }; 2269 2270 /* expected test results */ 2271 static const int32_t results[]={ 2272 /* number of bytes read, code point */ 2273 4, 0x61, 2274 4, 0x3061, 2275 4, 0xfffd, 2276 4, 0xfffd, 2277 4, 0xfffd, 2278 4, 0xfffe, 2279 4, 0x10abcd, 2280 4, 0x10ffff 2281 }; 2282 2283 /* error test input */ 2284 static const uint8_t in2[]={ 2285 0x00, 0x00, 0x00, 0x61, 2286 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2287 0x00, 0x00, 0x00, 0x62, 2288 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2289 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2290 0x00, 0x00, 0x01, 0x62, 2291 0x00, 0x00, 0x02, 0x62 2292 }; 2293 2294 /* expected error test results */ 2295 static const 
int32_t results2[]={ 2296 /* number of bytes read, code point */ 2297 4, 0x61, 2298 8, 0x62, 2299 12, 0x162, 2300 4, 0x262 2301 }; 2302 2303 UConverterToUCallback cb; 2304 const void *p; 2305 2306 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2307 UErrorCode errorCode=U_ZERO_ERROR; 2308 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2309 if(U_FAILURE(errorCode)) { 2310 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2311 return; 2312 } 2313 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2314 2315 /* Test the condition when source >= sourceLimit */ 2316 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2317 2318 /* test error behavior with a skip callback */ 2319 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2320 source=(const char *)in2; 2321 limit=(const char *)(in2+sizeof(in2)); 2322 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2323 2324 ucnv_close(cnv); 2325 } 2326 2327 static void 2328 TestUTF32LE() { 2329 /* test input */ 2330 static const uint8_t in[]={ 2331 0x61, 0x00, 0x00, 0x00, 2332 0x61, 0x30, 0x00, 0x00, 2333 0x00, 0xdc, 0x00, 0x00, 2334 0x00, 0xd8, 0x00, 0x00, 2335 0xff, 0xdf, 0x00, 0x00, 2336 0xfe, 0xff, 0x00, 0x00, 2337 0xcd, 0xab, 0x10, 0x00, 2338 0xff, 0xff, 0x10, 0x00 2339 }; 2340 2341 /* expected test results */ 2342 static const int32_t results[]={ 2343 /* number of bytes read, code point */ 2344 4, 0x61, 2345 4, 0x3061, 2346 4, 0xfffd, 2347 4, 0xfffd, 2348 4, 0xfffd, 2349 4, 0xfffe, 2350 4, 0x10abcd, 2351 4, 0x10ffff 2352 }; 2353 2354 /* error test input */ 2355 static const uint8_t in2[]={ 2356 0x61, 0x00, 0x00, 0x00, 2357 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2358 0x62, 0x00, 0x00, 0x00, 2359 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2360 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2361 0x62, 0x01, 0x00, 0x00, 2362 0x62, 0x02, 0x00, 0x00, 2363 }; 2364 
2365 /* expected error test results */ 2366 static const int32_t results2[]={ 2367 /* number of bytes read, code point */ 2368 4, 0x61, 2369 8, 0x62, 2370 12, 0x162, 2371 4, 0x262, 2372 }; 2373 2374 UConverterToUCallback cb; 2375 const void *p; 2376 2377 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2378 UErrorCode errorCode=U_ZERO_ERROR; 2379 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2380 if(U_FAILURE(errorCode)) { 2381 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2382 return; 2383 } 2384 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2385 2386 /* Test the condition when source >= sourceLimit */ 2387 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2388 2389 /* test error behavior with a skip callback */ 2390 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2391 source=(const char *)in2; 2392 limit=(const char *)(in2+sizeof(in2)); 2393 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2394 2395 ucnv_close(cnv); 2396 } 2397 2398 static void 2399 TestLATIN1() { 2400 /* test input */ 2401 static const uint8_t in[]={ 2402 0x61, 2403 0x31, 2404 0x32, 2405 0xc0, 2406 0xf0, 2407 0xf4, 2408 }; 2409 2410 /* expected test results */ 2411 static const int32_t results[]={ 2412 /* number of bytes read, code point */ 2413 1, 0x61, 2414 1, 0x31, 2415 1, 0x32, 2416 1, 0xc0, 2417 1, 0xf0, 2418 1, 0xf4, 2419 }; 2420 static const uint16_t in1[] = { 2421 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2422 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2423 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2424 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2425 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 
0x08, 0x02, 0x0e, 2426 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2427 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2428 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2429 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2430 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2431 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2432 0xcb, 0x82 2433 }; 2434 static const uint8_t out1[] = { 2435 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2436 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2437 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2438 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2439 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2440 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2441 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2442 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2443 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2444 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2445 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2446 0xcb, 0x82 2447 }; 2448 static const uint16_t in2[]={ 2449 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2450 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2451 0x28, 
0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2452 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2453 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2454 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2455 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2456 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2457 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2458 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2459 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2460 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2461 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2462 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2463 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2464 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2465 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2466 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2467 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2468 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2469 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2470 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2471 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2472 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2473 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2474 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2475 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2476 0x37, 0x20, 0x2A, 0x2F, 2477 }; 2478 static const unsigned char out2[]={ 2479 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2480 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2481 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2482 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2483 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 
0x22, 0x21, 2484 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2485 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2486 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2487 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2488 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2489 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2490 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2491 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2492 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2493 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2494 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2495 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2496 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2497 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2498 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2499 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2500 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2501 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2502 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2503 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2504 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2505 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2506 0x37, 0x20, 0x2A, 0x2F, 2507 }; 2508 const char *source=(const char *)in; 2509 const char *limit=(const char *)in+sizeof(in); 2510 2511 UErrorCode errorCode=U_ZERO_ERROR; 2512 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2513 if(U_FAILURE(errorCode)) { 2514 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2515 return; 2516 } 2517 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2518 /* Test the condition when source >= sourceLimit */ 2519 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, 
"sourceLimit <= source"); 2520 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2521 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2522 2523 ucnv_close(cnv); 2524 } 2525 2526 static void 2527 TestSBCS() { 2528 /* test input */ 2529 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2530 /* expected test results */ 2531 static const int32_t results[]={ 2532 /* number of bytes read, code point */ 2533 1, 0x61, 2534 1, 0xbf, 2535 1, 0xc4, 2536 1, 0x2021, 2537 1, 0xf8ff, 2538 1, 0x00d9 2539 }; 2540 2541 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2542 UErrorCode errorCode=U_ZERO_ERROR; 2543 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2544 if(U_FAILURE(errorCode)) { 2545 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2546 return; 2547 } 2548 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2549 /* Test the condition when source >= sourceLimit */ 2550 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2551 /*Test for Illegal character */ /* 2552 { 2553 static const uint8_t input1[]={ 0xA1 }; 2554 const char* illegalsource=(const char*)input1; 2555 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2556 } 2557 */ 2558 ucnv_close(cnv); 2559 } 2560 2561 static void 2562 TestDBCS() { 2563 /* test input */ 2564 static const uint8_t in[]={ 2565 0x44, 0x6a, 2566 0xc4, 0x9c, 2567 0x7a, 0x74, 2568 0x46, 0xab, 2569 0x42, 0x5b, 2570 2571 }; 2572 2573 /* expected test results */ 2574 static const int32_t results[]={ 2575 /* number of bytes read, code point */ 2576 2, 0x00a7, 2577 2, 0xe1d2, 2578 2, 0x6962, 2579 2, 0xf842, 2580 2, 0xffe5, 2581 }; 2582 2583 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2584 UErrorCode errorCode=U_ZERO_ERROR; 2585 
2586 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2587 if(U_FAILURE(errorCode)) { 2588 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2589 return; 2590 } 2591 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2592 /* Test the condition when source >= sourceLimit */ 2593 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2594 /*Test for the condition where there is an invalid character*/ 2595 { 2596 static const uint8_t source2[]={0x1a, 0x1b}; 2597 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2598 } 2599 /*Test for the condition where we have a truncated char*/ 2600 { 2601 static const uint8_t source1[]={0xc4}; 2602 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2603 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2604 } 2605 ucnv_close(cnv); 2606 } 2607 2608 static void 2609 TestMBCS() { 2610 /* test input */ 2611 static const uint8_t in[]={ 2612 0x01, 2613 0xa6, 0xa3, 2614 0x00, 2615 0xa6, 0xa1, 2616 0x08, 2617 0xc2, 0x76, 2618 0xc2, 0x78, 2619 2620 }; 2621 2622 /* expected test results */ 2623 static const int32_t results[]={ 2624 /* number of bytes read, code point */ 2625 1, 0x0001, 2626 2, 0x250c, 2627 1, 0x0000, 2628 2, 0x2500, 2629 1, 0x0008, 2630 2, 0xd60c, 2631 2, 0xd60e, 2632 }; 2633 2634 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2635 UErrorCode errorCode=U_ZERO_ERROR; 2636 2637 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2638 if(U_FAILURE(errorCode)) { 2639 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2640 return; 2641 } 2642 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2643 /* Test the condition when source >= sourceLimit */ 2644 TestNextUCharError(cnv, 
source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2645 /*Test for the condition where there is an invalid character*/ 2646 { 2647 static const uint8_t source2[]={0xa1, 0x80}; 2648 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2649 } 2650 /*Test for the condition where we have a truncated char*/ 2651 { 2652 static const uint8_t source1[]={0xc4}; 2653 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2654 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2655 } 2656 ucnv_close(cnv); 2657 2658 } 2659 2660 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2661 static void 2662 TestICCRunout() { 2663 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2664 2665 const char *cnvName = "ibm-1363"; 2666 UErrorCode status = U_ZERO_ERROR; 2667 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; 2668 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ 2669 const char *source = sourceData; 2670 const char *sourceLim = sourceData+sizeof(sourceData); 2671 UChar c1, c2, c3; 2672 UConverter *cnv=ucnv_open(cnvName, &status); 2673 if(U_FAILURE(status)) { 2674 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2675 return; 2676 } 2677 2678 #if 0 2679 { 2680 UChar targetBuf[256]; 2681 UChar *target = targetBuf; 2682 UChar *targetLim = target+256; 2683 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2684 2685 log_info("After convert: target@%d, source@%d, status%s\n", 2686 target-targetBuf, source-sourceData, u_errorName(status)); 2687 2688 if(U_FAILURE(status)) { 2689 log_err("Failed to convert: %s\n", u_errorName(status)); 2690 } else { 2691 2692 } 2693 } 2694 #endif 2695 2696 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2697 
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Runs the shared TestNextUChar/TestNextUCharError helpers against the
 * generic "ISO_2022" converter: a regular input that begins with the escape
 * sequence 1B 25 42, the two bad-limit cases, a truncated character and an
 * invalid character.
 */
static void
TestISO_2022() {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /*
     * Test the condition when sourceLimit < source.
     * The pair (source+1, source) is used so that both pointers stay inside
     * the array; forming source-1 would point before the first element,
     * which C leaves undefined.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    /* Test the condition when source == sourceLimit */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /* Test for the condition where we have a truncated char */
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /* Test for the condition where there is an invalid character */
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2762 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2763 } 2764 /*Test for the condition where there is an invalid character*/ 2765 { 2766 static const uint8_t source2[]={0xa1, 0x01}; 2767 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2768 } 2769 ucnv_close(cnv); 2770 } 2771 2772 #endif 2773 2774 static void 2775 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2776 const UChar* uSource; 2777 const UChar* uSourceLimit; 2778 const char* cSource; 2779 const char* cSourceLimit; 2780 UChar *uTargetLimit =NULL; 2781 UChar *uTarget; 2782 char *cTarget; 2783 const char *cTargetLimit; 2784 char *cBuf; 2785 UChar *uBuf; /*,*test;*/ 2786 int32_t uBufSize = 120; 2787 int len=0; 2788 int i=2; 2789 UErrorCode errorCode=U_ZERO_ERROR; 2790 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2791 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2792 ucnv_reset(cnv); 2793 for(;--i>0; ){ 2794 uSource = (UChar*) source; 2795 uSourceLimit=(const UChar*)sourceLimit; 2796 cTarget = cBuf; 2797 uTarget = uBuf; 2798 cSource = cBuf; 2799 cTargetLimit = cBuf; 2800 uTargetLimit = uBuf; 2801 2802 do{ 2803 2804 cTargetLimit = cTargetLimit+ i; 2805 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2806 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2807 errorCode=U_ZERO_ERROR; 2808 continue; 2809 } 2810 2811 if(U_FAILURE(errorCode)){ 2812 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2813 return; 2814 } 2815 2816 }while (uSource<uSourceLimit); 2817 2818 cSourceLimit =cTarget; 2819 do{ 2820 uTargetLimit=uTargetLimit+i; 2821 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2822 
if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2823 errorCode=U_ZERO_ERROR; 2824 continue; 2825 } 2826 if(U_FAILURE(errorCode)){ 2827 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2828 return; 2829 } 2830 }while(cSource<cSourceLimit); 2831 2832 uSource = source; 2833 /*test =uBuf;*/ 2834 for(len=0;len<(int)(source - sourceLimit);len++){ 2835 if(uBuf[len]!=uSource[len]){ 2836 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2837 } 2838 } 2839 } 2840 free(uBuf); 2841 free(cBuf); 2842 } 2843 /* Test for Jitterbug 778 */ 2844 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2845 const UChar* uSource; 2846 const UChar* uSourceLimit; 2847 const char* cSource; 2848 UChar *uTargetLimit =NULL; 2849 UChar *uTarget; 2850 char *cTarget; 2851 const char *cTargetLimit; 2852 char *cBuf; 2853 UChar *uBuf,*test; 2854 int32_t uBufSize = 120; 2855 int numCharsInTarget=0; 2856 UErrorCode errorCode=U_ZERO_ERROR; 2857 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2858 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2859 uSource = source; 2860 uSourceLimit=sourceLimit; 2861 cTarget = cBuf; 2862 cTargetLimit = cBuf +uBufSize*5; 2863 uTarget = uBuf; 2864 uTargetLimit = uBuf+ uBufSize*5; 2865 ucnv_reset(cnv); 2866 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2867 if(U_FAILURE(errorCode)){ 2868 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2869 return; 2870 } 2871 cSource = cBuf; 2872 test =uBuf; 2873 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2874 if(U_FAILURE(errorCode)){ 2875 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2876 return; 2877 } 2878 uSource = source; 2879 while(uSource<uSourceLimit){ 2880 if(*test!=*uSource){ 2881 2882 log_err("Expected : \\u%04X \t 
Got: \\u%04X\n",*uSource,(int)*test) ; 2883 } 2884 uSource++; 2885 test++; 2886 } 2887 free(uBuf); 2888 free(cBuf); 2889 } 2890 2891 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2892 const UChar* uSource; 2893 const UChar* uSourceLimit; 2894 const char* cSource; 2895 const char* cSourceLimit; 2896 UChar *uTargetLimit =NULL; 2897 UChar *uTarget; 2898 char *cTarget; 2899 const char *cTargetLimit; 2900 char *cBuf; 2901 UChar *uBuf; /*,*test;*/ 2902 int32_t uBufSize = 120; 2903 int len=0; 2904 int i=2; 2905 const UChar *temp = sourceLimit; 2906 UErrorCode errorCode=U_ZERO_ERROR; 2907 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2908 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2909 2910 ucnv_reset(cnv); 2911 for(;--i>0;){ 2912 uSource = (UChar*) source; 2913 cTarget = cBuf; 2914 uTarget = uBuf; 2915 cSource = cBuf; 2916 cTargetLimit = cBuf; 2917 uTargetLimit = uBuf+uBufSize*5; 2918 cTargetLimit = cTargetLimit+uBufSize*10; 2919 uSourceLimit=uSource; 2920 do{ 2921 2922 if (uSourceLimit < sourceLimit) { 2923 uSourceLimit = uSourceLimit+1; 2924 } 2925 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2926 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2927 errorCode=U_ZERO_ERROR; 2928 continue; 2929 } 2930 2931 if(U_FAILURE(errorCode)){ 2932 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2933 return; 2934 } 2935 2936 }while (uSource<temp); 2937 2938 cSourceLimit =cBuf; 2939 do{ 2940 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2941 cSourceLimit = cSourceLimit+1; 2942 } 2943 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2944 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2945 errorCode=U_ZERO_ERROR; 2946 continue; 2947 } 2948 if(U_FAILURE(errorCode)){ 2949 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2950 return; 2951 } 2952 }while(cSource<cTarget); 2953 2954 uSource 
= source; 2955 /*test =uBuf;*/ 2956 for(;len<(int)(source - sourceLimit);len++){ 2957 if(uBuf[len]!=uSource[len]){ 2958 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2959 } 2960 } 2961 } 2962 free(uBuf); 2963 free(cBuf); 2964 } 2965 static void 2966 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2967 const uint16_t results[], const char* message){ 2968 /* const char* s0; */ 2969 const char* s=(char*)source; 2970 const uint16_t *r=results; 2971 UErrorCode errorCode=U_ZERO_ERROR; 2972 uint32_t c,exC; 2973 ucnv_reset(cnv); 2974 while(s<limit) { 2975 /* s0=s; */ 2976 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2977 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2978 break; /* no more significant input */ 2979 } else if(U_FAILURE(errorCode)) { 2980 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2981 break; 2982 } else { 2983 if(U16_IS_LEAD(*r)){ 2984 int i =0, len = 2; 2985 U16_NEXT(r, i, len, exC); 2986 r++; 2987 }else{ 2988 exC = *r; 2989 } 2990 if(c!=(uint32_t)(exC)) 2991 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 2992 } 2993 r++; 2994 } 2995 } 2996 2997 static int TestJitterbug930(const char* enc){ 2998 UErrorCode err = U_ZERO_ERROR; 2999 UConverter*converter; 3000 char out[80]; 3001 char*target = out; 3002 UChar in[4]; 3003 const UChar*source = in; 3004 int32_t off[80]; 3005 int32_t* offsets = off; 3006 int numOffWritten=0; 3007 UBool flush = 0; 3008 converter = my_ucnv_open(enc, &err); 3009 3010 in[0] = 0x41; /* 0x4E00;*/ 3011 in[1] = 0x4E01; 3012 in[2] = 0x4E02; 3013 in[3] = 0x4E03; 3014 3015 memset(off, '*', sizeof(off)); 3016 3017 ucnv_fromUnicode (converter, 3018 &target, 3019 target+2, 3020 &source, 3021 source+3, 3022 offsets, 3023 flush, 3024 &err); 3025 3026 /* writes three bytes into the output buffer: 41 1B 24 3027 * but offsets contains 0 1 1 3028 */ 3029 while(*offsets< off[10]){ 3030 numOffWritten++; 3031 
offsets++; 3032 } 3033 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3034 if(numOffWritten!= (int)(target-out)){ 3035 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3036 } 3037 3038 err = U_ZERO_ERROR; 3039 3040 memset(off,'*' , sizeof(off)); 3041 3042 flush = 1; 3043 offsets=off; 3044 ucnv_fromUnicode (converter, 3045 &target, 3046 target+4, 3047 &source, 3048 source, 3049 offsets, 3050 flush, 3051 &err); 3052 numOffWritten=0; 3053 while(*offsets< off[10]){ 3054 numOffWritten++; 3055 if(*offsets!= -1){ 3056 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3057 } 3058 offsets++; 3059 } 3060 3061 /* writes 42 43 7A into output buffer, 3062 * offsets contains -1 -1 -1 3063 */ 3064 ucnv_close(converter); 3065 return 0; 3066 } 3067 3068 static void 3069 TestHZ() { 3070 /* test input */ 3071 static const uint16_t in[]={ 3072 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3073 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3074 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3075 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3076 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3077 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3078 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3079 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3080 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3081 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 3082 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3083 0x005A, 0x005B, 0x005C, 0x000A 3084 }; 3085 const UChar* uSource; 3086 const UChar* uSourceLimit; 3087 const char* 
cSource; 3088 const char* cSourceLimit; 3089 UChar *uTargetLimit =NULL; 3090 UChar *uTarget; 3091 char *cTarget; 3092 const char *cTargetLimit; 3093 char *cBuf; 3094 UChar *uBuf,*test; 3095 int32_t uBufSize = 120; 3096 UErrorCode errorCode=U_ZERO_ERROR; 3097 UConverter *cnv; 3098 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3099 int32_t* myOff= offsets; 3100 cnv=ucnv_open("HZ", &errorCode); 3101 if(U_FAILURE(errorCode)) { 3102 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3103 return; 3104 } 3105 3106 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3107 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3108 uSource = (const UChar*)in; 3109 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3110 cTarget = cBuf; 3111 cTargetLimit = cBuf +uBufSize*5; 3112 uTarget = uBuf; 3113 uTargetLimit = uBuf+ uBufSize*5; 3114 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3115 if(U_FAILURE(errorCode)){ 3116 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3117 return; 3118 } 3119 cSource = cBuf; 3120 cSourceLimit =cTarget; 3121 test =uBuf; 3122 myOff=offsets; 3123 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3124 if(U_FAILURE(errorCode)){ 3125 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3126 return; 3127 } 3128 uSource = (const UChar*)in; 3129 while(uSource<uSourceLimit){ 3130 if(*test!=*uSource){ 3131 3132 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3133 } 3134 uSource++; 3135 test++; 3136 } 3137 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3138 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3139 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3140 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3141 TestJitterbug930("csISO2022JP"); 
3142 ucnv_close(cnv); 3143 free(offsets); 3144 free(uBuf); 3145 free(cBuf); 3146 } 3147 3148 static void 3149 TestISCII(){ 3150 /* test input */ 3151 static const uint16_t in[]={ 3152 /* test full range of Devanagari */ 3153 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3154 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3155 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3156 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3157 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3158 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3159 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3160 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3161 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3162 0x096D,0x096E,0x096F, 3163 /* test Soft halant*/ 3164 0x0915,0x094d, 0x200D, 3165 /* test explicit halant */ 3166 0x0915,0x094d, 0x200c, 3167 /* test double danda */ 3168 0x965, 3169 /* test ASCII */ 3170 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3171 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3172 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3173 /* tests from Lotus */ 3174 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3175 0x0930,0x094D,0x200D, 3176 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3177 0x0915,0x0921,0x002B,0x095F, 3178 /* tamil range */ 3179 0x0B86, 0xB87, 0xB88, 3180 /* telugu range */ 3181 0x0C05, 0x0C02, 0x0C03,0x0c31, 3182 /* kannada range */ 3183 0x0C85, 0xC82, 0x0C83, 3184 /* test Abbr sign and Anudatta */ 3185 0x0970, 0x952, 3186 /* 0x0958, 3187 0x0959, 3188 0x095A, 3189 0x095B, 3190 0x095C, 3191 0x095D, 3192 0x095E, 3193 0x095F,*/ 3194 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3195 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3196 0x090C , 3197 0x0962, 3198 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3199 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3200 0x0950 /* OM Symbol 0xa1, 
0xE9,*/, 3201 0x093D /* Avagraha 0xEA, 0xE9*/, 3202 0x0958, 3203 0x0959, 3204 0x095A, 3205 0x095B, 3206 0x095C, 3207 0x095D, 3208 0x095E, 3209 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3210 }; 3211 static const unsigned char byteArr[]={ 3212 3213 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3214 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3215 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3216 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3217 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3218 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3219 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3220 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3221 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3222 0xf8,0xf9,0xfa, 3223 /* test soft halant */ 3224 0xb3, 0xE8, 0xE9, 3225 /* test explicit halant */ 3226 0xb3, 0xE8, 0xE8, 3227 /* test double danda */ 3228 0xea, 0xea, 3229 /* test ASCII */ 3230 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3231 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3232 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3233 /* test ATR code */ 3234 3235 /* tests from Lotus */ 3236 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3237 0xEF,0x42,0xCF,0xE8,0xD9, 3238 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3239 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3240 /* tamil range */ 3241 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3242 /* telugu range */ 3243 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3244 /* kannada range */ 3245 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3246 /* anudatta and abbreviation sign */ 3247 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3248 3249 3250 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3251 3252 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3253 3254 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3255 3256 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3257 3258 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3259 3260 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3261 3262 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3263 3264 0xEA, 
0xE9, /* Danda + Nukta 0x093D*/ 3265 3266 0xB3, 0xE9, /* Ka + NUKTA */ 3267 3268 0xB4, 0xE9, /* Kha + NUKTA */ 3269 3270 0xB5, 0xE9, /* Ga + NUKTA */ 3271 3272 0xBA, 0xE9, 3273 3274 0xBF, 0xE9, 3275 3276 0xC0, 0xE9, 3277 3278 0xC9, 0xE9, 3279 /* INV halant RA */ 3280 0xD9, 0xE8, 0xCF, 3281 0x00, 0x00A0, 3282 /* just consume unhandled codepoints */ 3283 0xEF, 0x30, 3284 3285 }; 3286 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3287 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3288 3289 } 3290 3291 static void 3292 TestISO_2022_JP() { 3293 /* test input */ 3294 static const uint16_t in[]={ 3295 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3296 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3297 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3298 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3299 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3300 0x201D, 0x3014, 0x000D, 0x000A, 3301 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3302 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3303 }; 3304 const UChar* uSource; 3305 const UChar* uSourceLimit; 3306 const char* cSource; 3307 const char* cSourceLimit; 3308 UChar *uTargetLimit =NULL; 3309 UChar *uTarget; 3310 char *cTarget; 3311 const char *cTargetLimit; 3312 char *cBuf; 3313 UChar *uBuf,*test; 3314 int32_t uBufSize = 120; 3315 UErrorCode errorCode=U_ZERO_ERROR; 3316 UConverter *cnv; 3317 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3318 int32_t* myOff= offsets; 3319 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3320 if(U_FAILURE(errorCode)) { 3321 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3322 return; 3323 } 3324 3325 uBuf = 
(UChar*)malloc(uBufSize * sizeof(UChar)*5); 3326 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3327 uSource = (const UChar*)in; 3328 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3329 cTarget = cBuf; 3330 cTargetLimit = cBuf +uBufSize*5; 3331 uTarget = uBuf; 3332 uTargetLimit = uBuf+ uBufSize*5; 3333 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3334 if(U_FAILURE(errorCode)){ 3335 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3336 return; 3337 } 3338 cSource = cBuf; 3339 cSourceLimit =cTarget; 3340 test =uBuf; 3341 myOff=offsets; 3342 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3343 if(U_FAILURE(errorCode)){ 3344 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3345 return; 3346 } 3347 3348 uSource = (const UChar*)in; 3349 while(uSource<uSourceLimit){ 3350 if(*test!=*uSource){ 3351 3352 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3353 } 3354 uSource++; 3355 test++; 3356 } 3357 3358 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3359 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3360 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3361 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3362 TestJitterbug930("csISO2022JP"); 3363 ucnv_close(cnv); 3364 free(uBuf); 3365 free(cBuf); 3366 free(offsets); 3367 } 3368 3369 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3370 const UChar* uSource; 3371 const UChar* uSourceLimit; 3372 const char* cSource; 3373 const char* cSourceLimit; 3374 UChar *uTargetLimit =NULL; 3375 UChar *uTarget; 3376 char *cTarget; 3377 const char *cTargetLimit; 3378 char *cBuf; 3379 UChar *uBuf,*test; 3380 int32_t uBufSize = 120*10; 3381 UErrorCode errorCode=U_ZERO_ERROR; 3382 
UConverter *cnv; 3383 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3384 int32_t* myOff= offsets; 3385 cnv=my_ucnv_open(conv, &errorCode); 3386 if(U_FAILURE(errorCode)) { 3387 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3388 return; 3389 } 3390 3391 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3392 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3393 uSource = (const UChar*)in; 3394 uSourceLimit=uSource+len; 3395 cTarget = cBuf; 3396 cTargetLimit = cBuf +uBufSize; 3397 uTarget = uBuf; 3398 uTargetLimit = uBuf+ uBufSize; 3399 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3400 if(U_FAILURE(errorCode)){ 3401 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3402 return; 3403 } 3404 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3405 cSource = cBuf; 3406 cSourceLimit =cTarget; 3407 test =uBuf; 3408 myOff=offsets; 3409 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3410 if(U_FAILURE(errorCode)){ 3411 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3412 return; 3413 } 3414 3415 uSource = (const UChar*)in; 3416 while(uSource<uSourceLimit){ 3417 if(*test!=*uSource){ 3418 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3419 } 3420 uSource++; 3421 test++; 3422 } 3423 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3424 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3425 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3426 if(byteArr && byteArrLen!=0){ 3427 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3428 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3429 { 3430 cSource = byteArr; 3431 cSourceLimit = cSource+byteArrLen; 3432 test=uBuf; 3433 myOff = offsets; 3434 
ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3435 if(U_FAILURE(errorCode)){ 3436 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3437 return; 3438 } 3439 3440 uSource = (const UChar*)in; 3441 while(uSource<uSourceLimit){ 3442 if(*test!=*uSource){ 3443 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3444 } 3445 uSource++; 3446 test++; 3447 } 3448 } 3449 } 3450 3451 ucnv_close(cnv); 3452 free(uBuf); 3453 free(cBuf); 3454 free(offsets); 3455 } 3456 static UChar U_CALLCONV 3457 _charAt(int32_t offset, void *context) { 3458 return ((char*)context)[offset]; 3459 } 3460 3461 static int32_t 3462 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3463 int32_t srcIndex=0; 3464 int32_t dstIndex=0; 3465 if(U_FAILURE(*status)){ 3466 return 0; 3467 } 3468 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3469 *status = U_ILLEGAL_ARGUMENT_ERROR; 3470 return 0; 3471 } 3472 if(srcLen==-1){ 3473 srcLen = (int32_t)uprv_strlen(src); 3474 } 3475 3476 for (; srcIndex<srcLen; ) { 3477 UChar32 c = src[srcIndex++]; 3478 if (c == 0x005C /*'\\'*/) { 3479 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3480 if (c == (UChar32)0xFFFFFFFF) { 3481 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3482 break; /* invalid escape sequence */ 3483 } 3484 } 3485 if(dstIndex < dstLen){ 3486 if(c>0xFFFF){ 3487 dst[dstIndex++] = U16_LEAD(c); 3488 if(dstIndex<dstLen){ 3489 dst[dstIndex]=U16_TRAIL(c); 3490 }else{ 3491 *status=U_BUFFER_OVERFLOW_ERROR; 3492 } 3493 }else{ 3494 dst[dstIndex]=(UChar)c; 3495 } 3496 3497 }else{ 3498 *status = U_BUFFER_OVERFLOW_ERROR; 3499 } 3500 dstIndex++; /* for preflighting */ 3501 } 3502 return dstIndex; 3503 } 3504 3505 static void 3506 TestFullRoundtrip(const char* cp){ 3507 UChar usource[10] ={0}; 3508 UChar nsrc[10] = {0}; 3509 uint32_t i=1; 3510 int len=0, ulen; 3511 nsrc[0]=0x0061; 3512 
/* Test codepoint 0 */ 3513 TestConv(usource,1,cp,"",NULL,0); 3514 TestConv(usource,2,cp,"",NULL,0); 3515 nsrc[2]=0x5555; 3516 TestConv(nsrc,3,cp,"",NULL,0); 3517 3518 for(;i<=0x10FFFF;i++){ 3519 if(i==0xD800){ 3520 i=0xDFFF; 3521 continue; 3522 } 3523 if(i<=0xFFFF){ 3524 usource[0] =(UChar) i; 3525 len=1; 3526 }else{ 3527 usource[0]=U16_LEAD(i); 3528 usource[1]=U16_TRAIL(i); 3529 len=2; 3530 } 3531 ulen=len; 3532 if(i==0x80) { 3533 usource[2]=0; 3534 } 3535 /* Test only single code points */ 3536 TestConv(usource,ulen,cp,"",NULL,0); 3537 /* Test codepoint repeated twice */ 3538 usource[ulen]=usource[0]; 3539 usource[ulen+1]=usource[1]; 3540 ulen+=len; 3541 TestConv(usource,ulen,cp,"",NULL,0); 3542 /* Test codepoint repeated 3 times */ 3543 usource[ulen]=usource[0]; 3544 usource[ulen+1]=usource[1]; 3545 ulen+=len; 3546 TestConv(usource,ulen,cp,"",NULL,0); 3547 /* Test codepoint in between 2 codepoints */ 3548 nsrc[1]=usource[0]; 3549 nsrc[2]=usource[1]; 3550 nsrc[len+1]=0x5555; 3551 TestConv(nsrc,len+2,cp,"",NULL,0); 3552 uprv_memset(usource,0,sizeof(UChar)*10); 3553 } 3554 } 3555 3556 static void 3557 TestRoundTrippingAllUTF(void){ 3558 if(!getTestOption(QUICK_OPTION)){ 3559 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3560 TestFullRoundtrip("BOCU-1"); 3561 log_verbose("Running exhaustive round trip test for SCSU\n"); 3562 TestFullRoundtrip("SCSU"); 3563 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3564 TestFullRoundtrip("UTF-8"); 3565 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3566 TestFullRoundtrip("CESU-8"); 3567 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3568 TestFullRoundtrip("UTF-16BE"); 3569 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3570 TestFullRoundtrip("UTF-16LE"); 3571 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3572 TestFullRoundtrip("UTF-16"); 3573 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3574 
TestFullRoundtrip("UTF-32BE"); 3575 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3576 TestFullRoundtrip("UTF-32LE"); 3577 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3578 TestFullRoundtrip("UTF-32"); 3579 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3580 TestFullRoundtrip("UTF-7"); 3581 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3582 TestFullRoundtrip("UTF-7,version=1"); 3583 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3584 TestFullRoundtrip("IMAP-mailbox-name"); 3585 /* 3586 * 3587 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of 3588 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA). 3589 * The old mappings remain as fallbacks. 3590 * This test may be reintroduced at a later time. 3591 * 3592 * 110118 - mow 3593 */ 3594 /* 3595 log_verbose("Running exhaustive round trip test for GB18030\n"); 3596 TestFullRoundtrip("GB18030"); 3597 */ 3598 } 3599 } 3600 3601 static void 3602 TestSCSU() { 3603 3604 static const uint16_t germanUTF16[]={ 3605 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3606 }; 3607 3608 static const uint8_t germanSCSU[]={ 3609 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3610 }; 3611 3612 static const uint16_t russianUTF16[]={ 3613 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3614 }; 3615 3616 static const uint8_t russianSCSU[]={ 3617 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3618 }; 3619 3620 static const uint16_t japaneseUTF16[]={ 3621 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3622 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3623 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3624 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3625 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3626 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 
0x306a, 0x3044, 3627 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3628 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3629 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3630 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3631 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3632 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3633 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3634 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3635 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3636 }; 3637 3638 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3639 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3640 static const uint8_t japaneseSCSU[]={ 3641 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3642 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3643 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3644 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3645 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3646 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3647 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3648 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3649 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3650 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3651 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3652 0xcb, 0x82 
3653 }; 3654 3655 static const uint16_t allFeaturesUTF16[]={ 3656 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3657 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3658 0x01df, 0xf000, 0xdbff, 0xdfff 3659 }; 3660 3661 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3662 * result here (34B vs. 35B) 3663 */ 3664 static const uint8_t allFeaturesSCSU[]={ 3665 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3666 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3667 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3668 0xdf, 0x14, 0x80, 0x15, 0xff 3669 }; 3670 static const uint16_t monkeyIn[]={ 3671 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3672 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3673 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3674 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3675 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3676 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3677 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3678 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3679 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3680 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3681 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3682 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3683 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3684 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3685 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3686 0x9CD4, 
0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3687 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3688 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3689 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3690 /* test non-BMP code points */ 3691 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3692 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3693 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3694 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3695 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3696 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3697 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3698 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3699 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3700 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3701 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3702 3703 3704 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3705 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3706 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3707 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3708 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3709 }; 3710 static const char *fTestCases [] = { 3711 "\\ud800\\udc00", /* smallest surrogate*/ 3712 "\\ud8ff\\udcff", 3713 "\\udBff\\udFff", /* largest surrogate pair*/ 3714 "\\ud834\\udc00", 3715 "\\U0010FFFF", 3716 "Hello \\u9292 \\u9192 World!", 3717 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3718 
"Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3719 3720 "\\u0648\\u06c8", /* catch missing reset*/ 3721 "\\u0648\\u06c8", 3722 3723 "\\u4444\\uE001", /* lowest quotable*/ 3724 "\\u4444\\uf2FF", /* highest quotable*/ 3725 "\\u4444\\uf188\\u4444", 3726 "\\u4444\\uf188\\uf288", 3727 "\\u4444\\uf188abc\\u0429\\uf288", 3728 "\\u9292\\u2222", 3729 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3730 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3731 "Hello World!123456", 3732 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3733 3734 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3735 "abc\\u4411d", /* uses SQU*/ 3736 "abc\\u4411\\u4412d",/* uses SCU*/ 3737 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3738 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3739 "\\u9292\\u2222", 3740 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3741 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3742 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3743 3744 "", /* empty input*/ 3745 "\\u0000", /* smallest BMP character*/ 3746 "\\uFFFF", /* largest BMP character*/ 3747 3748 /* regression tests*/ 3749 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3750 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3751 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3752 "\\u0041\\u00df\\u0401\\u015f", 3753 "\\u9066\\u2123abc", 3754 
"\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3755 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3756 }; 3757 int i=0; 3758 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3759 const char* cSrc = fTestCases[i]; 3760 UErrorCode status = U_ZERO_ERROR; 3761 int32_t cSrcLen,srcLen; 3762 UChar* src; 3763 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3764 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3765 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3766 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3767 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3768 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3769 free(src); 3770 } 3771 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3772 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3773 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3774 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3775 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3776 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3777 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3778 } 3779 3780 #if !UCONFIG_NO_LEGACY_CONVERSION 3781 static void TestJitterbug2346(){ 3782 char 
source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3783 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3784 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3785 3786 UChar uTarget[500]={'\0'}; 3787 UChar* utarget=uTarget; 3788 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3789 3790 char cTarget[500]={'\0'}; 3791 char* ctarget=cTarget; 3792 char* ctargetLimit=cTarget+sizeof(cTarget); 3793 const char* csource=source; 3794 UChar* temp = expected; 3795 UErrorCode err=U_ZERO_ERROR; 3796 3797 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3798 if(U_FAILURE(err)) { 3799 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3800 return; 3801 } 3802 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3803 if(U_FAILURE(err)) { 3804 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3805 return; 3806 } 3807 utargetLimit=utarget; 3808 utarget = uTarget; 3809 while(utarget<utargetLimit){ 3810 if(*temp!=*utarget){ 3811 3812 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3813 } 3814 utarget++; 3815 temp++; 3816 } 3817 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3818 if(U_FAILURE(err)) { 3819 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3820 return; 3821 } 3822 ctargetLimit=ctarget; 3823 ctarget =cTarget; 3824 ucnv_close(conv); 3825 3826 3827 } 3828 3829 static void 3830 TestISO_2022_JP_1() { 3831 /* test input */ 3832 static const uint16_t in[]={ 3833 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3834 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3835 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3836 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3837 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 
0x005C, 0x000D, 0x000A, 3838 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3839 0x201D, 0x000D, 0x000A, 3840 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3841 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3842 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3843 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3844 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3845 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3846 }; 3847 const UChar* uSource; 3848 const UChar* uSourceLimit; 3849 const char* cSource; 3850 const char* cSourceLimit; 3851 UChar *uTargetLimit =NULL; 3852 UChar *uTarget; 3853 char *cTarget; 3854 const char *cTargetLimit; 3855 char *cBuf; 3856 UChar *uBuf,*test; 3857 int32_t uBufSize = 120; 3858 UErrorCode errorCode=U_ZERO_ERROR; 3859 UConverter *cnv; 3860 3861 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3862 if(U_FAILURE(errorCode)) { 3863 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3864 return; 3865 } 3866 3867 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3868 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3869 uSource = (const UChar*)in; 3870 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3871 cTarget = cBuf; 3872 cTargetLimit = cBuf +uBufSize*5; 3873 uTarget = uBuf; 3874 uTargetLimit = uBuf+ uBufSize*5; 3875 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3876 if(U_FAILURE(errorCode)){ 3877 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3878 return; 3879 } 3880 cSource = cBuf; 3881 cSourceLimit =cTarget; 3882 test =uBuf; 3883 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3884 if(U_FAILURE(errorCode)){ 3885 log_err("ucnv_toUnicode conversion failed reason %s\n", 
u_errorName(errorCode)); 3886 return; 3887 } 3888 uSource = (const UChar*)in; 3889 while(uSource<uSourceLimit){ 3890 if(*test!=*uSource){ 3891 3892 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3893 } 3894 uSource++; 3895 test++; 3896 } 3897 /*ucnv_close(cnv); 3898 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3899 /*Test for the condition where there is an invalid character*/ 3900 ucnv_reset(cnv); 3901 { 3902 static const uint8_t source2[]={0x0e,0x24,0x053}; 3903 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3904 } 3905 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3906 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3907 ucnv_close(cnv); 3908 free(uBuf); 3909 free(cBuf); 3910 } 3911 3912 static void 3913 TestISO_2022_JP_2() { 3914 /* test input */ 3915 static const uint16_t in[]={ 3916 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3917 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3918 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3919 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3920 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3921 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3922 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3923 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3924 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3925 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3926 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3927 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 
0x000A, 3928 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3929 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3930 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3931 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3932 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3933 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3934 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3935 }; 3936 const UChar* uSource; 3937 const UChar* uSourceLimit; 3938 const char* cSource; 3939 const char* cSourceLimit; 3940 UChar *uTargetLimit =NULL; 3941 UChar *uTarget; 3942 char *cTarget; 3943 const char *cTargetLimit; 3944 char *cBuf; 3945 UChar *uBuf,*test; 3946 int32_t uBufSize = 120; 3947 UErrorCode errorCode=U_ZERO_ERROR; 3948 UConverter *cnv; 3949 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3950 int32_t* myOff= offsets; 3951 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3952 if(U_FAILURE(errorCode)) { 3953 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3954 return; 3955 } 3956 3957 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3958 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3959 uSource = (const UChar*)in; 3960 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3961 cTarget = cBuf; 3962 cTargetLimit = cBuf +uBufSize*5; 3963 uTarget = uBuf; 3964 uTargetLimit = uBuf+ uBufSize*5; 3965 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3966 if(U_FAILURE(errorCode)){ 3967 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3968 return; 3969 } 3970 cSource = cBuf; 3971 cSourceLimit =cTarget; 3972 test =uBuf; 3973 myOff=offsets; 3974 
ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3975 if(U_FAILURE(errorCode)){ 3976 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3977 return; 3978 } 3979 uSource = (const UChar*)in; 3980 while(uSource<uSourceLimit){ 3981 if(*test!=*uSource){ 3982 3983 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3984 } 3985 uSource++; 3986 test++; 3987 } 3988 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3989 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3990 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3991 /*Test for the condition where there is an invalid character*/ 3992 ucnv_reset(cnv); 3993 { 3994 static const uint8_t source2[]={0x0e,0x24,0x053}; 3995 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 3996 } 3997 ucnv_close(cnv); 3998 free(uBuf); 3999 free(cBuf); 4000 free(offsets); 4001 } 4002 4003 static void 4004 TestISO_2022_KR() { 4005 /* test input */ 4006 static const uint16_t in[]={ 4007 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4008 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4009 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4010 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4011 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4012 ,0x53E3,0x53E4,0x000A,0x000D}; 4013 const UChar* uSource; 4014 const UChar* uSourceLimit; 4015 const char* cSource; 4016 const char* cSourceLimit; 4017 UChar *uTargetLimit =NULL; 4018 UChar *uTarget; 4019 char *cTarget; 4020 const char *cTargetLimit; 4021 char *cBuf; 4022 UChar *uBuf,*test; 4023 int32_t uBufSize = 120; 4024 UErrorCode errorCode=U_ZERO_ERROR; 4025 UConverter *cnv; 4026 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4027 
int32_t* myOff= offsets; 4028 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 4029 if(U_FAILURE(errorCode)) { 4030 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4031 return; 4032 } 4033 4034 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4035 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4036 uSource = (const UChar*)in; 4037 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4038 cTarget = cBuf; 4039 cTargetLimit = cBuf +uBufSize*5; 4040 uTarget = uBuf; 4041 uTargetLimit = uBuf+ uBufSize*5; 4042 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4043 if(U_FAILURE(errorCode)){ 4044 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4045 return; 4046 } 4047 cSource = cBuf; 4048 cSourceLimit =cTarget; 4049 test =uBuf; 4050 myOff=offsets; 4051 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4052 if(U_FAILURE(errorCode)){ 4053 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4054 return; 4055 } 4056 uSource = (const UChar*)in; 4057 while(uSource<uSourceLimit){ 4058 if(*test!=*uSource){ 4059 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4060 } 4061 uSource++; 4062 test++; 4063 } 4064 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4065 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4066 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4067 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4068 TestJitterbug930("csISO2022KR"); 4069 /*Test for the condition where there is an invalid character*/ 4070 ucnv_reset(cnv); 4071 { 4072 static const uint8_t source2[]={0x1b,0x24,0x053}; 4073 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4074 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), 
U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4075 } 4076 ucnv_close(cnv); 4077 free(uBuf); 4078 free(cBuf); 4079 free(offsets); 4080 } 4081 4082 static void 4083 TestISO_2022_KR_1() { 4084 /* test input */ 4085 static const uint16_t in[]={ 4086 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4087 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4088 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4089 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4090 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4091 ,0x53E3,0x53E4,0x000A,0x000D}; 4092 const UChar* uSource; 4093 const UChar* uSourceLimit; 4094 const char* cSource; 4095 const char* cSourceLimit; 4096 UChar *uTargetLimit =NULL; 4097 UChar *uTarget; 4098 char *cTarget; 4099 const char *cTargetLimit; 4100 char *cBuf; 4101 UChar *uBuf,*test; 4102 int32_t uBufSize = 120; 4103 UErrorCode errorCode=U_ZERO_ERROR; 4104 UConverter *cnv; 4105 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4106 int32_t* myOff= offsets; 4107 cnv=ucnv_open("ibm-25546", &errorCode); 4108 if(U_FAILURE(errorCode)) { 4109 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4110 return; 4111 } 4112 4113 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4114 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4115 uSource = (const UChar*)in; 4116 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4117 cTarget = cBuf; 4118 cTargetLimit = cBuf +uBufSize*5; 4119 uTarget = uBuf; 4120 uTargetLimit = uBuf+ uBufSize*5; 4121 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4122 if(U_FAILURE(errorCode)){ 4123 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4124 return; 4125 } 4126 cSource = cBuf; 4127 cSourceLimit =cTarget; 4128 test =uBuf; 4129 myOff=offsets; 4130 
ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4131 if(U_FAILURE(errorCode)){ 4132 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4133 return; 4134 } 4135 uSource = (const UChar*)in; 4136 while(uSource<uSourceLimit){ 4137 if(*test!=*uSource){ 4138 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4139 } 4140 uSource++; 4141 test++; 4142 } 4143 ucnv_reset(cnv); 4144 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4145 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4146 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4147 ucnv_reset(cnv); 4148 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4149 /*Test for the condition where there is an invalid character*/ 4150 ucnv_reset(cnv); 4151 { 4152 static const uint8_t source2[]={0x1b,0x24,0x053}; 4153 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4154 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4155 } 4156 ucnv_close(cnv); 4157 free(uBuf); 4158 free(cBuf); 4159 free(offsets); 4160 } 4161 4162 static void TestJitterbug2411(){ 4163 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4164 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4165 UConverter* kr=NULL, *kr1=NULL; 4166 UErrorCode errorCode = U_ZERO_ERROR; 4167 UChar tgt[100]={'\0'}; 4168 UChar* target = tgt; 4169 UChar* targetLimit = target+100; 4170 kr=ucnv_open("iso-2022-kr", &errorCode); 4171 if(U_FAILURE(errorCode)) { 4172 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4173 return; 4174 } 4175 
ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4176 if(U_FAILURE(errorCode)) { 4177 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4178 return; 4179 } 4180 kr1 = ucnv_open("ibm-25546", &errorCode); 4181 if(U_FAILURE(errorCode)) { 4182 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4183 return; 4184 } 4185 target = tgt; 4186 targetLimit = target+100; 4187 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4188 4189 if(U_FAILURE(errorCode)) { 4190 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4191 return; 4192 } 4193 4194 ucnv_close(kr); 4195 ucnv_close(kr1); 4196 4197 } 4198 4199 static void 4200 TestJIS(){ 4201 /* From Unicode moved to testdata/conversion.txt */ 4202 /*To Unicode*/ 4203 { 4204 static const uint8_t sampleTextJIS[] = { 4205 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4206 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4207 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4208 }; 4209 static const uint16_t expectedISO2022JIS[] = { 4210 0x0041, 0x0042, 4211 0xFF81, 0xFF82, 4212 0x3000 4213 }; 4214 static const int32_t toISO2022JISOffs[]={ 4215 3,4, 4216 8,9, 4217 16 4218 }; 4219 4220 static const uint8_t sampleTextJIS7[] = { 4221 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4222 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4223 0x1b,0x24,0x42,0x21,0x21, 4224 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4225 0x21,0x22, 4226 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4227 }; 4228 static const uint16_t expectedISO2022JIS7[] = { 4229 0x0041, 0x0042, 4230 0xFF81, 0xFF82, 4231 0x3000, 4232 0xFF81, 0xFF82, 4233 0x3001, 4234 0x3000 4235 }; 4236 static const int32_t toISO2022JIS7Offs[]={ 4237 3,4, 4238 8,9, 4239 13,16, 4240 17, 4241 19,27 4242 }; 4243 static const uint8_t 
sampleTextJIS8[] = { 4244 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4245 0xa1,0xc8,0xd9,/*Katakana Set*/ 4246 0x1b,0x28,0x42, 4247 0x41,0x42, 4248 0xb1,0xc3, /*Katakana Set*/ 4249 0x1b,0x24,0x42,0x21,0x21 4250 }; 4251 static const uint16_t expectedISO2022JIS8[] = { 4252 0x0041, 0x0042, 4253 0xff61, 0xff88, 0xff99, 4254 0x0041, 0x0042, 4255 0xff71, 0xff83, 4256 0x3000 4257 }; 4258 static const int32_t toISO2022JIS8Offs[]={ 4259 3, 4, 5, 6, 4260 7, 11, 12, 13, 4261 14, 18, 4262 }; 4263 4264 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4265 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4266 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4267 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4268 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4269 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4270 } 4271 4272 } 4273 4274 4275 #if 0 4276 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 4277 4278 static void TestJitterbug915(){ 4279 /* tests for roundtripping of the below sequence 4280 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4281 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4282 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4283 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4284 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4285 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4286 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4287 */ 4288 static const char cSource[]={ 4289 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4290 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4291 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4292 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4293 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 
0x4E, 0x22, 0x21, 4294 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4295 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4296 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4297 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4298 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4299 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4300 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4301 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4302 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4303 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4304 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4305 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4306 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4307 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4308 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4309 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4310 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4311 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4312 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4313 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4314 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4315 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4316 0x37, 0x20, 0x2A, 0x2F 4317 }; 4318 UChar uTarget[500]={'\0'}; 4319 UChar* utarget=uTarget; 4320 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4321 4322 char cTarget[500]={'\0'}; 4323 char* ctarget=cTarget; 4324 char* ctargetLimit=cTarget+sizeof(cTarget); 4325 const char* csource=cSource; 4326 const char* tempSrc = cSource; 4327 UErrorCode err=U_ZERO_ERROR; 4328 4329 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4330 if(U_FAILURE(err)) { 4331 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4332 return; 4333 } 4334 
ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4335 if(U_FAILURE(err)) { 4336 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4337 return; 4338 } 4339 utargetLimit=utarget; 4340 utarget = uTarget; 4341 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4342 if(U_FAILURE(err)) { 4343 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4344 return; 4345 } 4346 ctargetLimit=ctarget; 4347 ctarget =cTarget; 4348 while(ctarget<ctargetLimit){ 4349 if(*ctarget != *tempSrc){ 4350 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4351 } 4352 ++ctarget; 4353 ++tempSrc; 4354 } 4355 4356 ucnv_close(conv); 4357 } 4358 4359 static void 4360 TestISO_2022_CN_EXT() { 4361 /* test input */ 4362 static const uint16_t in[]={ 4363 /* test Non-BMP code points */ 4364 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4365 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4366 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4367 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4368 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4369 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4370 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4371 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4372 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4373 0xD869, 0xDED5, 4374 4375 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4376 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4377 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 
0x000A, 4378 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4379 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4380 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4381 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4382 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4383 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4384 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4385 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4386 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4387 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4388 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4389 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4390 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4391 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4392 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4393 4394 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4395 4396 }; 4397 4398 const UChar* uSource; 4399 const UChar* uSourceLimit; 4400 const char* cSource; 4401 const char* cSourceLimit; 4402 UChar *uTargetLimit =NULL; 4403 UChar *uTarget; 4404 char *cTarget; 4405 const char *cTargetLimit; 4406 char *cBuf; 4407 UChar *uBuf,*test; 4408 int32_t uBufSize = 180; 4409 UErrorCode errorCode=U_ZERO_ERROR; 4410 UConverter *cnv; 4411 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4412 int32_t* myOff= offsets; 4413 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4414 if(U_FAILURE(errorCode)) { 4415 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 
4416 return; 4417 } 4418 4419 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4420 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4421 uSource = (const UChar*)in; 4422 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4423 cTarget = cBuf; 4424 cTargetLimit = cBuf +uBufSize*5; 4425 uTarget = uBuf; 4426 uTargetLimit = uBuf+ uBufSize*5; 4427 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4428 if(U_FAILURE(errorCode)){ 4429 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4430 return; 4431 } 4432 cSource = cBuf; 4433 cSourceLimit =cTarget; 4434 test =uBuf; 4435 myOff=offsets; 4436 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4437 if(U_FAILURE(errorCode)){ 4438 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4439 return; 4440 } 4441 uSource = (const UChar*)in; 4442 while(uSource<uSourceLimit){ 4443 if(*test!=*uSource){ 4444 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4445 } 4446 else{ 4447 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4448 } 4449 uSource++; 4450 test++; 4451 } 4452 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4453 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4454 /*Test for the condition where there is an invalid character*/ 4455 ucnv_reset(cnv); 4456 { 4457 static const uint8_t source2[]={0x0e,0x24,0x053}; 4458 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4459 } 4460 ucnv_close(cnv); 4461 free(uBuf); 4462 free(cBuf); 4463 free(offsets); 4464 } 4465 #endif 4466 4467 static void 4468 TestISO_2022_CN() { 4469 /* test input */ 4470 static const uint16_t in[]={ 4471 /* jitterbug 951 */ 4472 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4473 0x0020, 0xFF06, 0x0020, 0xFF11, 
0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4474 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4475 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4476 0x0020, 0x0045, 0x004e, 0x0044, 4477 /**/ 4478 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4479 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4480 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4481 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4482 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4483 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4484 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4485 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4486 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4487 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4488 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4489 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4490 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4491 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4492 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4493 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4494 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4495 4496 }; 4497 const UChar* uSource; 4498 const UChar* uSourceLimit; 4499 const char* cSource; 4500 const char* cSourceLimit; 4501 UChar *uTargetLimit =NULL; 4502 UChar *uTarget; 4503 char *cTarget; 4504 const char *cTargetLimit; 4505 char *cBuf; 4506 UChar *uBuf,*test; 4507 int32_t 
uBufSize = 180; 4508 UErrorCode errorCode=U_ZERO_ERROR; 4509 UConverter *cnv; 4510 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4511 int32_t* myOff= offsets; 4512 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4513 if(U_FAILURE(errorCode)) { 4514 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4515 return; 4516 } 4517 4518 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4519 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4520 uSource = (const UChar*)in; 4521 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4522 cTarget = cBuf; 4523 cTargetLimit = cBuf +uBufSize*5; 4524 uTarget = uBuf; 4525 uTargetLimit = uBuf+ uBufSize*5; 4526 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4527 if(U_FAILURE(errorCode)){ 4528 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4529 return; 4530 } 4531 cSource = cBuf; 4532 cSourceLimit =cTarget; 4533 test =uBuf; 4534 myOff=offsets; 4535 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4536 if(U_FAILURE(errorCode)){ 4537 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4538 return; 4539 } 4540 uSource = (const UChar*)in; 4541 while(uSource<uSourceLimit){ 4542 if(*test!=*uSource){ 4543 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4544 } 4545 else{ 4546 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4547 } 4548 uSource++; 4549 test++; 4550 } 4551 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4552 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4553 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4554 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4555 TestJitterbug930("csISO2022CN"); 4556 /*Test for the condition where there is an invalid character*/ 4557 ucnv_reset(cnv); 
4558 { 4559 static const uint8_t source2[]={0x0e,0x24,0x053}; 4560 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4561 } 4562 4563 ucnv_close(cnv); 4564 free(uBuf); 4565 free(cBuf); 4566 free(offsets); 4567 } 4568 4569 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4570 typedef struct { 4571 const char * converterName; 4572 const char * inputText; 4573 int inputTextLength; 4574 } EmptySegmentTest; 4575 4576 /* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4577 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4578 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4579 if (reason > UCNV_IRREGULAR) { 4580 return; 4581 } 4582 if (reason != UCNV_IRREGULAR) { 4583 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4584 } 4585 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4586 *err = U_ZERO_ERROR; 4587 ucnv_cbToUWriteSub(toArgs,0,err); 4588 } 4589 4590 enum { kEmptySegmentToUCharsMax = 64 }; 4591 static void TestJitterbug6175(void) { 4592 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4593 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4594 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4595 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4596 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4597 static const EmptySegmentTest emptySegmentTests[] = { 4598 /* converterName inputText inputTextLength */ 4599 { "ISO-2022-JP", 
iso2022jp_a, sizeof(iso2022jp_a) }, 4600 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4601 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4602 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4603 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, 4604 /* terminator: */ 4605 { NULL, NULL, 0, } 4606 }; 4607 const EmptySegmentTest * testPtr; 4608 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4609 UErrorCode err = U_ZERO_ERROR; 4610 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4611 if (U_FAILURE(err)) { 4612 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4613 return; 4614 } 4615 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4616 if (U_FAILURE(err)) { 4617 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4618 ucnv_close(cnv); 4619 return; 4620 } 4621 { 4622 UChar toUChars[kEmptySegmentToUCharsMax]; 4623 UChar * toUCharsPtr = toUChars; 4624 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4625 const char * inCharsPtr = testPtr->inputText; 4626 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4627 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4628 } 4629 ucnv_close(cnv); 4630 } 4631 } 4632 4633 static void 4634 TestEBCDIC_STATEFUL() { 4635 /* test input */ 4636 static const uint8_t in[]={ 4637 0x61, 4638 0x1a, 4639 0x0f, 0x4b, 4640 0x42, 4641 0x40, 4642 0x36, 4643 }; 4644 4645 /* expected test results */ 4646 static const int32_t results[]={ 4647 /* number of bytes read, code point */ 4648 1, 0x002f, 4649 1, 0x0092, 4650 2, 0x002e, 4651 1, 0xff62, 4652 1, 0x0020, 4653 1, 0x0096, 4654 4655 }; 4656 static const uint8_t in2[]={ 4657 0x0f, 4658 0xa1, 4659 0x01 4660 }; 4661 4662 /* expected test results */ 4663 static const int32_t results2[]={ 4664 /* number of bytes read, code point */ 
4665 2, 0x203E, 4666 1, 0x0001, 4667 }; 4668 4669 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4670 UErrorCode errorCode=U_ZERO_ERROR; 4671 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4672 if(U_FAILURE(errorCode)) { 4673 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4674 return; 4675 } 4676 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4677 ucnv_reset(cnv); 4678 /* Test the condition when source >= sourceLimit */ 4679 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4680 ucnv_reset(cnv); 4681 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4682 { 4683 static const uint8_t source1[]={0x0f}; 4684 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4685 } 4686 /*Test for the condition where there is an invalid character*/ 4687 ucnv_reset(cnv); 4688 { 4689 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4690 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4691 } 4692 ucnv_reset(cnv); 4693 source=(const char*)in2; 4694 limit=(const char*)in2+sizeof(in2); 4695 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4696 ucnv_close(cnv); 4697 4698 } 4699 4700 static void 4701 TestGB18030() { 4702 /* test input */ 4703 static const uint8_t in[]={ 4704 0x24, 4705 0x7f, 4706 0x81, 0x30, 0x81, 0x30, 4707 0xa8, 0xbf, 4708 0xa2, 0xe3, 4709 0xd2, 0xbb, 4710 0x82, 0x35, 0x8f, 0x33, 4711 0x84, 0x31, 0xa4, 0x39, 4712 0x90, 0x30, 0x81, 0x30, 4713 0xe3, 0x32, 0x9a, 0x35 4714 #if 0 4715 /* 4716 * Feature removed markus 2000-oct-26 4717 * Only some codepages must match surrogate pairs into supplementary code points - 4718 * see javadoc for ucnv_getNextUChar() and implementation notes in 
ucnvmbcs.c . 4719 * GB 18030 provides direct encodings for supplementary code points, therefore 4720 * it must not combine two single-encoded surrogates into one code point. 4721 */ 4722 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4723 #endif 4724 }; 4725 4726 /* expected test results */ 4727 static const int32_t results[]={ 4728 /* number of bytes read, code point */ 4729 1, 0x24, 4730 1, 0x7f, 4731 4, 0x80, 4732 2, 0x1f9, 4733 2, 0x20ac, 4734 2, 0x4e00, 4735 4, 0x9fa6, 4736 4, 0xffff, 4737 4, 0x10000, 4738 4, 0x10ffff 4739 #if 0 4740 /* Feature removed. See comment above. */ 4741 8, 0x10000 4742 #endif 4743 }; 4744 4745 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4746 UErrorCode errorCode=U_ZERO_ERROR; 4747 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4748 if(U_FAILURE(errorCode)) { 4749 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4750 return; 4751 } 4752 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4753 ucnv_close(cnv); 4754 } 4755 4756 static void 4757 TestLMBCS() { 4758 /* LMBCS-1 string */ 4759 static const uint8_t pszLMBCS[]={ 4760 0x61, 4761 0x01, 0x29, 4762 0x81, 4763 0xA0, 4764 0x0F, 0x27, 4765 0x0F, 0x91, 4766 0x14, 0x0a, 0x74, 4767 0x14, 0xF6, 0x02, 4768 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4769 0x10, 0x88, 0xA0, 4770 }; 4771 4772 /* Unicode UChar32 equivalents */ 4773 static const UChar32 pszUnicode32[]={ 4774 /* code point */ 4775 0x00000061, 4776 0x00002013, 4777 0x000000FC, 4778 0x000000E1, 4779 0x00000007, 4780 0x00000091, 4781 0x00000a74, 4782 0x00000200, 4783 0x00023456, /* code point for surrogate pair */ 4784 0x00005516 4785 }; 4786 4787 /* Unicode UChar equivalents */ 4788 static const UChar pszUnicode[]={ 4789 /* code point */ 4790 0x0061, 4791 0x2013, 4792 0x00FC, 4793 0x00E1, 4794 0x0007, 4795 0x0091, 4796 0x0a74, 4797 0x0200, 4798 0xD84D, /* low surrogate */ 
4799 0xDC56, /* high surrogate */ 4800 0x5516 4801 }; 4802 4803 /* expected test results */ 4804 static const int offsets32[]={ 4805 /* number of bytes read, code point */ 4806 0, 4807 1, 4808 3, 4809 4, 4810 5, 4811 7, 4812 9, 4813 12, 4814 15, 4815 21, 4816 24 4817 }; 4818 4819 /* expected test results */ 4820 static const int offsets[]={ 4821 /* number of bytes read, code point */ 4822 0, 4823 1, 4824 3, 4825 4, 4826 5, 4827 7, 4828 9, 4829 12, 4830 15, 4831 18, 4832 21, 4833 24 4834 }; 4835 4836 4837 UConverter *cnv; 4838 4839 #define NAME_LMBCS_1 "LMBCS-1" 4840 #define NAME_LMBCS_2 "LMBCS-2" 4841 4842 4843 /* Some basic open/close/property tests on some LMBCS converters */ 4844 { 4845 4846 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4847 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4848 char get_subchars [1]; 4849 const char * get_name; 4850 UConverter *cnv1; 4851 UConverter *cnv2; 4852 4853 int8_t len = sizeof(get_subchars); 4854 4855 UErrorCode errorCode=U_ZERO_ERROR; 4856 4857 /* Open */ 4858 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4859 if(U_FAILURE(errorCode)) { 4860 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4861 return; 4862 } 4863 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4864 if(U_FAILURE(errorCode)) { 4865 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4866 return; 4867 } 4868 4869 /* Name */ 4870 get_name = ucnv_getName (cnv1, &errorCode); 4871 if (strcmp(NAME_LMBCS_1,get_name)){ 4872 log_err("Unexpected converter name: %s\n", get_name); 4873 } 4874 get_name = ucnv_getName (cnv2, &errorCode); 4875 if (strcmp(NAME_LMBCS_2,get_name)){ 4876 log_err("Unexpected converter name: %s\n", get_name); 4877 } 4878 4879 /* substitution chars */ 4880 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4881 if(U_FAILURE(errorCode)) { 4882 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4883 } 4884 if (len!=1){ 4885 
log_err("Unexpected length of sub chars\n"); 4886 } 4887 if (get_subchars[0] != expected_subchars[0]){ 4888 log_err("Unexpected value of sub chars\n"); 4889 } 4890 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4891 if(U_FAILURE(errorCode)) { 4892 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4893 } 4894 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4895 if(U_FAILURE(errorCode)) { 4896 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4897 } 4898 if (len!=1){ 4899 log_err("Unexpected length of sub chars\n"); 4900 } 4901 if (get_subchars[0] != new_subchars[0]){ 4902 log_err("Unexpected value of sub chars\n"); 4903 } 4904 ucnv_close(cnv1); 4905 ucnv_close(cnv2); 4906 4907 } 4908 4909 /* LMBCS to Unicode - offsets */ 4910 { 4911 UErrorCode errorCode=U_ZERO_ERROR; 4912 4913 const char * pSource = (const char *)pszLMBCS; 4914 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4915 4916 UChar Out [sizeof(pszUnicode) + 1]; 4917 UChar * pOut = Out; 4918 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4919 4920 int32_t off [sizeof(offsets)]; 4921 4922 /* last 'offset' in expected results is just the final size. 4923 (Makes other tests easier). 
Compensate here: */ 4924 4925 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4926 4927 4928 4929 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4930 if(U_FAILURE(errorCode)) { 4931 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4932 return; 4933 } 4934 4935 4936 4937 ucnv_toUnicode (cnv, 4938 &pOut, 4939 OutLimit, 4940 &pSource, 4941 sourceLimit, 4942 off, 4943 TRUE, 4944 &errorCode); 4945 4946 4947 if (memcmp(off,offsets,sizeof(offsets))) 4948 { 4949 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4950 } 4951 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4952 { 4953 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4954 } 4955 ucnv_close(cnv); 4956 } 4957 { 4958 /* LMBCS to Unicode - getNextUChar */ 4959 const char * sourceStart; 4960 const char *source=(const char *)pszLMBCS; 4961 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4962 const UChar32 *results= pszUnicode32; 4963 const int *off = offsets32; 4964 4965 UErrorCode errorCode=U_ZERO_ERROR; 4966 UChar32 uniChar; 4967 4968 cnv=ucnv_open("LMBCS-1", &errorCode); 4969 if(U_FAILURE(errorCode)) { 4970 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4971 return; 4972 } 4973 else 4974 { 4975 4976 while(source<limit) { 4977 sourceStart=source; 4978 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4979 if(U_FAILURE(errorCode)) { 4980 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4981 break; 4982 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4983 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4984 uniChar, (source-sourceStart), *results, *off); 4985 break; 4986 } 4987 results++; 4988 off++; 4989 } 4990 } 4991 ucnv_close(cnv); 4992 } 4993 { /* test locale & optimization group operations: Unicode to 
LMBCS */ 4994 4995 UErrorCode errorCode=U_ZERO_ERROR; 4996 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 4997 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 4998 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 4999 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 5000 const UChar * pUniOut = uniString; 5001 UChar * pUniIn = uniString; 5002 uint8_t lmbcsString [4]; 5003 const char * pLMBCSOut = (const char *)lmbcsString; 5004 char * pLMBCSIn = (char *)lmbcsString; 5005 5006 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 5007 ucnv_fromUnicode (cnv16he, 5008 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5009 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5010 NULL, 1, &errorCode); 5011 5012 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 5013 { 5014 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 5015 } 5016 5017 pLMBCSIn= (char *)lmbcsString; 5018 pUniOut = uniString; 5019 ucnv_fromUnicode (cnv01us, 5020 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5021 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5022 NULL, 1, &errorCode); 5023 5024 if (lmbcsString[0] != 0x9F) 5025 { 5026 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5027 } 5028 5029 /* single byte char from mbcs char set */ 5030 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5031 pLMBCSOut = (const char *)lmbcsString; 5032 pUniIn = uniString; 5033 ucnv_toUnicode (cnv16jp, 5034 &pUniIn, pUniIn + 1, 5035 &pLMBCSOut, (pLMBCSOut + 1), 5036 NULL, 1, &errorCode); 5037 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5038 { 5039 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5040 } 5041 /* convert to group 1: should be 3 bytes */ 5042 pLMBCSIn = (char *)lmbcsString; 5043 
pUniOut = uniString; 5044 ucnv_fromUnicode (cnv01us, 5045 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5046 &pUniOut, pUniOut + 1, 5047 NULL, 1, &errorCode); 5048 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5049 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5050 { 5051 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5052 } 5053 pLMBCSOut = (const char *)lmbcsString; 5054 pUniIn = uniString; 5055 ucnv_toUnicode (cnv01us, 5056 &pUniIn, pUniIn + 1, 5057 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5058 NULL, 1, &errorCode); 5059 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5060 { 5061 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5062 } 5063 pLMBCSIn = (char *)lmbcsString; 5064 pUniOut = uniString; 5065 ucnv_fromUnicode (cnv16jp, 5066 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5067 &pUniOut, pUniOut + 1, 5068 NULL, 1, &errorCode); 5069 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5070 { 5071 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5072 } 5073 ucnv_close(cnv16he); 5074 ucnv_close(cnv16jp); 5075 ucnv_close(cnv01us); 5076 } 5077 { 5078 /* Small source buffer testing, LMBCS -> Unicode */ 5079 5080 UErrorCode errorCode=U_ZERO_ERROR; 5081 5082 const char * pSource = (const char *)pszLMBCS; 5083 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5084 int codepointCount = 0; 5085 5086 UChar Out [sizeof(pszUnicode) + 1]; 5087 UChar * pOut = Out; 5088 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 5089 5090 5091 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5092 if(U_FAILURE(errorCode)) { 5093 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5094 return; 5095 } 5096 5097 5098 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 
5099 { 5100 ucnv_toUnicode (cnv, 5101 &pOut, 5102 OutLimit, 5103 &pSource, 5104 (pSource+1), /* claim that this is a 1- byte buffer */ 5105 NULL, 5106 FALSE, /* FALSE means there might be more chars in the next buffer */ 5107 &errorCode); 5108 5109 if (U_SUCCESS (errorCode)) 5110 { 5111 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5112 { 5113 /* we are on to the next code point: check value */ 5114 5115 if (Out[0] != pszUnicode[codepointCount]){ 5116 log_err("LMBCS->Uni result %lx should have been %lx \n", 5117 Out[0], pszUnicode[codepointCount]); 5118 } 5119 5120 pOut = Out; /* reset for accumulating next code point */ 5121 codepointCount++; 5122 } 5123 } 5124 else 5125 { 5126 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5127 } 5128 } 5129 { 5130 /* limits & surrogate error testing */ 5131 char LIn [sizeof(pszLMBCS)]; 5132 const char * pLIn = LIn; 5133 5134 char LOut [sizeof(pszLMBCS)]; 5135 char * pLOut = LOut; 5136 5137 UChar UOut [sizeof(pszUnicode)]; 5138 UChar * pUOut = UOut; 5139 5140 UChar UIn [sizeof(pszUnicode)]; 5141 const UChar * pUIn = UIn; 5142 5143 int32_t off [sizeof(offsets)]; 5144 UChar32 uniChar; 5145 5146 errorCode=U_ZERO_ERROR; 5147 5148 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5149 pUIn++; 5150 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); 5151 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5152 { 5153 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5154 } 5155 pUIn--; 5156 5157 errorCode=U_ZERO_ERROR; 5158 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5159 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5160 { 5161 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5162 } 5163 errorCode=U_ZERO_ERROR; 5164 5165 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, 
(const char *)(pLIn-1), &errorCode); 5166 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5167 { 5168 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5169 } 5170 errorCode=U_ZERO_ERROR; 5171 5172 /* 0 byte source request - no error, no pointer movement */ 5173 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5174 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5175 if(U_FAILURE(errorCode)) { 5176 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5177 } 5178 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5179 { 5180 log_err("Unexpected pointer move in 0 byte source request \n"); 5181 } 5182 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5183 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5184 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5185 { 5186 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5187 } 5188 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5189 { 5190 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5191 } 5192 errorCode = U_ZERO_ERROR; 5193 5194 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5195 5196 pUIn = pszUnicode; 5197 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5198 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5199 { 5200 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5201 } 5202 5203 errorCode = U_ZERO_ERROR; 5204 5205 pLIn = (const char *)pszLMBCS; 5206 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5207 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const 
char *)pszLMBCS+offsets[4]) 5208 { 5209 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5210 } 5211 5212 /* unpaired or chopped LMBCS surrogates */ 5213 5214 /* OK high surrogate, Low surrogate is chopped */ 5215 LIn [0] = (char)0x14; 5216 LIn [1] = (char)0xD8; 5217 LIn [2] = (char)0x01; 5218 LIn [3] = (char)0x14; 5219 LIn [4] = (char)0xDC; 5220 pLIn = LIn; 5221 errorCode = U_ZERO_ERROR; 5222 pUOut = UOut; 5223 5224 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5225 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5226 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5227 { 5228 log_err("Unexpected results on chopped low surrogate\n"); 5229 } 5230 5231 /* chopped at surrogate boundary */ 5232 LIn [0] = (char)0x14; 5233 LIn [1] = (char)0xD8; 5234 LIn [2] = (char)0x01; 5235 pLIn = LIn; 5236 errorCode = U_ZERO_ERROR; 5237 pUOut = UOut; 5238 5239 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5240 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5241 { 5242 log_err("Unexpected results on chopped at surrogate boundary \n"); 5243 } 5244 5245 /* unpaired surrogate plus valid Unichar */ 5246 LIn [0] = (char)0x14; 5247 LIn [1] = (char)0xD8; 5248 LIn [2] = (char)0x01; 5249 LIn [3] = (char)0x14; 5250 LIn [4] = (char)0xC9; 5251 LIn [5] = (char)0xD0; 5252 pLIn = LIn; 5253 errorCode = U_ZERO_ERROR; 5254 pUOut = UOut; 5255 5256 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5257 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5258 { 5259 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5260 } 5261 5262 /* 
unpaired surrogate plus chopped Unichar */ 5263 LIn [0] = (char)0x14; 5264 LIn [1] = (char)0xD8; 5265 LIn [2] = (char)0x01; 5266 LIn [3] = (char)0x14; 5267 LIn [4] = (char)0xC9; 5268 5269 pLIn = LIn; 5270 errorCode = U_ZERO_ERROR; 5271 pUOut = UOut; 5272 5273 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5274 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5275 { 5276 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5277 } 5278 5279 /* unpaired surrogate plus valid non-Unichar */ 5280 LIn [0] = (char)0x14; 5281 LIn [1] = (char)0xD8; 5282 LIn [2] = (char)0x01; 5283 LIn [3] = (char)0x0F; 5284 LIn [4] = (char)0x3B; 5285 5286 pLIn = LIn; 5287 errorCode = U_ZERO_ERROR; 5288 pUOut = UOut; 5289 5290 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5291 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5292 { 5293 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5294 } 5295 5296 /* unpaired surrogate plus chopped non-Unichar */ 5297 LIn [0] = (char)0x14; 5298 LIn [1] = (char)0xD8; 5299 LIn [2] = (char)0x01; 5300 LIn [3] = (char)0x0F; 5301 5302 pLIn = LIn; 5303 errorCode = U_ZERO_ERROR; 5304 pUOut = UOut; 5305 5306 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5307 5308 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5309 { 5310 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5311 } 5312 } 5313 } 5314 ucnv_close(cnv); /* final cleanup */ 5315 } 5316 5317 5318 static void TestJitterbug255() 5319 { 5320 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 
5321 const char *testBuffer = (const char *)testBytes; 5322 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5323 UErrorCode status = U_ZERO_ERROR; 5324 /*UChar32 result;*/ 5325 UConverter *cnv = 0; 5326 5327 cnv = ucnv_open("shift-jis", &status); 5328 if (U_FAILURE(status) || cnv == 0) { 5329 log_data_err("Failed to open the converter for SJIS.\n"); 5330 return; 5331 } 5332 while (testBuffer != testEnd) 5333 { 5334 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5335 if (U_FAILURE(status)) 5336 { 5337 log_err("Failed to convert the next UChar for SJIS.\n"); 5338 break; 5339 } 5340 } 5341 ucnv_close(cnv); 5342 } 5343 5344 static void TestEBCDICUS4XML() 5345 { 5346 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5347 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5348 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5349 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5350 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5351 UChar *unicodes = unicodes_x; 5352 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5353 char *target = target_x; 5354 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5355 UErrorCode status = U_ZERO_ERROR; 5356 UConverter *cnv = 0; 5357 5358 cnv = ucnv_open("ebcdic-xml-us", &status); 5359 if (U_FAILURE(status) || cnv == 0) { 5360 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5361 return; 5362 } 5363 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5364 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5365 log_err("To Unicode conversion failed in EBCDICUS4XML test. 
%s\n", 5366 u_errorName(status)); 5367 printUSeqErr(unicodes_x, 3); 5368 printUSeqErr(toUnicodeMaps, 3); 5369 } 5370 status = U_ZERO_ERROR; 5371 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5372 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5373 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5374 u_errorName(status)); 5375 printSeqErr((const unsigned char*)target_x, 3); 5376 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5377 } 5378 ucnv_close(cnv); 5379 } 5380 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5381 5382 #if !UCONFIG_NO_COLLATION 5383 5384 static void TestJitterbug981(){ 5385 const UChar* rules; 5386 int32_t rules_length, target_cap, bytes_needed, buff_size; 5387 UErrorCode status = U_ZERO_ERROR; 5388 UConverter *utf8cnv; 5389 UCollator* myCollator; 5390 char *buff; 5391 int numNeeded=0; 5392 utf8cnv = ucnv_open ("utf8", &status); 5393 if(U_FAILURE(status)){ 5394 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5395 return; 5396 } 5397 myCollator = ucol_open("zh", &status); 5398 if(U_FAILURE(status)){ 5399 log_data_err("Could not open collator for zh locale. 
Error: %s\n", u_errorName(status)); 5400 ucnv_close(utf8cnv); 5401 return; 5402 } 5403 5404 rules = ucol_getRules(myCollator, &rules_length); 5405 if(rules_length == 0) { 5406 log_data_err("missing zh tailoring rule string\n"); 5407 ucol_close(myCollator); 5408 ucnv_close(utf8cnv); 5409 return; 5410 } 5411 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5412 buff = malloc(buff_size); 5413 5414 target_cap = 0; 5415 do { 5416 ucnv_reset(utf8cnv); 5417 status = U_ZERO_ERROR; 5418 if(target_cap >= buff_size) { 5419 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5420 break; 5421 } 5422 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5423 rules, rules_length, &status); 5424 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5425 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5426 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5427 break; 5428 } 5429 numNeeded = bytes_needed; 5430 } while (status == U_BUFFER_OVERFLOW_ERROR); 5431 ucol_close(myCollator); 5432 ucnv_close(utf8cnv); 5433 free(buff); 5434 } 5435 5436 #endif 5437 5438 #if !UCONFIG_NO_LEGACY_CONVERSION 5439 static void TestJitterbug1293(){ 5440 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5441 char target[256]; 5442 UErrorCode status = U_ZERO_ERROR; 5443 UConverter* conv=NULL; 5444 int32_t target_cap, bytes_needed, numNeeded = 0; 5445 conv = ucnv_open("shift-jis",&status); 5446 if(U_FAILURE(status)){ 5447 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5448 return; 5449 } 5450 5451 do{ 5452 target_cap =0; 5453 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5454 target_cap = (bytes_needed > target_cap) ? 
bytes_needed : target_cap +1; 5455 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5456 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5457 } 5458 numNeeded = bytes_needed; 5459 } while (status == U_BUFFER_OVERFLOW_ERROR); 5460 if(U_FAILURE(status)){ 5461 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); 5462 return; 5463 } 5464 ucnv_close(conv); 5465 } 5466 #endif 5467 5468 static void TestJB5275_1(){ 5469 5470 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5471 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5472 /* Switch script: */ 5473 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5474 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5475 "\xEF\x40\x3B\xB3\x0A"; 5476 static const UChar expected[] ={ 5477 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5478 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5479 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5480 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5481 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5482 }; 5483 5484 UErrorCode status = U_ZERO_ERROR; 5485 UConverter* conv = ucnv_open("iscii-gur", &status); 5486 UChar dest[100] = {'\0'}; 5487 UChar* target = dest; 5488 UChar* targetLimit = dest+100; 5489 const char* source = data; 5490 const char* sourceLimit = data+strlen(data); 5491 const UChar* exp = expected; 5492 5493 if (U_FAILURE(status)) { 5494 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5495 return; 5496 } 5497 5498 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5499 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5500 if(U_FAILURE(status)){ 5501 log_err("conversion failed: %s \n", u_errorName(status)); 5502 } 5503 targetLimit = target; 5504 target = 
dest; 5505 printUSeq(target, targetLimit-target); 5506 while(target<targetLimit){ 5507 if(*exp!=*target){ 5508 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5509 } 5510 target++; 5511 exp++; 5512 } 5513 ucnv_close(conv); 5514 } 5515 5516 static void TestJB5275(){ 5517 static const char* data = 5518 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5519 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5520 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5521 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5522 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5523 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5524 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5525 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5526 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5527 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5528 static const UChar expected[] ={ 5529 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5530 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5531 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5532 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5533 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5534 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5535 }; 5536 5537 UErrorCode status = U_ZERO_ERROR; 5538 UConverter* conv = ucnv_open("iscii", &status); 5539 UChar dest[100] = {'\0'}; 5540 UChar* target = dest; 5541 UChar* targetLimit = dest+100; 5542 const char* source = data; 5543 const char* sourceLimit = data+strlen(data); 5544 const UChar* exp = expected; 5545 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5546 if(U_FAILURE(status)){ 5547 log_err("conversion failed: %s \n", u_errorName(status)); 5548 } 5549 targetLimit = target; 5550 target = dest; 5551 5552 printUSeq(target, targetLimit-target); 5553 5554 
while(target<targetLimit){ 5555 if(*exp!=*target){ 5556 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5557 } 5558 target++; 5559 exp++; 5560 } 5561 ucnv_close(conv); 5562 } 5563 5564 static void 5565 TestIsFixedWidth() { 5566 UErrorCode status = U_ZERO_ERROR; 5567 UConverter *cnv = NULL; 5568 int32_t i; 5569 5570 const char *fixedWidth[] = { 5571 "US-ASCII", 5572 "UTF32", 5573 "ibm-5478_P100-1995" 5574 }; 5575 5576 const char *notFixedWidth[] = { 5577 "GB18030", 5578 "UTF8", 5579 "windows-949-2000", 5580 "UTF16" 5581 }; 5582 5583 for (i = 0; i < LENGTHOF(fixedWidth); i++) { 5584 cnv = ucnv_open(fixedWidth[i], &status); 5585 if (cnv == NULL || U_FAILURE(status)) { 5586 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status)); 5587 continue; 5588 } 5589 5590 if (!ucnv_isFixedWidth(cnv, &status)) { 5591 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]); 5592 } 5593 ucnv_close(cnv); 5594 } 5595 5596 for (i = 0; i < LENGTHOF(notFixedWidth); i++) { 5597 cnv = ucnv_open(notFixedWidth[i], &status); 5598 if (cnv == NULL || U_FAILURE(status)) { 5599 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status)); 5600 continue; 5601 } 5602 5603 if (ucnv_isFixedWidth(cnv, &status)) { 5604 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]); 5605 } 5606 ucnv_close(cnv); 5607 } 5608 } 5609