1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /******************************************************************************* 9 * 10 * File nucnvtst.c 11 * 12 * Modification History: 13 * Name Description 14 * Steven R. Loomis 7/8/1999 Adding input buffer test 15 ******************************************************************************** 16 */ 17 #include <stdio.h> 18 #include "cstring.h" 19 #include "unicode/uloc.h" 20 #include "unicode/ucnv.h" 21 #include "unicode/ucnv_err.h" 22 #include "unicode/ucnv_cb.h" 23 #include "cintltst.h" 24 #include "unicode/utypes.h" 25 #include "unicode/ustring.h" 26 #include "unicode/ucol.h" 27 #include "unicode/utf16.h" 28 #include "cmemory.h" 29 #include "nucnvtst.h" 30 31 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 32 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 33 #if !UCONFIG_NO_COLLATION 34 static void TestJitterbug981(void); 35 #endif 36 #if !UCONFIG_NO_LEGACY_CONVERSION 37 static void TestJitterbug1293(void); 38 #endif 39 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 40 static void TestConverterTypesAndStarters(void); 41 static void TestAmbiguous(void); 42 static void TestSignatureDetection(void); 43 static void TestUTF7(void); 44 static void TestIMAP(void); 45 static void TestUTF8(void); 46 static void TestCESU8(void); 47 static void TestUTF16(void); 48 static void TestUTF16BE(void); 49 static void TestUTF16LE(void); 50 static void TestUTF32(void); 51 static void TestUTF32BE(void); 52 static void TestUTF32LE(void); 53 static void TestLATIN1(void); 54 55 #if !UCONFIG_NO_LEGACY_CONVERSION 56 static void TestSBCS(void); 57 static void TestDBCS(void); 58 static void TestMBCS(void); 59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 60 static void TestICCRunout(void); 61 #endif 62 63 #ifdef U_ENABLE_GENERIC_ISO_2022 64 static void TestISO_2022(void); 65 #endif 66 67 static void TestISO_2022_JP(void); 68 static void TestISO_2022_JP_1(void); 69 static void TestISO_2022_JP_2(void); 70 static void TestISO_2022_KR(void); 71 static void TestISO_2022_KR_1(void); 72 static void TestISO_2022_CN(void); 73 #if 0 74 /* 75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 76 */ 77 static void TestISO_2022_CN_EXT(void); 78 #endif 79 static void TestJIS(void); 80 static void TestHZ(void); 81 #endif 82 83 static void TestSCSU(void); 84 85 #if !UCONFIG_NO_LEGACY_CONVERSION 86 static void TestEBCDIC_STATEFUL(void); 87 static void TestGB18030(void); 88 static void TestLMBCS(void); 89 static void TestJitterbug255(void); 90 static void TestEBCDICUS4XML(void); 91 #if 0 92 /* 93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 94 */ 95 static void TestJitterbug915(void); 96 #endif 97 static void TestISCII(void); 98 99 static void TestCoverageMBCS(void); 100 static void TestJitterbug2346(void); 101 static void TestJitterbug2411(void); 102 static void TestJB5275(void); 103 static void TestJB5275_1(void); 104 static void TestJitterbug6175(void); 105 106 static void TestIsFixedWidth(void); 107 #endif 108 109 static void TestInBufSizes(void); 110 111 static void TestRoundTrippingAllUTF(void); 112 static void TestConv(const uint16_t in[], 113 int len, 114 const char* conv, 115 const char* lang, 116 char byteArr[], 117 int byteArrLen); 118 119 /* open a converter, using test data if it begins with '@' */ 120 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 121 122 123 #define NEW_MAX_BUFFER 999 124 125 static int32_t gInBufferSize = NEW_MAX_BUFFER; 126 static int32_t gOutBufferSize = NEW_MAX_BUFFER; 127 static char gNuConvTestName[1024]; 128 129 #define nct_min(x,y) ((x<y) ? x : y) 130 131 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 132 { 133 if(cnv && cnv[0] == '@') { 134 return ucnv_openPackage(loadTestData(err), cnv+1, err); 135 } else { 136 return ucnv_open(cnv, err); 137 } 138 } 139 140 static void printSeq(const unsigned char* a, int len) 141 { 142 int i=0; 143 log_verbose("{"); 144 while (i<len) 145 log_verbose("0x%02x ", a[i++]); 146 log_verbose("}\n"); 147 } 148 149 static void printUSeq(const UChar* a, int len) 150 { 151 int i=0; 152 log_verbose("{U+"); 153 while (i<len) log_verbose("0x%04x ", a[i++]); 154 log_verbose("}\n"); 155 } 156 157 static void printSeqErr(const unsigned char* a, int len) 158 { 159 int i=0; 160 fprintf(stderr, "{"); 161 while (i<len) 162 fprintf(stderr, "0x%02x ", a[i++]); 163 fprintf(stderr, "}\n"); 164 } 165 166 static void printUSeqErr(const UChar* a, int len) 167 { 168 int i=0; 169 fprintf(stderr, "{U+"); 170 while (i<len) 171 fprintf(stderr, "0x%04x ", a[i++]); 172 fprintf(stderr,"}\n"); 173 } 174 175 static void 176 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 177 { 178 const char* s0; 179 const char* s=(char*)source; 180 const int32_t *r=results; 181 UErrorCode errorCode=U_ZERO_ERROR; 182 UChar32 c; 183 184 while(s<limit) { 185 s0=s; 186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 187 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 188 break; /* no more significant input */ 189 } else if(U_FAILURE(errorCode)) { 190 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 191 break; 192 } else if( 193 /* test the expected number of input bytes only if >=0 */ 194 (*r>=0 && (int32_t)(s-s0)!=*r) || 195 c!=*(r+1) 196 ) { 197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 198 message, c, (s-s0), *(r+1), *r); 199 break; 200 } 201 r+=2; 202 } 203 } 204 205 static void 206 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) 207 { 208 const char* s=(char*)source; 209 UErrorCode errorCode=U_ZERO_ERROR; 210 uint32_t c; 211 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 212 if(errorCode != expected){ 213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 214 } 215 if(c != 0xFFFD && c != 0xffff){ 216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 217 } 218 219 } 220 221 static void TestInBufSizes(void) 222 { 223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 224 #if 1 225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 230 TestNewConvertWithBufferSizes(1,1); 231 TestNewConvertWithBufferSizes(2,3); 232 TestNewConvertWithBufferSizes(3,2); 233 #endif 234 } 235 236 static void TestOutBufSizes(void) 237 { 238 #if 1 239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 245 246 #endif 247 } 248 249 250 void addTestNewConvert(TestNode** root) 251 { 252 #if !UCONFIG_NO_FILE_IO 253 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 254 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 255 #endif 256 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 257 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 258 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); 259 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 260 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); 261 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 262 263 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 264 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 265 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 266 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 267 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 268 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 269 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 270 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 271 272 #if !UCONFIG_NO_LEGACY_CONVERSION 273 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 274 #endif 275 276 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 277 278 #if !UCONFIG_NO_LEGACY_CONVERSION 279 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 280 #if !UCONFIG_NO_FILE_IO 281 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 282 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 283 #endif 284 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 285 286 #ifdef U_ENABLE_GENERIC_ISO_2022 287 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 288 #endif 289 290 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 291 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); 292 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 293 // android-changed (no have ISO_2022_JP_2) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 294 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 295 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 296 // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 297 /* 298 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 299 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 300 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 301 */ 302 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 303 #endif 304 305 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 306 307 #if !UCONFIG_NO_LEGACY_CONVERSION 308 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 309 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 310 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 311 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 312 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 313 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 314 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 315 #if !UCONFIG_NO_COLLATION 316 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 317 #endif 318 319 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 320 #endif 321 322 323 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 324 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 325 #endif 326 327 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 328 329 #if !UCONFIG_NO_LEGACY_CONVERSION 330 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 331 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 332 // android-removed (no full ISO2022 CJK tables) -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 333 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); 334 #endif 335 } 336 337 338 /* Note that this test already makes use of statics, so it's not really 339 multithread safe. 340 This convenience function lets us make the error messages actually useful. 341 */ 342 343 static void setNuConvTestName(const char *codepage, const char *direction) 344 { 345 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 346 codepage, 347 direction, 348 (int)gInBufferSize, 349 (int)gOutBufferSize); 350 } 351 352 typedef enum 353 { 354 TC_OK = 0, /* test was OK */ 355 TC_MISMATCH = 1, /* Match failed - err was printed */ 356 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 357 } ETestConvertResult; 358 359 /* Note: This function uses global variables and it will not do offset 360 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 361 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 362 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 363 { 364 UErrorCode status = U_ZERO_ERROR; 365 UConverter *conv = 0; 366 char junkout[NEW_MAX_BUFFER]; /* FIX */ 367 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 368 char *p; 369 const UChar *src; 370 char *end; 371 char *targ; 372 int32_t *offs; 373 int i; 374 int32_t realBufferSize; 375 char *realBufferEnd; 376 const UChar *realSourceEnd; 377 const UChar *sourceLimit; 378 UBool checkOffsets = TRUE; 379 UBool doFlush; 380 381 for(i=0;i<NEW_MAX_BUFFER;i++) 382 junkout[i] = (char)0xF0; 383 for(i=0;i<NEW_MAX_BUFFER;i++) 384 junokout[i] = 0xFF; 385 386 setNuConvTestName(codepage, "FROM"); 387 388 log_verbose("\n========= %s\n", gNuConvTestName); 389 390 conv = my_ucnv_open(codepage, &status); 391 392 if(U_FAILURE(status)) 393 { 394 log_data_err("Couldn't open converter %s\n",codepage); 395 return TC_FAIL; 396 } 397 if(useFallback){ 398 ucnv_setFallback(conv,useFallback); 399 } 400 401 log_verbose("Converter opened..\n"); 402 403 src = source; 404 targ = junkout; 405 offs = junokout; 406 407 realBufferSize = UPRV_LENGTHOF(junkout); 408 realBufferEnd = junkout + realBufferSize; 409 realSourceEnd = source + sourceLen; 410 411 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 412 checkOffsets = FALSE; 413 414 do 415 { 416 end = nct_min(targ + gOutBufferSize, realBufferEnd); 417 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 418 419 doFlush = (UBool)(sourceLimit == realSourceEnd); 420 421 if(targ == realBufferEnd) { 422 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 423 return TC_FAIL; 424 } 425 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 426 427 428 status = U_ZERO_ERROR; 429 430 ucnv_fromUnicode (conv, 431 &targ, 432 end, 433 &src, 434 sourceLimit, 435 checkOffsets ? offs : NULL, 436 doFlush, /* flush if we're at the end of the input data */ 437 &status); 438 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 439 440 if(U_FAILURE(status)) { 441 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 442 return TC_FAIL; 443 } 444 445 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 446 sourceLen, targ-junkout); 447 448 if(getTestOption(VERBOSITY_OPTION)) 449 { 450 char junk[9999]; 451 char offset_str[9999]; 452 char *ptr; 453 454 junk[0] = 0; 455 offset_str[0] = 0; 456 for(ptr = junkout;ptr<targ;ptr++) { 457 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 458 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 459 } 460 461 log_verbose(junk); 462 printSeq((const uint8_t *)expect, expectLen); 463 if ( checkOffsets ) { 464 log_verbose("\nOffsets:"); 465 log_verbose(offset_str); 466 } 467 log_verbose("\n"); 468 } 469 ucnv_close(conv); 470 471 if(expectLen != targ-junkout) { 472 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 473 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 474 fprintf(stderr, "Got:\n"); 475 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 476 fprintf(stderr, "Expected:\n"); 477 printSeqErr((const unsigned char*)expect, expectLen); 478 return TC_MISMATCH; 479 } 480 481 if (checkOffsets && (expectOffsets != 0) ) { 482 log_verbose("comparing %d offsets..\n", targ-junkout); 483 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 484 log_err("did not get the expected offsets. %s\n", gNuConvTestName); 485 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 486 log_err("\n"); 487 log_err("Got : "); 488 for(p=junkout;p<targ;p++) { 489 log_err("%d,", junokout[p-junkout]); 490 } 491 log_err("\n"); 492 log_err("Expected: "); 493 for(i=0; i<(targ-junkout); i++) { 494 log_err("%d,", expectOffsets[i]); 495 } 496 log_err("\n"); 497 } 498 } 499 500 log_verbose("comparing..\n"); 501 if(!memcmp(junkout, expect, expectLen)) { 502 log_verbose("Matches!\n"); 503 return TC_OK; 504 } else { 505 log_err("String does not match u->%s\n", gNuConvTestName); 506 printUSeqErr(source, sourceLen); 507 fprintf(stderr, "Got:\n"); 508 printSeqErr((const unsigned char *)junkout, expectLen); 509 fprintf(stderr, "Expected:\n"); 510 printSeqErr((const unsigned char *)expect, expectLen); 511 512 return TC_MISMATCH; 513 } 514 } 515 516 /* Note: This function uses global variables and it will not do offset 517 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 518 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 519 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 520 { 521 UErrorCode status = U_ZERO_ERROR; 522 UConverter *conv = 0; 523 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 524 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 525 const char *src; 526 const char *realSourceEnd; 527 const char *srcLimit; 528 UChar *p; 529 UChar *targ; 530 UChar *end; 531 int32_t *offs; 532 int i; 533 UBool checkOffsets = TRUE; 534 535 int32_t realBufferSize; 536 UChar *realBufferEnd; 537 538 539 for(i=0;i<NEW_MAX_BUFFER;i++) 540 junkout[i] = 0xFFFE; 541 542 for(i=0;i<NEW_MAX_BUFFER;i++) 543 junokout[i] = -1; 544 545 setNuConvTestName(codepage, "TO"); 546 547 log_verbose("\n========= %s\n", gNuConvTestName); 548 549 conv = my_ucnv_open(codepage, &status); 550 551 if(U_FAILURE(status)) 552 { 553 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 554 return TC_FAIL; 555 } 556 if(useFallback){ 557 ucnv_setFallback(conv,useFallback); 558 } 559 log_verbose("Converter opened..\n"); 560 561 src = (const char *)source; 562 targ = junkout; 563 offs = junokout; 564 565 realBufferSize = UPRV_LENGTHOF(junkout); 566 realBufferEnd = junkout + realBufferSize; 567 realSourceEnd = src + sourcelen; 568 569 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 570 checkOffsets = FALSE; 571 572 do 573 { 574 end = nct_min( targ + gOutBufferSize, realBufferEnd); 575 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 576 577 if(targ == realBufferEnd) 578 { 579 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 580 return TC_FAIL; 581 } 582 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 583 584 /* oldTarg = targ; */ 585 586 status = U_ZERO_ERROR; 587 588 ucnv_toUnicode (conv, 589 &targ, 590 end, 591 &src, 592 srcLimit, 593 checkOffsets ? offs : NULL, 594 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 595 &status); 596 597 /* offs += (targ-oldTarg); */ 598 599 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 600 601 if(U_FAILURE(status)) 602 { 603 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 604 return TC_FAIL; 605 } 606 607 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 608 sourcelen, targ-junkout); 609 if(getTestOption(VERBOSITY_OPTION)) 610 { 611 char junk[9999]; 612 char offset_str[9999]; 613 UChar *ptr; 614 615 junk[0] = 0; 616 offset_str[0] = 0; 617 618 for(ptr = junkout;ptr<targ;ptr++) 619 { 620 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 621 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 622 } 623 624 log_verbose(junk); 625 printUSeq(expect, expectlen); 626 if ( checkOffsets ) 627 { 628 log_verbose("\nOffsets:"); 629 log_verbose(offset_str); 630 } 631 log_verbose("\n"); 632 } 633 ucnv_close(conv); 634 635 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 636 637 if (checkOffsets && (expectOffsets != 0)) 638 { 639 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 640 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 641 log_err("Got: "); 642 for(p=junkout;p<targ;p++) { 643 log_err("%d,", junokout[p-junkout]); 644 } 645 log_err("\n"); 646 log_err("Expected: "); 647 for(i=0; i<(targ-junkout); i++) { 648 log_err("%d,", expectOffsets[i]); 649 } 650 log_err("\n"); 651 log_err("output: "); 652 for(i=0; i<(targ-junkout); i++) { 653 log_err("%X,", junkout[i]); 654 } 655 log_err("\n"); 656 log_err("input: "); 657 for(i=0; i<(src-(const char *)source); i++) { 658 log_err("%X,", (unsigned char)source[i]); 659 } 660 log_err("\n"); 661 } 662 } 663 664 if(!memcmp(junkout, expect, expectlen*2)) 665 { 666 log_verbose("Matches!\n"); 667 return TC_OK; 668 } 669 else 670 { 671 log_err("String does not match. %s\n", gNuConvTestName); 672 log_verbose("String does not match. %s\n", gNuConvTestName); 673 printf("\nGot:"); 674 printUSeqErr(junkout, expectlen); 675 printf("\nExpected:"); 676 printUSeqErr(expect, expectlen); 677 return TC_MISMATCH; 678 } 679 } 680 681 682 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 683 { 684 /** test chars #1 */ 685 /* 1 2 3 1Han 2Han 3Han . */ 686 static const UChar sampleText[] = 687 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 688 static const UChar sampleTextRoundTripUnmappable[] = 689 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 690 691 692 static const uint8_t expectedUTF8[] = 693 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 694 static const int32_t toUTF8Offs[] = 695 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 696 static const int32_t fmUTF8Offs[] = 697 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 698 699 #ifdef U_ENABLE_GENERIC_ISO_2022 700 /* Same as UTF8, but with ^[%B preceeding */ 701 static const const uint8_t expectedISO2022[] = 702 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 703 static const int32_t toISO2022Offs[] = 704 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 705 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 706 static const int32_t fmISO2022Offs[] = 707 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 708 #endif 709 710 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ 711 static const uint8_t expectedIBM930[] = 712 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 713 static const int32_t toIBM930Offs[] = 714 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 715 static const int32_t fmIBM930Offs[] = 716 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 717 718 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 719 static const uint8_t expectedIBM943[] = 720 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 721 static const int32_t toIBM943Offs [] = 722 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 723 static const int32_t fmIBM943Offs[] = 724 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 725 726 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 727 static const uint8_t expectedIBM9027[] = 728 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 729 static const int32_t toIBM9027Offs [] = 730 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 731 732 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 733 static const uint8_t expectedIBM920[] = 734 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 735 static const int32_t toIBM920Offs [] = 736 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 737 738 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 739 static const uint8_t expectedISO88593[] = 740 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 741 static const int32_t toISO88593Offs[] = 742 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 743 744 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ 745 static const uint8_t expectedLATIN1[] = 746 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 747 static const int32_t toLATIN1Offs[] = 748 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 749 750 751 /* etc */ 752 static const uint8_t expectedUTF16BE[] = 753 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 754 static const int32_t toUTF16BEOffs[]= 755 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 756 static const int32_t fmUTF16BEOffs[] = 757 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 758 759 static const uint8_t expectedUTF16LE[] = 760 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 761 static const int32_t toUTF16LEOffs[]= 762 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 763 static const int32_t fmUTF16LEOffs[] = 764 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 765 766 static const uint8_t expectedUTF32BE[] = 767 { 0x00, 0x00, 0x00, 0x31, 768 0x00, 0x00, 0x00, 0x32, 769 0x00, 0x00, 0x00, 0x33, 770 0x00, 0x00, 0x00, 0x00, 771 0x00, 0x00, 0x4e, 0x00, 772 0x00, 0x00, 0x4e, 0x8c, 773 0x00, 0x00, 0x4e, 0x09, 774 0x00, 0x00, 0x00, 0x2e, 775 0x00, 0x02, 0x00, 0x21 }; 776 static const int32_t toUTF32BEOffs[]= 777 { 0x00, 0x00, 0x00, 0x00, 778 0x01, 0x01, 0x01, 0x01, 779 0x02, 0x02, 0x02, 0x02, 780 0x03, 0x03, 0x03, 0x03, 781 0x04, 0x04, 0x04, 0x04, 782 0x05, 0x05, 0x05, 0x05, 783 0x06, 0x06, 0x06, 0x06, 784 0x07, 0x07, 0x07, 0x07, 785 0x08, 0x08, 0x08, 0x08, 786 0x08, 0x08, 0x08, 0x08 }; 787 static const int32_t fmUTF32BEOffs[] = 788 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 789 790 static const uint8_t expectedUTF32LE[] = 791 { 0x31, 0x00, 0x00, 0x00, 792 0x32, 0x00, 0x00, 0x00, 793 0x33, 0x00, 0x00, 0x00, 794 0x00, 0x00, 0x00, 0x00, 795 0x00, 0x4e, 0x00, 0x00, 796 0x8c, 0x4e, 0x00, 0x00, 797 0x09, 0x4e, 0x00, 0x00, 798 0x2e, 0x00, 0x00, 0x00, 799 0x21, 0x00, 0x02, 0x00 }; 800 static const int32_t toUTF32LEOffs[]= 801 { 0x00, 0x00, 0x00, 0x00, 802 0x01, 0x01, 0x01, 0x01, 803 0x02, 0x02, 0x02, 0x02, 804 0x03, 0x03, 0x03, 0x03, 805 0x04, 0x04, 0x04, 0x04, 806 0x05, 0x05, 0x05, 0x05, 807 0x06, 0x06, 0x06, 0x06, 808 0x07, 0x07, 0x07, 0x07, 809 0x08, 0x08, 0x08, 0x08, 810 0x08, 0x08, 0x08, 0x08 }; 811 static const int32_t fmUTF32LEOffs[] = 812 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 813 814 815 816 817 /** Test chars #2 **/ 818 819 /* Sahha [health], slashed h's */ 820 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 821 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 822 823 /* LMBCS */ 824 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 825 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 826 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 827 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 828 /*********************************** START OF CODE finally *************/ 829 830 gInBufferSize = insize; 831 gOutBufferSize = outsize; 832 833 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 834 835 836 /*UTF-8*/ 837 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 838 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 839 840 log_verbose("Test surrogate behaviour for UTF8\n"); 841 { 842 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 843 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 844 0xf0, 0x90, 0x90, 0x81, 845 0xef, 0xbf, 0xbd 846 }; 847 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 848 testConvertFromU(testinput, UPRV_LENGTHOF(testinput), 849 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 850 851 852 } 853 854 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 855 /*ISO-2022*/ 856 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 857 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 858 #endif 859 860 /*UTF16 LE*/ 861 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 862 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 863 /*UTF16 BE*/ 864 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 865 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 866 /*UTF32 LE*/ 867 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 868 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 869 /*UTF32 BE*/ 870 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 871 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 872 873 /*LATIN_1*/ 874 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 875 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 876 877 #if !UCONFIG_NO_LEGACY_CONVERSION 878 /*EBCDIC_STATEFUL*/ 879 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 880 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 881 882 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 883 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 884 885 /*MBCS*/ 886 887 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 888 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); 889 /*DBCS*/ 890 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 891 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 892 /*SBCS*/ 893 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 894 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 895 /*SBCS*/ 896 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText), 897 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 898 #endif 899 900 901 /****/ 902 903 /*UTF-8*/ 904 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 905 sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE); 906 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 907 /*ISO-2022*/ 908 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 909 sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE); 910 #endif 911 912 /*UTF16 LE*/ 913 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 914 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE); 915 /*UTF16 BE*/ 916 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 917 sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE); 918 /*UTF32 LE*/ 919 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 920 sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE); 921 /*UTF32 BE*/ 922 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 923 sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE); 924 925 #if !UCONFIG_NO_LEGACY_CONVERSION 926 /*EBCDIC_STATEFUL*/ 927 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 928 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE); 929 /*MBCS*/ 930 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 931 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE); 932 #endif 933 934 /* Try it again to make sure it still works */ 935 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 936 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE); 937 938 #if !UCONFIG_NO_LEGACY_CONVERSION 939 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 940 malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE); 941 942 testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars), 943 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 944 945 /*LMBCS*/ 946 testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), 947 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 948 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 949 LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE); 950 #endif 951 952 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 953 { 954 /* encode directly set D and set O */ 955 static const uint8_t utf7[] = { 956 /* 957 Hi Mom -+Jjo--! 958 A+ImIDkQ. 959 +- 960 +ZeVnLIqe- 961 */ 962 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 963 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 964 0x2b, 0x2d, 965 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 966 }; 967 static const UChar unicode[] = { 968 /* 969 Hi Mom -<WHITE SMILING FACE>-! 970 A<NOT IDENTICAL TO><ALPHA>. 971 + 972 [Japanese word "nihongo"] 973 */ 974 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 975 0x41, 0x2262, 0x0391, 0x2e, 976 0x2b, 977 0x65e5, 0x672c, 0x8a9e 978 }; 979 static const int32_t toUnicodeOffsets[] = { 980 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 981 15, 17, 19, 23, 982 24, 983 27, 29, 32 984 }; 985 static const int32_t fromUnicodeOffsets[] = { 986 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 987 11, 12, 12, 12, 13, 13, 13, 13, 14, 988 15, 15, 989 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 990 }; 991 992 /* same but escaping set O (the exclamation mark) */ 993 static const uint8_t utf7Restricted[] = { 994 /* 995 Hi Mom -+Jjo--+ACE- 996 A+ImIDkQ. 997 +- 998 +ZeVnLIqe- 999 */ 1000 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 1001 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 1002 0x2b, 0x2d, 1003 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 1004 }; 1005 static const int32_t toUnicodeOffsetsR[] = { 1006 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1007 19, 21, 23, 27, 1008 28, 1009 31, 33, 36 1010 }; 1011 static const int32_t fromUnicodeOffsetsR[] = { 1012 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1013 11, 12, 12, 12, 13, 13, 13, 13, 14, 1014 15, 15, 1015 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1016 }; 1017 1018 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1019 1020 testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE); 1021 1022 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1023 1024 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1025 } 1026 1027 /* 1028 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 1029 * modified according to RFC 2060, 1030 * and supplemented with the one example in RFC 2060 itself. 1031 */ 1032 { 1033 static const uint8_t imap[] = { 1034 /* Hi Mom -&Jjo--! 1035 A&ImIDkQ-. 1036 &- 1037 &ZeVnLIqe- 1038 \ 1039 ~peter 1040 /mail 1041 /&ZeVnLIqe- 1042 /&U,BTFw- 1043 */ 1044 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1045 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1046 0x26, 0x2d, 1047 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1048 0x5c, 1049 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1050 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1051 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1052 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1053 }; 1054 static const UChar unicode[] = { 1055 /* Hi Mom -<WHITE SMILING FACE>-! 1056 A<NOT IDENTICAL TO><ALPHA>. 1057 & 1058 [Japanese word "nihongo"] 1059 \ 1060 ~peter 1061 /mail 1062 /<65e5, 672c, 8a9e> 1063 /<53f0, 5317> 1064 */ 1065 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1066 0x41, 0x2262, 0x0391, 0x2e, 1067 0x26, 1068 0x65e5, 0x672c, 0x8a9e, 1069 0x5c, 1070 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1071 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1072 0x2f, 0x65e5, 0x672c, 0x8a9e, 1073 0x2f, 0x53f0, 0x5317 1074 }; 1075 static const int32_t toUnicodeOffsets[] = { 1076 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1077 15, 17, 19, 24, 1078 25, 1079 28, 30, 33, 1080 37, 1081 38, 39, 40, 41, 42, 43, 1082 44, 45, 46, 47, 48, 1083 49, 51, 53, 56, 1084 60, 62, 64 1085 }; 1086 static const int32_t fromUnicodeOffsets[] = { 1087 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1088 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1089 15, 15, 1090 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1091 19, 1092 20, 21, 22, 23, 24, 25, 1093 26, 27, 28, 29, 30, 1094 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1095 35, 36, 36, 36, 37, 37, 37, 37, 37 1096 }; 1097 1098 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1099 1100 testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1101 } 1102 1103 /* Test UTF-8 bad data handling*/ 1104 { 1105 static const uint8_t utf8[]={ 1106 0x61, 1107 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1108 0x00, 1109 0x62, 1110 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1112 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1113 0xdf, 0xbf, /* 7ff */ 1114 0xbf, /* truncated tail */ 1115 0xf4, 0x90, 0x80, 0x80, /* 110000 */ 1116 0x02 1117 }; 1118 1119 static const uint16_t utf8Expected[]={ 1120 0x0061, 1121 0xfffd, 0xfffd, 0xfffd, 0xfffd, 1122 0x0000, 1123 0x0062, 1124 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 1125 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 1126 0xdbff, 0xdfff, 1127 0x07ff, 1128 0xfffd, 1129 0xfffd, 0xfffd, 0xfffd, 0xfffd, 1130 0x0002 1131 }; 1132 1133 static const int32_t utf8Offsets[]={ 1134 0, 1135 1, 2, 3, 4, 1136 5, 1137 6, 1138 7, 8, 9, 10, 11, 1139 12, 13, 14, 15, 16, 1140 17, 17, 1141 21, 1142 23, 1143 24, 25, 26, 27, 1144 28 1145 }; 1146 testConvertToU(utf8, sizeof(utf8), 1147 utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE); 1148 1149 } 1150 1151 /* Test UTF-32BE bad data handling*/ 1152 { 1153 static const uint8_t utf32[]={ 1154 0x00, 0x00, 0x00, 0x61, 1155 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1156 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1157 0x00, 0x00, 0x00, 0x62, 1158 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1159 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1160 0x00, 0x00, 0x01, 0x62, 1161 0x00, 0x00, 0x02, 0x62 1162 }; 1163 static const uint16_t utf32Expected[]={ 1164 0x0061, 1165 0xfffd, /* 0x110000 out of range */ 1166 0xDBFF, /* 0x10FFFF in range */ 1167 0xDFFF, 1168 0x0062, 1169 0xfffd, /* 0xffffffff out of range */ 1170 0xfffd, /* 0x7fffffff out of range */ 1171 0x0162, 1172 0x0262 1173 }; 1174 static const int32_t utf32Offsets[]={ 1175 0, 4, 8, 8, 12, 16, 20, 24, 28 1176 }; 1177 static const uint8_t utf32ExpectedBack[]={ 1178 0x00, 0x00, 0x00, 0x61, 1179 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ 1180 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1181 0x00, 0x00, 0x00, 0x62, 1182 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1183 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1184 0x00, 0x00, 0x01, 0x62, 1185 0x00, 0x00, 0x02, 0x62 1186 }; 1187 static const int32_t utf32OffsetsBack[]={ 1188 0,0,0,0, 1189 1,1,1,1, 1190 2,2,2,2, 1191 4,4,4,4, 1192 5,5,5,5, 1193 6,6,6,6, 1194 7,7,7,7, 1195 8,8,8,8 1196 }; 1197 1198 testConvertToU(utf32, sizeof(utf32), 1199 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE); 1200 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected), 1201 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1202 } 1203 1204 /* Test UTF-32LE bad data handling*/ 1205 { 1206 static const uint8_t utf32[]={ 1207 0x61, 0x00, 0x00, 0x00, 1208 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1209 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1210 0x62, 0x00, 0x00, 0x00, 1211 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1212 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1213 0x62, 0x01, 0x00, 0x00, 1214 0x62, 0x02, 0x00, 0x00, 1215 }; 1216 1217 static const uint16_t utf32Expected[]={ 1218 0x0061, 1219 0xfffd, /* 0x110000 out of range */ 1220 0xDBFF, /* 0x10FFFF in range */ 1221 0xDFFF, 1222 0x0062, 1223 0xfffd, /* 0xffffffff out of range */ 1224 0xfffd, /* 0x7fffffff out of range */ 1225 0x0162, 1226 0x0262 1227 }; 1228 static const int32_t utf32Offsets[]={ 1229 0, 4, 8, 8, 12, 16, 20, 24, 28 1230 }; 1231 static const uint8_t utf32ExpectedBack[]={ 1232 0x61, 0x00, 0x00, 0x00, 1233 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1234 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1235 0x62, 0x00, 0x00, 0x00, 1236 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1237 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1238 0x62, 0x01, 0x00, 0x00, 1239 0x62, 0x02, 0x00, 0x00 1240 }; 1241 static const int32_t utf32OffsetsBack[]={ 1242 0,0,0,0, 1243 1,1,1,1, 1244 2,2,2,2, 1245 4,4,4,4, 1246 5,5,5,5, 1247 6,6,6,6, 1248 7,7,7,7, 1249 8,8,8,8 1250 }; 1251 testConvertToU(utf32, sizeof(utf32), 1252 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE ); 1253 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected), 1254 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1255 } 1256 } 1257 1258 static void TestCoverageMBCS(){ 1259 #if 0 1260 UErrorCode status = U_ZERO_ERROR; 1261 const char *directory = loadTestData(&status); 1262 char* tdpath = NULL; 1263 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1264 int len = strlen(directory); 1265 char* index=NULL; 1266 1267 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1268 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1269 log_verbose("Retrieved data directory %s \n",saveDirectory); 1270 uprv_strcpy(tdpath,directory); 1271 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1272 1273 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1274 *(index+1)=0; 1275 } 1276 u_setDataDirectory(tdpath); 1277 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1278 #endif 1279 1280 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1281 which is test file for MBCS conversion with single-byte codepage data.*/ 1282 { 1283 1284 /* MBCS with single byte codepage data test1.ucm*/ 1285 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1286 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1287 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1288 1289 /*from Unicode*/ 1290 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput), 1291 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1292 } 1293 1294 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm 1295 which is test file for MBCS conversion with three-byte codepage data.*/ 1296 { 1297 1298 /* MBCS with three byte codepage data test3.ucm*/ 1299 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1300 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1301 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1302 1303 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1304 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1305 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1306 1307 /*from Unicode*/ 1308 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput), 1309 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1310 1311 /*to Unicode*/ 1312 testConvertToU(test3input, sizeof(test3input), 1313 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE); 1314 1315 } 1316 1317 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm 1318 which is test file for MBCS conversion with four-byte codepage data.*/ 1319 { 1320 1321 /* MBCS with three byte codepage data test4.ucm*/ 1322 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1323 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1324 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1325 1326 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1327 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1328 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1329 1330 /*from Unicode*/ 1331 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput), 1332 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1333 1334 /*to Unicode*/ 1335 testConvertToU(test4input, sizeof(test4input), 1336 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE ); 1337 1338 } 1339 #if 0 1340 free(tdpath); 1341 /* restore the original data directory */ 1342 log_verbose("Setting the data directory to %s \n", saveDirectory); 1343 u_setDataDirectory(saveDirectory); 1344 free(saveDirectory); 1345 #endif 1346 1347 } 1348 1349 static void TestConverterType(const char *convName, UConverterType convType) { 1350 UConverter* myConverter; 1351 UErrorCode err = U_ZERO_ERROR; 1352 1353 myConverter = my_ucnv_open(convName, &err); 1354 1355 if (U_FAILURE(err)) { 1356 log_data_err("Failed to create an %s converter\n", convName); 1357 return; 1358 } 1359 else 1360 { 1361 if (ucnv_getType(myConverter)!=convType) { 1362 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", 1363 convName, convType); 1364 } 1365 else { 1366 log_verbose("ucnv_getType %s ok\n", convName); 1367 } 1368 } 1369 ucnv_close(myConverter); 1370 } 1371 1372 static void TestConverterTypesAndStarters() 1373 { 1374 #if !UCONFIG_NO_LEGACY_CONVERSION 1375 UConverter* myConverter; 1376 UErrorCode err = U_ZERO_ERROR; 1377 UBool mystarters[256]; 1378 1379 /* const UBool expectedKSCstarters[256] = { 1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1394 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1395 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1397 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1398 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1399 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1401 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1406 1407 1408 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1409 1410 myConverter = ucnv_open("ksc", &err); 1411 if (U_FAILURE(err)) { 1412 log_data_err("Failed to create an ibm-ksc converter\n"); 1413 return; 1414 } 1415 else 1416 { 1417 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1418 log_err("ucnv_getType Failed for ibm-949\n"); 1419 else 1420 log_verbose("ucnv_getType ibm-949 ok\n"); 1421 1422 if(myConverter!=NULL) 1423 ucnv_getStarters(myConverter, mystarters, &err); 1424 1425 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1426 log_err("Failed ucnv_getStarters for ksc\n"); 1427 else 1428 log_verbose("ucnv_getStarters ok\n");*/ 1429 1430 } 1431 ucnv_close(myConverter); 1432 1433 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1434 TestConverterType("ibm-878", UCNV_SBCS); 1435 #endif 1436 1437 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1438 1439 TestConverterType("ibm-1208", UCNV_UTF8); 1440 1441 TestConverterType("utf-8", UCNV_UTF8); 1442 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1443 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1444 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1445 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1446 1447 #if !UCONFIG_NO_LEGACY_CONVERSION 1448 1449 #if defined(U_ENABLE_GENERIC_ISO_2022) 1450 TestConverterType("iso-2022", UCNV_ISO_2022); 1451 #endif 1452 1453 TestConverterType("hz", UCNV_HZ); 1454 #endif 1455 1456 TestConverterType("scsu", UCNV_SCSU); 1457 1458 #if !UCONFIG_NO_LEGACY_CONVERSION 1459 TestConverterType("x-iscii-de", UCNV_ISCII); 1460 #endif 1461 1462 TestConverterType("ascii", UCNV_US_ASCII); 1463 TestConverterType("utf-7", UCNV_UTF7); 1464 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1465 TestConverterType("bocu-1", UCNV_BOCU1); 1466 } 1467 1468 static void 1469 TestAmbiguousConverter(UConverter *cnv) { 1470 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1471 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1472 1473 const char *s; 1474 UChar *u; 1475 UErrorCode errorCode; 1476 UBool isAmbiguous; 1477 1478 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1479 errorCode=U_ZERO_ERROR; 1480 s=inBytes; 1481 u=outUnicode; 1482 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1483 if(U_FAILURE(errorCode)) { 1484 /* we do not care about general failures in this test; the input may just not be mappable */ 1485 return; 1486 } 1487 1488 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1489 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1490 /* There are some encodings that are partially ASCII based, 1491 like the ISO-7 and GSM series of codepages, which we ignore. */ 1492 return; 1493 } 1494 1495 isAmbiguous=ucnv_isAmbiguous(cnv); 1496 1497 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1498 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1499 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1500 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1501 return; 1502 } 1503 1504 if(outUnicode[2]!=0x5c) { 1505 /* needs fixup, fix it */ 1506 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1507 if(outUnicode[2]!=0x5c) { 1508 /* the fix failed */ 1509 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1510 return; 1511 } 1512 } 1513 } 1514 1515 static void TestAmbiguous() 1516 { 1517 UErrorCode status = U_ZERO_ERROR; 1518 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1519 static const char target[] = { 1520 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1521 0x5c, 0x75, 0x73, 0x72, 1522 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1523 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1524 0x5c, 0x64, 0x61, 0x74, 0x61, 1525 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1526 0 1527 }; 1528 UChar asciiResult[200], sjisResult[200]; 1529 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1530 const char *name; 1531 1532 /* enumerate all converters */ 1533 status=U_ZERO_ERROR; 1534 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1535 cnv=ucnv_open(name, &status); 1536 if(U_SUCCESS(status)) { 1537 /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */ 1538 const char* cnvName = ucnv_getName(cnv, &status); 1539 if (strlen(cnvName) < 8 || 1540 strncmp(cnvName, "ISO_2022_CN", 8) != 0) { 1541 TestAmbiguousConverter(cnv); 1542 } 1543 /* END android-changed */ 1544 ucnv_close(cnv); 1545 } else { 1546 log_err("error: unable to open available converter \"%s\"\n", name); 1547 status=U_ZERO_ERROR; 1548 } 1549 } 1550 1551 #if !UCONFIG_NO_LEGACY_CONVERSION 1552 sjis_cnv = ucnv_open("ibm-943", &status); 1553 if (U_FAILURE(status)) 1554 { 1555 log_data_err("Failed to create a SJIS converter\n"); 1556 return; 1557 } 1558 ascii_cnv = ucnv_open("LATIN-1", &status); 1559 if (U_FAILURE(status)) 1560 { 1561 log_data_err("Failed to create a LATIN-1 converter\n"); 1562 ucnv_close(sjis_cnv); 1563 return; 1564 } 1565 /* convert target from SJIS to Unicode */ 1566 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status); 1567 if (U_FAILURE(status)) 1568 { 1569 log_err("Failed to convert the SJIS string.\n"); 1570 ucnv_close(sjis_cnv); 1571 ucnv_close(ascii_cnv); 1572 return; 1573 } 1574 /* convert target from Latin-1 to Unicode */ 1575 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status); 1576 if (U_FAILURE(status)) 1577 { 1578 log_err("Failed to convert the Latin-1 string.\n"); 1579 ucnv_close(sjis_cnv); 1580 ucnv_close(ascii_cnv); 1581 return; 1582 } 1583 if (!ucnv_isAmbiguous(sjis_cnv)) 1584 { 1585 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1586 ucnv_close(sjis_cnv); 1587 ucnv_close(ascii_cnv); 1588 return; 1589 } 1590 if (u_strcmp(sjisResult, asciiResult) == 0) 1591 { 1592 log_err("File separators for SJIS don't need to be fixed.\n"); 1593 } 1594 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1595 if (u_strcmp(sjisResult, asciiResult) != 0) 1596 { 1597 log_err("Fixing file separator for SJIS failed.\n"); 1598 } 1599 ucnv_close(sjis_cnv); 1600 ucnv_close(ascii_cnv); 1601 #endif 1602 } 1603 1604 static void 1605 TestSignatureDetection(){ 1606 /* with null terminated strings */ 1607 { 1608 static const char* data[] = { 1609 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1610 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1611 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1612 "\x0E\xFE\xFF\x00", /* SCSU */ 1613 1614 "\xFE\xFF", /* UTF-16BE */ 1615 "\xFF\xFE", /* UTF-16LE */ 1616 "\xEF\xBB\xBF", /* UTF-8 */ 1617 "\x0E\xFE\xFF", /* SCSU */ 1618 1619 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1620 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1621 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1622 "\x0E\xFE\xFF\x41", /* SCSU */ 1623 1624 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1625 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1626 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1627 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1628 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1629 1630 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1631 }; 1632 static const char* expected[] = { 1633 "UTF-16BE", 1634 "UTF-16LE", 1635 "UTF-8", 1636 "SCSU", 1637 1638 "UTF-16BE", 1639 "UTF-16LE", 1640 "UTF-8", 1641 "SCSU", 1642 1643 "UTF-16BE", 1644 "UTF-16LE", 1645 "UTF-8", 1646 "SCSU", 1647 1648 "UTF-7", 1649 "UTF-7", 1650 "UTF-7", 1651 "UTF-7", 1652 "UTF-7", 1653 "UTF-EBCDIC" 1654 }; 1655 static const int32_t expectedLength[] ={ 1656 2, 1657 2, 1658 3, 1659 3, 1660 1661 2, 1662 2, 1663 3, 1664 3, 1665 1666 2, 1667 2, 1668 3, 1669 3, 1670 1671 5, 1672 4, 1673 4, 1674 4, 1675 4, 1676 4 1677 }; 1678 int i=0; 1679 UErrorCode err; 1680 int32_t signatureLength = -1; 1681 const char* source = NULL; 1682 const char* enc = NULL; 1683 for( ; i<UPRV_LENGTHOF(data); i++){ 1684 err = U_ZERO_ERROR; 1685 source = data[i]; 1686 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1687 if(U_FAILURE(err)){ 1688 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1689 continue; 1690 } 1691 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1692 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1693 continue; 1694 } 1695 if(signatureLength != expectedLength[i]){ 1696 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1697 } 1698 } 1699 } 1700 { 1701 static const char* data[] = { 1702 "\xFE\xFF\x00", /* UTF-16BE */ 1703 "\xFF\xFE\x00", /* UTF-16LE */ 1704 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1705 "\x0E\xFE\xFF\x00", /* SCSU */ 1706 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1707 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1708 "\xFE\xFF", /* UTF-16BE */ 1709 "\xFF\xFE", /* UTF-16LE */ 1710 "\xEF\xBB\xBF", /* UTF-8 */ 1711 "\x0E\xFE\xFF", /* SCSU */ 1712 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1713 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1714 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1715 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1716 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1717 "\x0E\xFE\xFF\x41", /* SCSU */ 1718 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1719 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1720 "\xFB\xEE\x28", /* BOCU-1 */ 1721 "\xFF\x41\x42" /* NULL */ 1722 }; 1723 static const int len[] = { 1724 3, 1725 3, 1726 4, 1727 4, 1728 4, 1729 4, 1730 2, 1731 2, 1732 3, 1733 3, 1734 4, 1735 4, 1736 4, 1737 4, 1738 4, 1739 4, 1740 5, 1741 5, 1742 3, 1743 3 1744 }; 1745 1746 static const char* expected[] = { 1747 "UTF-16BE", 1748 "UTF-16LE", 1749 "UTF-8", 1750 "SCSU", 1751 "UTF-32BE", 1752 "UTF-32LE", 1753 "UTF-16BE", 1754 "UTF-16LE", 1755 "UTF-8", 1756 "SCSU", 1757 "UTF-32BE", 1758 "UTF-32LE", 1759 "UTF-16BE", 1760 "UTF-16LE", 1761 "UTF-8", 1762 "SCSU", 1763 "UTF-32BE", 1764 "UTF-32LE", 1765 "BOCU-1", 1766 NULL 1767 }; 1768 static const int32_t expectedLength[] ={ 1769 2, 1770 2, 1771 3, 1772 3, 1773 4, 1774 4, 1775 2, 1776 2, 1777 3, 1778 3, 1779 4, 1780 4, 1781 2, 1782 2, 1783 3, 1784 3, 1785 4, 1786 4, 1787 3, 1788 0 1789 }; 1790 int i=0; 1791 UErrorCode err; 1792 int32_t signatureLength = -1; 1793 int32_t sourceLength=-1; 1794 const char* source = NULL; 1795 const char* enc = NULL; 1796 for( ; i<UPRV_LENGTHOF(data); i++){ 1797 err = U_ZERO_ERROR; 1798 source = data[i]; 1799 sourceLength = len[i]; 1800 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); 1801 if(U_FAILURE(err)){ 1802 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1803 continue; 1804 } 1805 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1806 if(expected[i] !=NULL){ 1807 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1808 continue; 1809 } 1810 } 1811 if(signatureLength != expectedLength[i]){ 1812 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1813 } 1814 } 1815 } 1816 } 1817 1818 static void TestUTF7() { 1819 /* test input */ 1820 static const uint8_t in[]={ 1821 /* H - +Jjo- - ! +- +2AHcAQ */ 1822 0x48, 1823 0x2d, 1824 0x2b, 0x4a, 0x6a, 0x6f, 1825 0x2d, 0x2d, 1826 0x21, 1827 0x2b, 0x2d, 1828 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1829 }; 1830 1831 /* expected test results */ 1832 static const int32_t results[]={ 1833 /* number of bytes read, code point */ 1834 1, 0x48, 1835 1, 0x2d, 1836 4, 0x263a, /* <WHITE SMILING FACE> */ 1837 2, 0x2d, 1838 1, 0x21, 1839 2, 0x2b, 1840 7, 0x10401 1841 }; 1842 1843 const char *cnvName; 1844 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1845 UErrorCode errorCode=U_ZERO_ERROR; 1846 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1847 if(U_FAILURE(errorCode)) { 1848 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); 1849 return; 1850 } 1851 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1852 /* Test the condition when source >= sourceLimit */ 1853 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1854 cnvName = ucnv_getName(cnv, &errorCode); 1855 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1856 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1857 } 1858 ucnv_close(cnv); 1859 } 1860 1861 static void TestIMAP() { 1862 /* test input */ 1863 static const uint8_t in[]={ 1864 /* H - &Jjo- - ! &- &2AHcAQ- \ */ 1865 0x48, 1866 0x2d, 1867 0x26, 0x4a, 0x6a, 0x6f, 1868 0x2d, 0x2d, 1869 0x21, 1870 0x26, 0x2d, 1871 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1872 }; 1873 1874 /* expected test results */ 1875 static const int32_t results[]={ 1876 /* number of bytes read, code point */ 1877 1, 0x48, 1878 1, 0x2d, 1879 4, 0x263a, /* <WHITE SMILING FACE> */ 1880 2, 0x2d, 1881 1, 0x21, 1882 2, 0x26, 1883 7, 0x10401 1884 }; 1885 1886 const char *cnvName; 1887 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1888 UErrorCode errorCode=U_ZERO_ERROR; 1889 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1890 if(U_FAILURE(errorCode)) { 1891 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); 1892 return; 1893 } 1894 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1895 /* Test the condition when source >= sourceLimit */ 1896 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1897 cnvName = ucnv_getName(cnv, &errorCode); 1898 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1899 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1900 } 1901 ucnv_close(cnv); 1902 } 1903 1904 static void TestUTF8() { 1905 /* test input */ 1906 static const uint8_t in[]={ 1907 0x61, 1908 0xc2, 0x80, 1909 0xe0, 0xa0, 0x80, 1910 0xf0, 0x90, 0x80, 0x80, 1911 0xf4, 0x84, 0x8c, 0xa1, 1912 0xf0, 0x90, 0x90, 0x81 1913 }; 1914 1915 /* expected test results */ 1916 static const int32_t results[]={ 1917 /* number of bytes read, code point */ 1918 1, 0x61, 1919 2, 0x80, 1920 3, 0x800, 1921 4, 0x10000, 1922 4, 0x104321, 1923 4, 0x10401 1924 }; 1925 1926 /* error test input */ 1927 static const uint8_t in2[]={ 1928 0x61, 1929 0xc0, 0x80, /* illegal non-shortest form */ 1930 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1931 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1932 0xc0, 0xc0, /* illegal trail byte */ 1933 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1934 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1935 0xfe, /* illegal byte altogether */ 1936 0x62 1937 }; 1938 1939 /* expected error test results */ 1940 static const int32_t results2[]={ 1941 /* number of bytes read, code point */ 1942 1, 0x61, 1943 22, 0x62 1944 }; 1945 1946 UConverterToUCallback cb; 1947 const void *p; 1948 1949 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1950 UErrorCode errorCode=U_ZERO_ERROR; 1951 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1952 if(U_FAILURE(errorCode)) { 1953 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1954 return; 1955 } 1956 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1957 /* Test the condition when source >= sourceLimit */ 1958 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1959 1960 /* test error behavior with a skip callback */ 1961 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1962 source=(const char *)in2; 1963 limit=(const char *)(in2+sizeof(in2)); 1964 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1965 1966 ucnv_close(cnv); 1967 } 1968 1969 static void TestCESU8() { 1970 /* test input */ 1971 static const uint8_t in[]={ 1972 0x61, 1973 0xc2, 0x80, 1974 0xe0, 0xa0, 0x80, 1975 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1976 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1977 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1978 0xef, 0xbf, 0xbc 1979 }; 1980 1981 /* expected test results */ 1982 static const int32_t results[]={ 1983 /* number of bytes read, code point */ 1984 1, 0x61, 1985 2, 0x80, 1986 3, 0x800, 1987 6, 0x10000, 1988 3, 0xdc01, 1989 -1,0xd802, /* may read 3 or 6 bytes */ 1990 -1,0x10ffff,/* may read 0 or 3 bytes */ 1991 3, 0xfffc 1992 }; 1993 1994 /* error test input */ 1995 static const uint8_t in2[]={ 1996 0x61, 1997 0xc0, 0x80, /* illegal non-shortest form */ 1998 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1999 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 2000 0xc0, 0xc0, /* illegal trail byte */ 2001 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 2002 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 2003 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 2004 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 2005 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 2006 0xfe, /* illegal byte altogether */ 2007 0x62 2008 }; 2009 2010 /* expected error test results */ 2011 static const int32_t results2[]={ 2012 /* number of bytes read, code point */ 2013 1, 0x61, 2014 34, 0x62 2015 }; 2016 2017 UConverterToUCallback cb; 2018 const void *p; 2019 2020 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 2021 UErrorCode errorCode=U_ZERO_ERROR; 2022 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2023 if(U_FAILURE(errorCode)) { 2024 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2025 return; 2026 } 2027 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2028 /* Test the condition when source >= sourceLimit */ 2029 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2030 2031 /* test error behavior with a skip callback */ 2032 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2033 source=(const char *)in2; 2034 limit=(const char *)(in2+sizeof(in2)); 2035 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2036 2037 ucnv_close(cnv); 2038 } 2039 2040 static void TestUTF16() { 2041 /* test input */ 2042 static const uint8_t in1[]={ 2043 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2044 }; 2045 static const uint8_t in2[]={ 2046 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2047 }; 2048 static const uint8_t in3[]={ 2049 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2050 }; 2051 2052 /* expected test results */ 2053 static const int32_t results1[]={ 2054 /* number of bytes read, code point */ 2055 4, 0x4e00, 2056 2, 0xfeff 2057 }; 2058 static const int32_t results2[]={ 2059 /* number of bytes read, code point */ 2060 4, 0x004e, 2061 2, 0xfffe 2062 }; 2063 static const int32_t results3[]={ 2064 /* number of bytes read, code point */ 2065 2, 0xfefe, 2066 2, 0x4e00, 2067 2, 0xfeff, 2068 4, 0x20001 2069 }; 2070 2071 const char *source, *limit; 2072 2073 UErrorCode errorCode=U_ZERO_ERROR; 2074 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2075 if(U_FAILURE(errorCode)) { 2076 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2077 return; 2078 } 2079 2080 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2081 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2082 2083 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2084 ucnv_resetToUnicode(cnv); 2085 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2086 2087 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2088 ucnv_resetToUnicode(cnv); 2089 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2090 2091 /* Test the condition when source >= sourceLimit */ 2092 ucnv_resetToUnicode(cnv); 2093 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2094 2095 ucnv_close(cnv); 2096 } 2097 2098 static void TestUTF16BE() { 2099 /* test input */ 2100 static const uint8_t in[]={ 2101 0x00, 0x61, 2102 0x00, 0xc0, 2103 0x00, 0x31, 2104 0x00, 0xf4, 2105 0xce, 0xfe, 2106 0xd8, 0x01, 0xdc, 0x01 2107 }; 2108 2109 /* expected test results */ 2110 static const int32_t results[]={ 2111 /* number of bytes read, code point */ 2112 2, 0x61, 2113 2, 0xc0, 2114 2, 0x31, 2115 2, 0xf4, 2116 2, 0xcefe, 2117 4, 0x10401 2118 }; 2119 2120 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2121 UErrorCode errorCode=U_ZERO_ERROR; 2122 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2123 if(U_FAILURE(errorCode)) { 2124 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); 2125 return; 2126 } 2127 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2128 /* Test the condition when source >= sourceLimit */ 2129 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2130 /*Test for the condition where there is an invalid character*/ 2131 { 2132 static const uint8_t source2[]={0x61}; 2133 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2134 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2135 } 2136 #if 0 2137 /* 2138 * Test disabled because currently the UTF-16BE/LE converters are supposed 2139 * to not set errors for unpaired surrogates. 2140 * This may change with 2141 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2142 */ 2143 2144 /*Test for the condition where there is a surrogate pair*/ 2145 { 2146 const uint8_t source2[]={0xd8, 0x01}; 2147 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2148 } 2149 #endif 2150 ucnv_close(cnv); 2151 } 2152 2153 static void 2154 TestUTF16LE() { 2155 /* test input */ 2156 static const uint8_t in[]={ 2157 0x61, 0x00, 2158 0x31, 0x00, 2159 0x4e, 0x2e, 2160 0x4e, 0x00, 2161 0x01, 0xd8, 0x01, 0xdc 2162 }; 2163 2164 /* expected test results */ 2165 static const int32_t results[]={ 2166 /* number of bytes read, code point */ 2167 2, 0x61, 2168 2, 0x31, 2169 2, 0x2e4e, 2170 2, 0x4e, 2171 4, 0x10401 2172 }; 2173 2174 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2175 UErrorCode errorCode=U_ZERO_ERROR; 2176 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2177 if(U_FAILURE(errorCode)) { 2178 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2179 return; 2180 } 2181 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2182 /* Test the condition when source >= sourceLimit */ 2183 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2184 /*Test for the condition where there is an invalid character*/ 2185 { 2186 static const uint8_t source2[]={0x61}; 2187 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2188 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2189 } 2190 #if 0 2191 /* 2192 * Test disabled because currently the UTF-16BE/LE converters are supposed 2193 * to not set errors for unpaired surrogates. 2194 * This may change with 2195 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2196 */ 2197 2198 /*Test for the condition where there is a surrogate character*/ 2199 { 2200 static const uint8_t source2[]={0x01, 0xd8}; 2201 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2202 } 2203 #endif 2204 2205 ucnv_close(cnv); 2206 } 2207 2208 static void TestUTF32() { 2209 /* test input */ 2210 static const uint8_t in1[]={ 2211 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2212 }; 2213 static const uint8_t in2[]={ 2214 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2215 }; 2216 static const uint8_t in3[]={ 2217 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2218 }; 2219 2220 /* expected test results */ 2221 static const int32_t results1[]={ 2222 /* number of bytes read, code point */ 2223 8, 0x100f00, 2224 4, 0xfeff 2225 }; 2226 static const int32_t results2[]={ 2227 /* number of bytes read, code point */ 2228 8, 0x0f1000, 2229 4, 0xfffe 2230 }; 2231 static const int32_t results3[]={ 2232 /* number of bytes read, code point */ 2233 4, 0xfefe, 2234 4, 0x100f00, 2235 4, 0xfffd, /* unmatched surrogate */ 2236 4, 0xfffd /* unmatched surrogate */ 2237 }; 2238 2239 const char *source, *limit; 2240 2241 UErrorCode errorCode=U_ZERO_ERROR; 2242 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2243 if(U_FAILURE(errorCode)) { 2244 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2245 return; 2246 } 2247 2248 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2249 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2250 2251 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2252 ucnv_resetToUnicode(cnv); 2253 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2254 2255 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2256 ucnv_resetToUnicode(cnv); 2257 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2258 2259 /* Test the condition when source >= sourceLimit */ 2260 ucnv_resetToUnicode(cnv); 2261 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2262 2263 ucnv_close(cnv); 2264 } 2265 2266 static void 2267 TestUTF32BE() { 2268 /* test input */ 2269 static const uint8_t in[]={ 2270 0x00, 0x00, 0x00, 0x61, 2271 0x00, 0x00, 0x30, 0x61, 2272 0x00, 0x00, 0xdc, 0x00, 2273 0x00, 0x00, 0xd8, 0x00, 2274 0x00, 0x00, 0xdf, 0xff, 2275 0x00, 0x00, 0xff, 0xfe, 2276 0x00, 0x10, 0xab, 0xcd, 2277 0x00, 0x10, 0xff, 0xff 2278 }; 2279 2280 /* expected test results */ 2281 static const int32_t results[]={ 2282 /* number of bytes read, code point */ 2283 4, 0x61, 2284 4, 0x3061, 2285 4, 0xfffd, 2286 4, 0xfffd, 2287 4, 0xfffd, 2288 4, 0xfffe, 2289 4, 0x10abcd, 2290 4, 0x10ffff 2291 }; 2292 2293 /* error test input */ 2294 static const uint8_t in2[]={ 2295 0x00, 0x00, 0x00, 0x61, 2296 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2297 0x00, 0x00, 0x00, 0x62, 2298 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2299 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2300 0x00, 0x00, 0x01, 0x62, 2301 0x00, 0x00, 0x02, 0x62 2302 }; 2303 2304 /* expected error test results */ 2305 static const int32_t results2[]={ 2306 /* number of bytes read, code point */ 2307 4, 0x61, 2308 8, 0x62, 2309 12, 0x162, 2310 4, 0x262 2311 }; 2312 2313 UConverterToUCallback cb; 2314 const void *p; 2315 2316 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2317 UErrorCode errorCode=U_ZERO_ERROR; 2318 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2319 if(U_FAILURE(errorCode)) { 2320 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2321 return; 2322 } 2323 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2324 2325 /* Test the condition when source >= sourceLimit */ 2326 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2327 2328 /* test error behavior with a skip callback */ 2329 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2330 source=(const char *)in2; 2331 limit=(const char *)(in2+sizeof(in2)); 2332 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2333 2334 ucnv_close(cnv); 2335 } 2336 2337 static void 2338 TestUTF32LE() { 2339 /* test input */ 2340 static const uint8_t in[]={ 2341 0x61, 0x00, 0x00, 0x00, 2342 0x61, 0x30, 0x00, 0x00, 2343 0x00, 0xdc, 0x00, 0x00, 2344 0x00, 0xd8, 0x00, 0x00, 2345 0xff, 0xdf, 0x00, 0x00, 2346 0xfe, 0xff, 0x00, 0x00, 2347 0xcd, 0xab, 0x10, 0x00, 2348 0xff, 0xff, 0x10, 0x00 2349 }; 2350 2351 /* expected test results */ 2352 static const int32_t results[]={ 2353 /* number of bytes read, code point */ 2354 4, 0x61, 2355 4, 0x3061, 2356 4, 0xfffd, 2357 4, 0xfffd, 2358 4, 0xfffd, 2359 4, 0xfffe, 2360 4, 0x10abcd, 2361 4, 0x10ffff 2362 }; 2363 2364 /* error test input */ 2365 static const uint8_t in2[]={ 2366 0x61, 0x00, 0x00, 0x00, 2367 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2368 0x62, 0x00, 0x00, 0x00, 2369 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2370 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2371 0x62, 0x01, 0x00, 0x00, 2372 0x62, 0x02, 0x00, 0x00, 2373 }; 2374 2375 /* expected error test results */ 2376 static const int32_t results2[]={ 2377 /* number of bytes read, code point */ 2378 4, 0x61, 2379 8, 0x62, 2380 12, 0x162, 2381 4, 0x262, 2382 }; 2383 2384 UConverterToUCallback cb; 2385 const void *p; 2386 2387 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2388 UErrorCode errorCode=U_ZERO_ERROR; 2389 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2390 if(U_FAILURE(errorCode)) { 2391 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2392 return; 2393 } 2394 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2395 2396 /* Test the condition when source >= sourceLimit */ 2397 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2398 2399 /* test error behavior with a skip callback */ 2400 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2401 source=(const char *)in2; 2402 limit=(const char *)(in2+sizeof(in2)); 2403 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2404 2405 ucnv_close(cnv); 2406 } 2407 2408 static void 2409 TestLATIN1() { 2410 /* test input */ 2411 static const uint8_t in[]={ 2412 0x61, 2413 0x31, 2414 0x32, 2415 0xc0, 2416 0xf0, 2417 0xf4, 2418 }; 2419 2420 /* expected test results */ 2421 static const int32_t results[]={ 2422 /* number of bytes read, code point */ 2423 1, 0x61, 2424 1, 0x31, 2425 1, 0x32, 2426 1, 0xc0, 2427 1, 0xf0, 2428 1, 0xf4, 2429 }; 2430 static const uint16_t in1[] = { 2431 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2432 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2433 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2434 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2435 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2436 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2437 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2438 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2439 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2440 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2441 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2442 0xcb, 0x82 2443 }; 2444 static const uint8_t out1[] = { 2445 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2446 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2447 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2448 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2449 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2450 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2451 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2452 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2453 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2454 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2455 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2456 0xcb, 0x82 2457 }; 2458 static const uint16_t in2[]={ 2459 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2460 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2461 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2462 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2463 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2464 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2465 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2466 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2467 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2468 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2469 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2470 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2471 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2472 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2473 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2474 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2475 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2476 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2477 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2478 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2479 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2480 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2481 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2482 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2483 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2484 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2485 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2486 0x37, 0x20, 0x2A, 0x2F, 2487 }; 2488 static const unsigned char out2[]={ 2489 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2490 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2491 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2492 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2493 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2494 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2495 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2496 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2497 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2498 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2499 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2500 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2501 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2502 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2503 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2504 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2505 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2506 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2507 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2508 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2509 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2510 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2511 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2512 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2513 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2514 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2515 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2516 0x37, 0x20, 0x2A, 0x2F, 2517 }; 2518 const char *source=(const char *)in; 2519 const char *limit=(const char *)in+sizeof(in); 2520 2521 UErrorCode errorCode=U_ZERO_ERROR; 2522 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2523 if(U_FAILURE(errorCode)) { 2524 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2525 return; 2526 } 2527 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2528 /* Test the condition when source >= sourceLimit */ 2529 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2530 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2531 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2532 2533 ucnv_close(cnv); 2534 } 2535 2536 static void 2537 TestSBCS() { 2538 /* test input */ 2539 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2540 /* expected test results */ 2541 static const int32_t results[]={ 2542 /* number of bytes read, code point */ 2543 1, 0x61, 2544 1, 0xbf, 2545 1, 0xc4, 2546 1, 0x2021, 2547 1, 0xf8ff, 2548 1, 0x00d9 2549 }; 2550 2551 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2552 UErrorCode errorCode=U_ZERO_ERROR; 2553 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2554 if(U_FAILURE(errorCode)) { 2555 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2556 return; 2557 } 2558 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2559 /* Test the condition when source >= sourceLimit */ 2560 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2561 /*Test for Illegal character */ /* 2562 { 2563 static const uint8_t input1[]={ 0xA1 }; 2564 const char* illegalsource=(const char*)input1; 2565 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2566 } 2567 */ 2568 ucnv_close(cnv); 2569 } 2570 2571 static void 2572 TestDBCS() { 2573 /* test input */ 2574 static const uint8_t in[]={ 2575 0x44, 0x6a, 2576 0xc4, 0x9c, 2577 0x7a, 0x74, 2578 0x46, 0xab, 2579 0x42, 0x5b, 2580 2581 }; 2582 2583 /* expected test results */ 2584 static const int32_t results[]={ 2585 /* number of bytes read, code point */ 2586 2, 0x00a7, 2587 2, 0xe1d2, 2588 2, 0x6962, 2589 2, 0xf842, 2590 2, 0xffe5, 2591 }; 2592 2593 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2594 UErrorCode errorCode=U_ZERO_ERROR; 2595 2596 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2597 if(U_FAILURE(errorCode)) { 2598 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2599 return; 2600 } 2601 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2602 /* Test the condition when source >= sourceLimit */ 2603 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2604 /*Test for the condition where there is an invalid character*/ 2605 { 2606 static const uint8_t source2[]={0x1a, 0x1b}; 2607 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2608 } 2609 /*Test for the condition where we have a truncated char*/ 2610 { 2611 static const uint8_t source1[]={0xc4}; 2612 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2613 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2614 } 2615 ucnv_close(cnv); 2616 } 2617 2618 static void 2619 TestMBCS() { 2620 /* test input */ 2621 static const uint8_t in[]={ 2622 0x01, 2623 0xa6, 0xa3, 2624 0x00, 2625 0xa6, 0xa1, 2626 0x08, 2627 0xc2, 0x76, 2628 0xc2, 0x78, 2629 2630 }; 2631 2632 /* expected test results */ 2633 static const int32_t results[]={ 2634 /* number of bytes read, code point */ 2635 1, 0x0001, 2636 2, 0x250c, 2637 1, 0x0000, 2638 2, 0x2500, 2639 1, 0x0008, 2640 2, 0xd60c, 2641 2, 0xd60e, 2642 }; 2643 2644 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2645 UErrorCode errorCode=U_ZERO_ERROR; 2646 2647 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2648 if(U_FAILURE(errorCode)) { 2649 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2650 return; 2651 } 2652 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2653 /* Test the condition when source >= sourceLimit */ 2654 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2655 /*Test for the condition where there is an invalid character*/ 2656 { 2657 static const uint8_t source2[]={0xa1, 0x80}; 2658 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2659 } 2660 /*Test for the condition where we have a truncated char*/ 2661 { 2662 static const uint8_t source1[]={0xc4}; 2663 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2664 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2665 } 2666 ucnv_close(cnv); 2667 2668 } 2669 2670 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2671 static void 2672 TestICCRunout() { 2673 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2674 2675 const char *cnvName = "ibm-1363"; 2676 UErrorCode status = U_ZERO_ERROR; 2677 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; 2678 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ 2679 const char *source = sourceData; 2680 const char *sourceLim = sourceData+sizeof(sourceData); 2681 UChar c1, c2, c3; 2682 UConverter *cnv=ucnv_open(cnvName, &status); 2683 if(U_FAILURE(status)) { 2684 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2685 return; 2686 } 2687 2688 #if 0 2689 { 2690 UChar targetBuf[256]; 2691 UChar *target = targetBuf; 2692 UChar *targetLim = target+256; 2693 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2694 2695 log_info("After convert: target@%d, source@%d, status%s\n", 2696 target-targetBuf, source-sourceData, u_errorName(status)); 2697 2698 if(U_FAILURE(status)) { 2699 log_err("Failed to convert: %s\n", u_errorName(status)); 2700 } else { 2701 2702 } 2703 } 2704 #endif 2705 2706 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2707 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); 2708 2709 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2710 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); 2711 2712 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2713 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); 2714 2715 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { 2716 log_verbose("OK\n"); 2717 } else { 2718 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); 2719 } 2720 2721 ucnv_close(cnv); 2722 2723 } 2724 #endif 2725 2726 #ifdef U_ENABLE_GENERIC_ISO_2022 2727 2728 static void 2729 TestISO_2022() { 2730 /* test input */ 2731 static const uint8_t in[]={ 2732 0x1b, 0x25, 0x42, 2733 0x31, 2734 0x32, 2735 0x61, 2736 0xc2, 0x80, 2737 0xe0, 0xa0, 0x80, 2738 0xf0, 0x90, 0x80, 0x80 2739 }; 2740 2741 2742 2743 /* expected test results */ 2744 static const int32_t results[]={ 2745 /* number of bytes read, code point */ 2746 4, 0x0031, /* 4 bytes including the escape sequence */ 2747 1, 0x0032, 2748 1, 0x61, 2749 2, 0x80, 2750 3, 0x800, 2751 4, 0x10000 2752 }; 2753 2754 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2755 UErrorCode errorCode=U_ZERO_ERROR; 2756 UConverter *cnv; 2757 2758 cnv=ucnv_open("ISO_2022", &errorCode); 2759 if(U_FAILURE(errorCode)) { 2760 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2761 return; 2762 } 2763 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2764 2765 /* Test the condition when source >= sourceLimit */ 2766 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2767 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2768 /*Test for the condition where we have a truncated char*/ 2769 { 2770 static const uint8_t source1[]={0xc4}; 2771 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2772 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2773 } 2774 /*Test for the condition where there is an invalid character*/ 2775 { 2776 static const uint8_t source2[]={0xa1, 0x01}; 2777 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2778 } 2779 ucnv_close(cnv); 2780 } 2781 2782 #endif 2783 2784 static void 2785 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2786 const UChar* uSource; 2787 const UChar* uSourceLimit; 2788 const char* cSource; 2789 const char* cSourceLimit; 2790 UChar *uTargetLimit =NULL; 2791 UChar *uTarget; 2792 char *cTarget; 2793 const char *cTargetLimit; 2794 char *cBuf; 2795 UChar *uBuf; /*,*test;*/ 2796 int32_t uBufSize = 120; 2797 int len=0; 2798 int i=2; 2799 UErrorCode errorCode=U_ZERO_ERROR; 2800 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2801 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2802 ucnv_reset(cnv); 2803 for(;--i>0; ){ 2804 uSource = (UChar*) source; 2805 uSourceLimit=(const UChar*)sourceLimit; 2806 cTarget = cBuf; 2807 uTarget = uBuf; 2808 cSource = cBuf; 2809 cTargetLimit = cBuf; 2810 uTargetLimit = uBuf; 2811 2812 do{ 2813 2814 cTargetLimit = cTargetLimit+ i; 2815 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2816 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2817 errorCode=U_ZERO_ERROR; 2818 continue; 2819 } 2820 2821 if(U_FAILURE(errorCode)){ 2822 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2823 return; 2824 } 2825 2826 }while (uSource<uSourceLimit); 2827 2828 cSourceLimit =cTarget; 2829 do{ 2830 uTargetLimit=uTargetLimit+i; 2831 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2832 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2833 errorCode=U_ZERO_ERROR; 2834 continue; 2835 } 2836 if(U_FAILURE(errorCode)){ 2837 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2838 return; 2839 } 2840 }while(cSource<cSourceLimit); 2841 2842 uSource = source; 2843 /*test =uBuf;*/ 2844 for(len=0;len<(int)(source - sourceLimit);len++){ 2845 if(uBuf[len]!=uSource[len]){ 2846 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2847 } 2848 } 2849 } 2850 free(uBuf); 2851 free(cBuf); 2852 } 2853 /* Test for Jitterbug 778 */ 2854 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2855 const UChar* uSource; 2856 const UChar* uSourceLimit; 2857 const char* cSource; 2858 UChar *uTargetLimit =NULL; 2859 UChar *uTarget; 2860 char *cTarget; 2861 const char *cTargetLimit; 2862 char *cBuf; 2863 UChar *uBuf,*test; 2864 int32_t uBufSize = 120; 2865 int numCharsInTarget=0; 2866 UErrorCode errorCode=U_ZERO_ERROR; 2867 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2868 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2869 uSource = source; 2870 uSourceLimit=sourceLimit; 2871 cTarget = cBuf; 2872 cTargetLimit = cBuf +uBufSize*5; 2873 uTarget = uBuf; 2874 uTargetLimit = uBuf+ uBufSize*5; 2875 ucnv_reset(cnv); 2876 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2877 if(U_FAILURE(errorCode)){ 2878 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2879 return; 2880 } 2881 cSource = cBuf; 2882 test =uBuf; 2883 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2884 if(U_FAILURE(errorCode)){ 2885 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2886 return; 2887 } 2888 uSource = source; 2889 while(uSource<uSourceLimit){ 2890 if(*test!=*uSource){ 2891 2892 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 2893 } 2894 uSource++; 2895 test++; 2896 } 2897 free(uBuf); 2898 free(cBuf); 2899 } 2900 2901 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2902 const UChar* uSource; 2903 const UChar* uSourceLimit; 2904 const char* cSource; 2905 const char* cSourceLimit; 2906 UChar *uTargetLimit =NULL; 2907 UChar *uTarget; 2908 char *cTarget; 2909 const char *cTargetLimit; 2910 char *cBuf; 2911 UChar *uBuf; /*,*test;*/ 2912 int32_t uBufSize = 120; 2913 int len=0; 2914 int i=2; 2915 const UChar *temp = sourceLimit; 2916 UErrorCode errorCode=U_ZERO_ERROR; 2917 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2918 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2919 2920 ucnv_reset(cnv); 2921 for(;--i>0;){ 2922 uSource = (UChar*) source; 2923 cTarget = cBuf; 2924 uTarget = uBuf; 2925 cSource = cBuf; 2926 cTargetLimit = cBuf; 2927 uTargetLimit = uBuf+uBufSize*5; 2928 cTargetLimit = cTargetLimit+uBufSize*10; 2929 uSourceLimit=uSource; 2930 do{ 2931 2932 if (uSourceLimit < sourceLimit) { 2933 uSourceLimit = uSourceLimit+1; 2934 } 2935 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2936 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2937 errorCode=U_ZERO_ERROR; 2938 continue; 2939 } 2940 2941 if(U_FAILURE(errorCode)){ 2942 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2943 return; 2944 } 2945 2946 }while (uSource<temp); 2947 2948 cSourceLimit =cBuf; 2949 do{ 2950 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2951 cSourceLimit = cSourceLimit+1; 2952 } 2953 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2954 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2955 errorCode=U_ZERO_ERROR; 2956 continue; 2957 } 2958 if(U_FAILURE(errorCode)){ 2959 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2960 return; 2961 } 2962 }while(cSource<cTarget); 2963 2964 uSource = source; 2965 /*test =uBuf;*/ 2966 for(;len<(int)(source - sourceLimit);len++){ 2967 if(uBuf[len]!=uSource[len]){ 2968 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2969 } 2970 } 2971 } 2972 free(uBuf); 2973 free(cBuf); 2974 } 2975 static void 2976 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2977 const uint16_t results[], const char* message){ 2978 /* const char* s0; */ 2979 const char* s=(char*)source; 2980 const uint16_t *r=results; 2981 UErrorCode errorCode=U_ZERO_ERROR; 2982 uint32_t c,exC; 2983 ucnv_reset(cnv); 2984 while(s<limit) { 2985 /* s0=s; */ 2986 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2987 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2988 break; /* no more significant input */ 2989 } else if(U_FAILURE(errorCode)) { 2990 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2991 break; 2992 } else { 2993 if(U16_IS_LEAD(*r)){ 2994 int i =0, len = 2; 2995 U16_NEXT(r, i, len, exC); 2996 r++; 2997 }else{ 2998 exC = *r; 2999 } 3000 if(c!=(uint32_t)(exC)) 3001 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 3002 } 3003 r++; 3004 } 3005 } 3006 3007 static int TestJitterbug930(const char* enc){ 3008 UErrorCode err = U_ZERO_ERROR; 3009 UConverter*converter; 3010 char out[80]; 3011 char*target = out; 3012 UChar in[4]; 3013 const UChar*source = in; 3014 int32_t off[80]; 3015 int32_t* offsets = off; 3016 int numOffWritten=0; 3017 UBool flush = 0; 3018 converter = my_ucnv_open(enc, &err); 3019 3020 in[0] = 0x41; /* 0x4E00;*/ 3021 in[1] = 0x4E01; 3022 in[2] = 0x4E02; 3023 in[3] = 0x4E03; 3024 3025 memset(off, '*', sizeof(off)); 3026 3027 ucnv_fromUnicode (converter, 3028 &target, 3029 target+2, 3030 &source, 3031 source+3, 3032 offsets, 3033 flush, 3034 &err); 3035 3036 /* writes three bytes into the output buffer: 41 1B 24 3037 * but offsets contains 0 1 1 3038 */ 3039 while(*offsets< off[10]){ 3040 numOffWritten++; 3041 offsets++; 3042 } 3043 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3044 if(numOffWritten!= (int)(target-out)){ 3045 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3046 } 3047 3048 err = U_ZERO_ERROR; 3049 3050 memset(off,'*' , sizeof(off)); 3051 3052 flush = 1; 3053 offsets=off; 3054 ucnv_fromUnicode (converter, 3055 &target, 3056 target+4, 3057 &source, 3058 source, 3059 offsets, 3060 flush, 3061 &err); 3062 numOffWritten=0; 3063 while(*offsets< off[10]){ 3064 numOffWritten++; 3065 if(*offsets!= -1){ 3066 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3067 } 3068 offsets++; 3069 } 3070 3071 /* writes 42 43 7A into output buffer, 3072 * offsets contains -1 -1 -1 3073 */ 3074 ucnv_close(converter); 3075 return 0; 3076 } 3077 3078 static void 3079 TestHZ() { 3080 /* test input */ 3081 static const uint16_t in[]={ 3082 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3083 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3084 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3085 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3086 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3087 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3088 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3089 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3090 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3091 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 3092 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3093 0x005A, 0x005B, 0x005C, 0x000A 3094 }; 3095 const UChar* uSource; 3096 const UChar* uSourceLimit; 3097 const char* cSource; 3098 const char* cSourceLimit; 3099 UChar *uTargetLimit =NULL; 3100 UChar *uTarget; 3101 char *cTarget; 3102 const char *cTargetLimit; 3103 char *cBuf; 3104 UChar *uBuf,*test; 3105 int32_t uBufSize = 120; 3106 UErrorCode errorCode=U_ZERO_ERROR; 3107 UConverter *cnv; 3108 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3109 int32_t* myOff= offsets; 3110 cnv=ucnv_open("HZ", &errorCode); 3111 if(U_FAILURE(errorCode)) { 3112 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3113 return; 3114 } 3115 3116 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3117 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3118 uSource = (const UChar*)in; 3119 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); 3120 cTarget = cBuf; 3121 cTargetLimit = cBuf +uBufSize*5; 3122 uTarget = uBuf; 3123 uTargetLimit = uBuf+ uBufSize*5; 3124 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3125 if(U_FAILURE(errorCode)){ 3126 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3127 return; 3128 } 3129 cSource = cBuf; 3130 cSourceLimit =cTarget; 3131 test =uBuf; 3132 myOff=offsets; 3133 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3134 if(U_FAILURE(errorCode)){ 3135 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3136 return; 3137 } 3138 uSource = (const UChar*)in; 3139 while(uSource<uSourceLimit){ 3140 if(*test!=*uSource){ 3141 3142 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3143 } 3144 uSource++; 3145 test++; 3146 } 3147 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3148 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 3149 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 3150 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 3151 TestJitterbug930("csISO2022JP"); 3152 ucnv_close(cnv); 3153 free(offsets); 3154 free(uBuf); 3155 free(cBuf); 3156 } 3157 3158 static void 3159 TestISCII(){ 3160 /* test input */ 3161 static const uint16_t in[]={ 3162 /* test full range of Devanagari */ 3163 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3164 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3165 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3166 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3167 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3168 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3169 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3170 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3171 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3172 0x096D,0x096E,0x096F, 3173 /* test Soft halant*/ 3174 0x0915,0x094d, 0x200D, 3175 /* test explicit halant */ 3176 0x0915,0x094d, 0x200c, 3177 /* test double danda */ 3178 0x965, 3179 /* test ASCII */ 3180 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3181 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3182 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3183 /* tests from Lotus */ 3184 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3185 0x0930,0x094D,0x200D, 3186 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3187 0x0915,0x0921,0x002B,0x095F, 3188 /* tamil range */ 3189 0x0B86, 0xB87, 0xB88, 3190 /* telugu range */ 3191 0x0C05, 0x0C02, 0x0C03,0x0c31, 3192 /* kannada range */ 3193 0x0C85, 0xC82, 0x0C83, 3194 /* test Abbr sign and Anudatta */ 3195 0x0970, 0x952, 3196 /* 0x0958, 3197 0x0959, 3198 0x095A, 3199 0x095B, 3200 0x095C, 3201 0x095D, 3202 0x095E, 3203 0x095F,*/ 3204 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3205 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3206 0x090C , 3207 0x0962, 3208 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3209 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3210 0x0950 /* OM Symbol 0xa1, 0xE9,*/, 3211 0x093D /* Avagraha 0xEA, 0xE9*/, 3212 0x0958, 3213 0x0959, 3214 0x095A, 3215 0x095B, 3216 0x095C, 3217 0x095D, 3218 0x095E, 3219 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3220 }; 3221 static const unsigned char byteArr[]={ 3222 3223 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3224 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3225 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3226 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3227 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3228 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3229 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3230 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3231 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3232 0xf8,0xf9,0xfa, 3233 /* test soft halant */ 3234 0xb3, 0xE8, 0xE9, 3235 /* test explicit halant */ 3236 0xb3, 0xE8, 0xE8, 3237 /* test double danda */ 3238 0xea, 0xea, 3239 /* test ASCII */ 3240 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3241 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3242 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3243 /* test ATR code */ 3244 3245 /* tests from Lotus */ 3246 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3247 0xEF,0x42,0xCF,0xE8,0xD9, 3248 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3249 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3250 /* tamil range */ 3251 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3252 /* telugu range */ 3253 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3254 /* kannada range */ 3255 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3256 /* anudatta and abbreviation sign */ 3257 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3258 3259 3260 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3261 3262 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3263 3264 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3265 3266 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3267 3268 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3269 3270 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3271 3272 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3273 3274 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ 3275 3276 0xB3, 0xE9, /* Ka + NUKTA */ 3277 3278 0xB4, 0xE9, /* Kha + NUKTA */ 3279 3280 0xB5, 0xE9, /* Ga + NUKTA */ 3281 3282 0xBA, 0xE9, 3283 3284 0xBF, 0xE9, 3285 3286 0xC0, 0xE9, 3287 3288 0xC9, 0xE9, 3289 /* INV halant RA */ 3290 0xD9, 0xE8, 0xCF, 3291 0x00, 0x00A0, 3292 /* just consume unhandled codepoints */ 3293 0xEF, 0x30, 3294 3295 }; 3296 testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE); 3297 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3298 3299 } 3300 3301 static void 3302 TestISO_2022_JP() { 3303 /* test input */ 3304 static const uint16_t in[]={ 3305 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3306 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3307 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3308 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3309 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3310 0x201D, 0x3014, 0x000D, 0x000A, 3311 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3312 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3313 }; 3314 const UChar* uSource; 3315 const UChar* uSourceLimit; 3316 const char* cSource; 3317 const char* cSourceLimit; 3318 UChar *uTargetLimit =NULL; 3319 UChar *uTarget; 3320 char *cTarget; 3321 const char *cTargetLimit; 3322 char *cBuf; 3323 UChar *uBuf,*test; 3324 int32_t uBufSize = 120; 3325 UErrorCode errorCode=U_ZERO_ERROR; 3326 UConverter *cnv; 3327 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3328 int32_t* myOff= offsets; 3329 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3330 if(U_FAILURE(errorCode)) { 3331 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3332 return; 3333 } 3334 3335 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3336 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3337 uSource = (const UChar*)in; 3338 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); 3339 cTarget = cBuf; 3340 cTargetLimit = cBuf +uBufSize*5; 3341 uTarget = uBuf; 3342 uTargetLimit = uBuf+ uBufSize*5; 3343 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3344 if(U_FAILURE(errorCode)){ 3345 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3346 return; 3347 } 3348 cSource = cBuf; 3349 cSourceLimit =cTarget; 3350 test =uBuf; 3351 myOff=offsets; 3352 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3353 if(U_FAILURE(errorCode)){ 3354 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3355 return; 3356 } 3357 3358 uSource = (const UChar*)in; 3359 while(uSource<uSourceLimit){ 3360 if(*test!=*uSource){ 3361 3362 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3363 } 3364 uSource++; 3365 test++; 3366 } 3367 3368 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 3369 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 3370 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3371 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 3372 TestJitterbug930("csISO2022JP"); 3373 ucnv_close(cnv); 3374 free(uBuf); 3375 free(cBuf); 3376 free(offsets); 3377 } 3378 3379 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3380 const UChar* uSource; 3381 const UChar* uSourceLimit; 3382 const char* cSource; 3383 const char* cSourceLimit; 3384 UChar *uTargetLimit =NULL; 3385 UChar *uTarget; 3386 char *cTarget; 3387 const char *cTargetLimit; 3388 char *cBuf; 3389 UChar *uBuf,*test; 3390 int32_t uBufSize = 120*10; 3391 UErrorCode errorCode=U_ZERO_ERROR; 3392 UConverter *cnv; 3393 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3394 int32_t* myOff= offsets; 3395 cnv=my_ucnv_open(conv, &errorCode); 3396 if(U_FAILURE(errorCode)) { 3397 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3398 return; 3399 } 3400 3401 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3402 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3403 uSource = (const UChar*)in; 3404 uSourceLimit=uSource+len; 3405 cTarget = cBuf; 3406 cTargetLimit = cBuf +uBufSize; 3407 uTarget = uBuf; 3408 uTargetLimit = uBuf+ uBufSize; 3409 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3410 if(U_FAILURE(errorCode)){ 3411 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3412 return; 3413 } 3414 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3415 cSource = cBuf; 3416 cSourceLimit =cTarget; 3417 test =uBuf; 3418 myOff=offsets; 3419 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3420 if(U_FAILURE(errorCode)){ 3421 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3422 return; 3423 } 3424 3425 uSource = (const UChar*)in; 3426 while(uSource<uSourceLimit){ 3427 if(*test!=*uSource){ 3428 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3429 } 3430 uSource++; 3431 test++; 3432 } 3433 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3434 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3435 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3436 if(byteArr && byteArrLen!=0){ 3437 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3438 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3439 { 3440 cSource = byteArr; 3441 cSourceLimit = cSource+byteArrLen; 3442 test=uBuf; 3443 myOff = offsets; 3444 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3445 if(U_FAILURE(errorCode)){ 3446 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3447 return; 3448 } 3449 3450 uSource = (const UChar*)in; 3451 while(uSource<uSourceLimit){ 3452 if(*test!=*uSource){ 3453 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3454 } 3455 uSource++; 3456 test++; 3457 } 3458 } 3459 } 3460 3461 ucnv_close(cnv); 3462 free(uBuf); 3463 free(cBuf); 3464 free(offsets); 3465 } 3466 static UChar U_CALLCONV 3467 _charAt(int32_t offset, void *context) { 3468 return ((char*)context)[offset]; 3469 } 3470 3471 static int32_t 3472 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3473 int32_t srcIndex=0; 3474 int32_t dstIndex=0; 3475 if(U_FAILURE(*status)){ 3476 return 0; 3477 } 3478 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3479 *status = U_ILLEGAL_ARGUMENT_ERROR; 3480 return 0; 3481 } 3482 if(srcLen==-1){ 3483 srcLen = (int32_t)uprv_strlen(src); 3484 } 3485 3486 for (; srcIndex<srcLen; ) { 3487 UChar32 c = src[srcIndex++]; 3488 if (c == 0x005C /*'\\'*/) { 3489 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3490 if (c == (UChar32)0xFFFFFFFF) { 3491 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3492 break; /* invalid escape sequence */ 3493 } 3494 } 3495 if(dstIndex < dstLen){ 3496 if(c>0xFFFF){ 3497 dst[dstIndex++] = U16_LEAD(c); 3498 if(dstIndex<dstLen){ 3499 dst[dstIndex]=U16_TRAIL(c); 3500 }else{ 3501 *status=U_BUFFER_OVERFLOW_ERROR; 3502 } 3503 }else{ 3504 dst[dstIndex]=(UChar)c; 3505 } 3506 3507 }else{ 3508 *status = U_BUFFER_OVERFLOW_ERROR; 3509 } 3510 dstIndex++; /* for preflighting */ 3511 } 3512 return dstIndex; 3513 } 3514 3515 static void 3516 TestFullRoundtrip(const char* cp){ 3517 UChar usource[10] ={0}; 3518 UChar nsrc[10] = {0}; 3519 uint32_t i=1; 3520 int len=0, ulen; 3521 nsrc[0]=0x0061; 3522 /* Test codepoint 0 */ 3523 TestConv(usource,1,cp,"",NULL,0); 3524 TestConv(usource,2,cp,"",NULL,0); 3525 nsrc[2]=0x5555; 3526 TestConv(nsrc,3,cp,"",NULL,0); 3527 3528 for(;i<=0x10FFFF;i++){ 3529 if(i==0xD800){ 3530 i=0xDFFF; 3531 continue; 3532 } 3533 if(i<=0xFFFF){ 3534 usource[0] =(UChar) i; 3535 len=1; 3536 }else{ 3537 usource[0]=U16_LEAD(i); 3538 usource[1]=U16_TRAIL(i); 3539 len=2; 3540 } 3541 ulen=len; 3542 if(i==0x80) { 3543 usource[2]=0; 3544 } 3545 /* Test only single code points */ 3546 TestConv(usource,ulen,cp,"",NULL,0); 3547 /* Test codepoint repeated twice */ 3548 usource[ulen]=usource[0]; 3549 usource[ulen+1]=usource[1]; 3550 ulen+=len; 3551 TestConv(usource,ulen,cp,"",NULL,0); 3552 /* Test codepoint repeated 3 times */ 3553 usource[ulen]=usource[0]; 3554 usource[ulen+1]=usource[1]; 3555 ulen+=len; 3556 TestConv(usource,ulen,cp,"",NULL,0); 3557 /* Test codepoint in between 2 codepoints */ 3558 nsrc[1]=usource[0]; 3559 nsrc[2]=usource[1]; 3560 nsrc[len+1]=0x5555; 3561 TestConv(nsrc,len+2,cp,"",NULL,0); 3562 uprv_memset(usource,0,sizeof(UChar)*10); 3563 } 3564 } 3565 3566 static void 3567 TestRoundTrippingAllUTF(void){ 3568 if(!getTestOption(QUICK_OPTION)){ 3569 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3570 TestFullRoundtrip("BOCU-1"); 3571 log_verbose("Running exhaustive round trip test for SCSU\n"); 3572 TestFullRoundtrip("SCSU"); 3573 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3574 TestFullRoundtrip("UTF-8"); 3575 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3576 TestFullRoundtrip("CESU-8"); 3577 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3578 TestFullRoundtrip("UTF-16BE"); 3579 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3580 TestFullRoundtrip("UTF-16LE"); 3581 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3582 TestFullRoundtrip("UTF-16"); 3583 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3584 TestFullRoundtrip("UTF-32BE"); 3585 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3586 TestFullRoundtrip("UTF-32LE"); 3587 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3588 TestFullRoundtrip("UTF-32"); 3589 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3590 TestFullRoundtrip("UTF-7"); 3591 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3592 TestFullRoundtrip("UTF-7,version=1"); 3593 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3594 TestFullRoundtrip("IMAP-mailbox-name"); 3595 /* 3596 * 3597 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of 3598 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA). 3599 * The old mappings remain as fallbacks. 3600 * This test may be reintroduced at a later time. 3601 * 3602 * 110118 - mow 3603 */ 3604 /* 3605 log_verbose("Running exhaustive round trip test for GB18030\n"); 3606 TestFullRoundtrip("GB18030"); 3607 */ 3608 } 3609 } 3610 3611 static void 3612 TestSCSU() { 3613 3614 static const uint16_t germanUTF16[]={ 3615 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3616 }; 3617 3618 static const uint8_t germanSCSU[]={ 3619 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3620 }; 3621 3622 static const uint16_t russianUTF16[]={ 3623 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3624 }; 3625 3626 static const uint8_t russianSCSU[]={ 3627 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3628 }; 3629 3630 static const uint16_t japaneseUTF16[]={ 3631 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3632 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3633 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3634 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3635 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3636 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 3637 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3638 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3639 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3640 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3641 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3642 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3643 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3644 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3645 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3646 }; 3647 3648 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3649 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3650 static const uint8_t japaneseSCSU[]={ 3651 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3652 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3653 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3654 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3655 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3656 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3657 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3658 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3659 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3660 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3661 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3662 0xcb, 0x82 3663 }; 3664 3665 static const uint16_t allFeaturesUTF16[]={ 3666 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3667 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3668 0x01df, 0xf000, 0xdbff, 0xdfff 3669 }; 3670 3671 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3672 * result here (34B vs. 35B) 3673 */ 3674 static const uint8_t allFeaturesSCSU[]={ 3675 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3676 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3677 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3678 0xdf, 0x14, 0x80, 0x15, 0xff 3679 }; 3680 static const uint16_t monkeyIn[]={ 3681 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3682 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3683 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3684 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3685 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3686 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3687 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3688 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3689 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3690 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3691 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3692 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3693 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3694 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3695 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3696 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3697 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3698 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3699 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3700 /* test non-BMP code points */ 3701 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3702 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3703 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3704 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3705 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3706 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3707 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3708 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3709 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3710 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3711 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3712 3713 3714 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3715 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3716 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3717 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3718 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3719 }; 3720 static const char *fTestCases [] = { 3721 "\\ud800\\udc00", /* smallest surrogate*/ 3722 "\\ud8ff\\udcff", 3723 "\\udBff\\udFff", /* largest surrogate pair*/ 3724 "\\ud834\\udc00", 3725 "\\U0010FFFF", 3726 "Hello \\u9292 \\u9192 World!", 3727 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3728 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3729 3730 "\\u0648\\u06c8", /* catch missing reset*/ 3731 "\\u0648\\u06c8", 3732 3733 "\\u4444\\uE001", /* lowest quotable*/ 3734 "\\u4444\\uf2FF", /* highest quotable*/ 3735 "\\u4444\\uf188\\u4444", 3736 "\\u4444\\uf188\\uf288", 3737 "\\u4444\\uf188abc\\u0429\\uf288", 3738 "\\u9292\\u2222", 3739 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3740 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3741 "Hello World!123456", 3742 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3743 3744 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3745 "abc\\u4411d", /* uses SQU*/ 3746 "abc\\u4411\\u4412d",/* uses SCU*/ 3747 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3748 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3749 "\\u9292\\u2222", 3750 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3751 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3752 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3753 3754 "", /* empty input*/ 3755 "\\u0000", /* smallest BMP character*/ 3756 "\\uFFFF", /* largest BMP character*/ 3757 3758 /* regression tests*/ 3759 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3760 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3761 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3762 "\\u0041\\u00df\\u0401\\u015f", 3763 "\\u9066\\u2123abc", 3764 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3765 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3766 }; 3767 int i=0; 3768 for(;i<UPRV_LENGTHOF(fTestCases);i++){ 3769 const char* cSrc = fTestCases[i]; 3770 UErrorCode status = U_ZERO_ERROR; 3771 int32_t cSrcLen,srcLen; 3772 UChar* src; 3773 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3774 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3775 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3776 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3777 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3778 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3779 free(src); 3780 } 3781 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3782 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3783 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3784 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3785 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3786 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3787 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3788 } 3789 3790 #if !UCONFIG_NO_LEGACY_CONVERSION 3791 static void TestJitterbug2346(){ 3792 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3793 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3794 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3795 3796 UChar uTarget[500]={'\0'}; 3797 UChar* utarget=uTarget; 3798 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3799 3800 char cTarget[500]={'\0'}; 3801 char* ctarget=cTarget; 3802 char* ctargetLimit=cTarget+sizeof(cTarget); 3803 const char* csource=source; 3804 UChar* temp = expected; 3805 UErrorCode err=U_ZERO_ERROR; 3806 3807 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3808 if(U_FAILURE(err)) { 3809 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3810 return; 3811 } 3812 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3813 if(U_FAILURE(err)) { 3814 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3815 return; 3816 } 3817 utargetLimit=utarget; 3818 utarget = uTarget; 3819 while(utarget<utargetLimit){ 3820 if(*temp!=*utarget){ 3821 3822 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3823 } 3824 utarget++; 3825 temp++; 3826 } 3827 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3828 if(U_FAILURE(err)) { 3829 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3830 return; 3831 } 3832 ctargetLimit=ctarget; 3833 ctarget =cTarget; 3834 ucnv_close(conv); 3835 3836 3837 } 3838 3839 static void 3840 TestISO_2022_JP_1() { 3841 /* test input */ 3842 static const uint16_t in[]={ 3843 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3844 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3845 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3846 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3847 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3848 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3849 0x201D, 0x000D, 0x000A, 3850 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3851 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3852 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3853 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3854 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3855 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3856 }; 3857 const UChar* uSource; 3858 const UChar* uSourceLimit; 3859 const char* cSource; 3860 const char* cSourceLimit; 3861 UChar *uTargetLimit =NULL; 3862 UChar *uTarget; 3863 char *cTarget; 3864 const char *cTargetLimit; 3865 char *cBuf; 3866 UChar *uBuf,*test; 3867 int32_t uBufSize = 120; 3868 UErrorCode errorCode=U_ZERO_ERROR; 3869 UConverter *cnv; 3870 3871 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3872 if(U_FAILURE(errorCode)) { 3873 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3874 return; 3875 } 3876 3877 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3878 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3879 uSource = (const UChar*)in; 3880 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); 3881 cTarget = cBuf; 3882 cTargetLimit = cBuf +uBufSize*5; 3883 uTarget = uBuf; 3884 uTargetLimit = uBuf+ uBufSize*5; 3885 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3886 if(U_FAILURE(errorCode)){ 3887 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3888 return; 3889 } 3890 cSource = cBuf; 3891 cSourceLimit =cTarget; 3892 test =uBuf; 3893 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3894 if(U_FAILURE(errorCode)){ 3895 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3896 return; 3897 } 3898 uSource = (const UChar*)in; 3899 while(uSource<uSourceLimit){ 3900 if(*test!=*uSource){ 3901 3902 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3903 } 3904 uSource++; 3905 test++; 3906 } 3907 /*ucnv_close(cnv); 3908 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3909 /*Test for the condition where there is an invalid character*/ 3910 ucnv_reset(cnv); 3911 { 3912 static const uint8_t source2[]={0x0e,0x24,0x053}; 3913 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3914 } 3915 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 3916 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 3917 ucnv_close(cnv); 3918 free(uBuf); 3919 free(cBuf); 3920 } 3921 3922 static void 3923 TestISO_2022_JP_2() { 3924 /* test input */ 3925 static const uint16_t in[]={ 3926 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3927 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3928 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3929 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3930 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3931 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3932 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3933 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3934 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3935 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3936 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3937 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3938 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3939 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3940 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3941 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3942 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3943 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3944 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3945 }; 3946 const UChar* uSource; 3947 const UChar* uSourceLimit; 3948 const char* cSource; 3949 const char* cSourceLimit; 3950 UChar *uTargetLimit =NULL; 3951 UChar *uTarget; 3952 char *cTarget; 3953 const char *cTargetLimit; 3954 char *cBuf; 3955 UChar *uBuf,*test; 3956 int32_t uBufSize = 120; 3957 UErrorCode errorCode=U_ZERO_ERROR; 3958 UConverter *cnv; 3959 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3960 int32_t* myOff= offsets; 3961 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3962 if(U_FAILURE(errorCode)) { 3963 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3964 return; 3965 } 3966 3967 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3968 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3969 uSource = (const UChar*)in; 3970 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); 3971 cTarget = cBuf; 3972 cTargetLimit = cBuf +uBufSize*5; 3973 uTarget = uBuf; 3974 uTargetLimit = uBuf+ uBufSize*5; 3975 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3976 if(U_FAILURE(errorCode)){ 3977 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3978 return; 3979 } 3980 cSource = cBuf; 3981 cSourceLimit =cTarget; 3982 test =uBuf; 3983 myOff=offsets; 3984 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3985 if(U_FAILURE(errorCode)){ 3986 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3987 return; 3988 } 3989 uSource = (const UChar*)in; 3990 while(uSource<uSourceLimit){ 3991 if(*test!=*uSource){ 3992 3993 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3994 } 3995 uSource++; 3996 test++; 3997 } 3998 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 3999 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4000 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4001 /*Test for the condition where there is an invalid character*/ 4002 ucnv_reset(cnv); 4003 { 4004 static const uint8_t source2[]={0x0e,0x24,0x053}; 4005 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 4006 } 4007 ucnv_close(cnv); 4008 free(uBuf); 4009 free(cBuf); 4010 free(offsets); 4011 } 4012 4013 static void 4014 TestISO_2022_KR() { 4015 /* test input */ 4016 static const uint16_t in[]={ 4017 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4018 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4019 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4020 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4021 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4022 ,0x53E3,0x53E4,0x000A,0x000D}; 4023 const UChar* uSource; 4024 const UChar* uSourceLimit; 4025 const char* cSource; 4026 const char* cSourceLimit; 4027 UChar *uTargetLimit =NULL; 4028 UChar *uTarget; 4029 char *cTarget; 4030 const char *cTargetLimit; 4031 char *cBuf; 4032 UChar *uBuf,*test; 4033 int32_t uBufSize = 120; 4034 UErrorCode errorCode=U_ZERO_ERROR; 4035 UConverter *cnv; 4036 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4037 int32_t* myOff= offsets; 4038 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 4039 if(U_FAILURE(errorCode)) { 4040 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4041 return; 4042 } 4043 4044 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4045 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4046 uSource = (const UChar*)in; 4047 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); 4048 cTarget = cBuf; 4049 cTargetLimit = cBuf +uBufSize*5; 4050 uTarget = uBuf; 4051 uTargetLimit = uBuf+ uBufSize*5; 4052 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4053 if(U_FAILURE(errorCode)){ 4054 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4055 return; 4056 } 4057 cSource = cBuf; 4058 cSourceLimit =cTarget; 4059 test =uBuf; 4060 myOff=offsets; 4061 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4062 if(U_FAILURE(errorCode)){ 4063 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4064 return; 4065 } 4066 uSource = (const UChar*)in; 4067 while(uSource<uSourceLimit){ 4068 if(*test!=*uSource){ 4069 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4070 } 4071 uSource++; 4072 test++; 4073 } 4074 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4075 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4076 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4077 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4078 TestJitterbug930("csISO2022KR"); 4079 /*Test for the condition where there is an invalid character*/ 4080 ucnv_reset(cnv); 4081 { 4082 static const uint8_t source2[]={0x1b,0x24,0x053}; 4083 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4084 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4085 } 4086 ucnv_close(cnv); 4087 free(uBuf); 4088 free(cBuf); 4089 free(offsets); 4090 } 4091 4092 static void 4093 TestISO_2022_KR_1() { 4094 /* test input */ 4095 static const uint16_t in[]={ 4096 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4097 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4098 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4099 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4100 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4101 ,0x53E3,0x53E4,0x000A,0x000D}; 4102 const UChar* uSource; 4103 const UChar* uSourceLimit; 4104 const char* cSource; 4105 const char* cSourceLimit; 4106 UChar *uTargetLimit =NULL; 4107 UChar *uTarget; 4108 char *cTarget; 4109 const char *cTargetLimit; 4110 char *cBuf; 4111 UChar *uBuf,*test; 4112 int32_t uBufSize = 120; 4113 UErrorCode errorCode=U_ZERO_ERROR; 4114 UConverter *cnv; 4115 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4116 int32_t* myOff= offsets; 4117 cnv=ucnv_open("ibm-25546", &errorCode); 4118 if(U_FAILURE(errorCode)) { 4119 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4120 return; 4121 } 4122 4123 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4124 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4125 uSource = (const UChar*)in; 4126 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); 4127 cTarget = cBuf; 4128 cTargetLimit = cBuf +uBufSize*5; 4129 uTarget = uBuf; 4130 uTargetLimit = uBuf+ uBufSize*5; 4131 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4132 if(U_FAILURE(errorCode)){ 4133 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4134 return; 4135 } 4136 cSource = cBuf; 4137 cSourceLimit =cTarget; 4138 test =uBuf; 4139 myOff=offsets; 4140 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4141 if(U_FAILURE(errorCode)){ 4142 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4143 return; 4144 } 4145 uSource = (const UChar*)in; 4146 while(uSource<uSourceLimit){ 4147 if(*test!=*uSource){ 4148 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4149 } 4150 uSource++; 4151 test++; 4152 } 4153 ucnv_reset(cnv); 4154 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4155 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4156 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4157 ucnv_reset(cnv); 4158 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4159 /*Test for the condition where there is an invalid character*/ 4160 ucnv_reset(cnv); 4161 { 4162 static const uint8_t source2[]={0x1b,0x24,0x053}; 4163 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4164 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4165 } 4166 ucnv_close(cnv); 4167 free(uBuf); 4168 free(cBuf); 4169 free(offsets); 4170 } 4171 4172 static void TestJitterbug2411(){ 4173 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4174 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4175 UConverter* kr=NULL, *kr1=NULL; 4176 UErrorCode errorCode = U_ZERO_ERROR; 4177 UChar tgt[100]={'\0'}; 4178 UChar* target = tgt; 4179 UChar* targetLimit = target+100; 4180 kr=ucnv_open("iso-2022-kr", &errorCode); 4181 if(U_FAILURE(errorCode)) { 4182 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4183 return; 4184 } 4185 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4186 if(U_FAILURE(errorCode)) { 4187 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4188 return; 4189 } 4190 kr1 = ucnv_open("ibm-25546", &errorCode); 4191 if(U_FAILURE(errorCode)) { 4192 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4193 return; 4194 } 4195 target = tgt; 4196 targetLimit = target+100; 4197 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4198 4199 if(U_FAILURE(errorCode)) { 4200 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4201 return; 4202 } 4203 4204 ucnv_close(kr); 4205 ucnv_close(kr1); 4206 4207 } 4208 4209 static void 4210 TestJIS(){ 4211 /* From Unicode moved to testdata/conversion.txt */ 4212 /*To Unicode*/ 4213 { 4214 static const uint8_t sampleTextJIS[] = { 4215 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4216 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4217 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4218 }; 4219 static const uint16_t expectedISO2022JIS[] = { 4220 0x0041, 0x0042, 4221 0xFF81, 0xFF82, 4222 0x3000 4223 }; 4224 static const int32_t toISO2022JISOffs[]={ 4225 3,4, 4226 8,9, 4227 16 4228 }; 4229 4230 static const uint8_t sampleTextJIS7[] = { 4231 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4232 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4233 0x1b,0x24,0x42,0x21,0x21, 4234 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4235 0x21,0x22, 4236 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4237 }; 4238 static const uint16_t expectedISO2022JIS7[] = { 4239 0x0041, 0x0042, 4240 0xFF81, 0xFF82, 4241 0x3000, 4242 0xFF81, 0xFF82, 4243 0x3001, 4244 0x3000 4245 }; 4246 static const int32_t toISO2022JIS7Offs[]={ 4247 3,4, 4248 8,9, 4249 13,16, 4250 17, 4251 19,27 4252 }; 4253 static const uint8_t sampleTextJIS8[] = { 4254 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4255 0xa1,0xc8,0xd9,/*Katakana Set*/ 4256 0x1b,0x28,0x42, 4257 0x41,0x42, 4258 0xb1,0xc3, /*Katakana Set*/ 4259 0x1b,0x24,0x42,0x21,0x21 4260 }; 4261 static const uint16_t expectedISO2022JIS8[] = { 4262 0x0041, 0x0042, 4263 0xff61, 0xff88, 0xff99, 4264 0x0041, 0x0042, 4265 0xff71, 0xff83, 4266 0x3000 4267 }; 4268 static const int32_t toISO2022JIS8Offs[]={ 4269 3, 4, 5, 6, 4270 7, 11, 12, 13, 4271 14, 18, 4272 }; 4273 4274 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4275 UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE); 4276 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4277 UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE); 4278 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4279 UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE); 4280 } 4281 4282 } 4283 4284 4285 #if 0 4286 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 4287 4288 static void TestJitterbug915(){ 4289 /* tests for roundtripping of the below sequence 4290 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4291 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4292 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4293 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4294 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4295 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4296 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4297 */ 4298 static const char cSource[]={ 4299 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4300 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4301 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4302 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4303 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 4304 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4305 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4306 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4307 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4308 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4309 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4310 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4311 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4312 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4313 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4314 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4315 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4316 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4317 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4318 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4319 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4320 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4321 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4322 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4323 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4324 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4325 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4326 0x37, 0x20, 0x2A, 0x2F 4327 }; 4328 UChar uTarget[500]={'\0'}; 4329 UChar* utarget=uTarget; 4330 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4331 4332 char cTarget[500]={'\0'}; 4333 char* ctarget=cTarget; 4334 char* ctargetLimit=cTarget+sizeof(cTarget); 4335 const char* csource=cSource; 4336 const char* tempSrc = cSource; 4337 UErrorCode err=U_ZERO_ERROR; 4338 4339 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4340 if(U_FAILURE(err)) { 4341 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4342 return; 4343 } 4344 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4345 if(U_FAILURE(err)) { 4346 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4347 return; 4348 } 4349 utargetLimit=utarget; 4350 utarget = uTarget; 4351 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4352 if(U_FAILURE(err)) { 4353 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4354 return; 4355 } 4356 ctargetLimit=ctarget; 4357 ctarget =cTarget; 4358 while(ctarget<ctargetLimit){ 4359 if(*ctarget != *tempSrc){ 4360 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4361 } 4362 ++ctarget; 4363 ++tempSrc; 4364 } 4365 4366 ucnv_close(conv); 4367 } 4368 4369 static void 4370 TestISO_2022_CN_EXT() { 4371 /* test input */ 4372 static const uint16_t in[]={ 4373 /* test Non-BMP code points */ 4374 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4375 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4376 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4377 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4378 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4379 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4380 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4381 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4382 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4383 0xD869, 0xDED5, 4384 4385 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4386 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4387 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 4388 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4389 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4390 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4391 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4392 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4393 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4394 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4395 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4396 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4397 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4398 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4399 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4400 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4401 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4402 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4403 4404 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4405 4406 }; 4407 4408 const UChar* uSource; 4409 const UChar* uSourceLimit; 4410 const char* cSource; 4411 const char* cSourceLimit; 4412 UChar *uTargetLimit =NULL; 4413 UChar *uTarget; 4414 char *cTarget; 4415 const char *cTargetLimit; 4416 char *cBuf; 4417 UChar *uBuf,*test; 4418 int32_t uBufSize = 180; 4419 UErrorCode errorCode=U_ZERO_ERROR; 4420 UConverter *cnv; 4421 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4422 int32_t* myOff= offsets; 4423 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4424 if(U_FAILURE(errorCode)) { 4425 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4426 return; 4427 } 4428 4429 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4430 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4431 uSource = (const UChar*)in; 4432 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); 4433 cTarget = cBuf; 4434 cTargetLimit = cBuf +uBufSize*5; 4435 uTarget = uBuf; 4436 uTargetLimit = uBuf+ uBufSize*5; 4437 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4438 if(U_FAILURE(errorCode)){ 4439 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4440 return; 4441 } 4442 cSource = cBuf; 4443 cSourceLimit =cTarget; 4444 test =uBuf; 4445 myOff=offsets; 4446 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4447 if(U_FAILURE(errorCode)){ 4448 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4449 return; 4450 } 4451 uSource = (const UChar*)in; 4452 while(uSource<uSourceLimit){ 4453 if(*test!=*uSource){ 4454 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4455 } 4456 else{ 4457 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4458 } 4459 uSource++; 4460 test++; 4461 } 4462 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4463 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4464 /*Test for the condition where there is an invalid character*/ 4465 ucnv_reset(cnv); 4466 { 4467 static const uint8_t source2[]={0x0e,0x24,0x053}; 4468 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4469 } 4470 ucnv_close(cnv); 4471 free(uBuf); 4472 free(cBuf); 4473 free(offsets); 4474 } 4475 #endif 4476 4477 static void 4478 TestISO_2022_CN() { 4479 /* test input */ 4480 static const uint16_t in[]={ 4481 /* jitterbug 951 */ 4482 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4483 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4484 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4485 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4486 0x0020, 0x0045, 0x004e, 0x0044, 4487 /**/ 4488 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4489 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4490 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4491 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4492 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4493 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4494 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4495 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4496 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4497 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4498 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4499 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4500 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4501 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4502 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4503 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4504 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4505 4506 }; 4507 const UChar* uSource; 4508 const UChar* uSourceLimit; 4509 const char* cSource; 4510 const char* cSourceLimit; 4511 UChar *uTargetLimit =NULL; 4512 UChar *uTarget; 4513 char *cTarget; 4514 const char *cTargetLimit; 4515 char *cBuf; 4516 UChar *uBuf,*test; 4517 int32_t uBufSize = 180; 4518 UErrorCode errorCode=U_ZERO_ERROR; 4519 UConverter *cnv; 4520 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4521 int32_t* myOff= offsets; 4522 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4523 if(U_FAILURE(errorCode)) { 4524 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4525 return; 4526 } 4527 4528 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4529 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4530 uSource = (const UChar*)in; 4531 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in); 4532 cTarget = cBuf; 4533 cTargetLimit = cBuf +uBufSize*5; 4534 uTarget = uBuf; 4535 uTargetLimit = uBuf+ uBufSize*5; 4536 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4537 if(U_FAILURE(errorCode)){ 4538 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4539 return; 4540 } 4541 cSource = cBuf; 4542 cSourceLimit =cTarget; 4543 test =uBuf; 4544 myOff=offsets; 4545 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4546 if(U_FAILURE(errorCode)){ 4547 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4548 return; 4549 } 4550 uSource = (const UChar*)in; 4551 while(uSource<uSourceLimit){ 4552 if(*test!=*uSource){ 4553 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4554 } 4555 else{ 4556 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4557 } 4558 uSource++; 4559 test++; 4560 } 4561 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4562 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4563 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4564 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv); 4565 TestJitterbug930("csISO2022CN"); 4566 /*Test for the condition where there is an invalid character*/ 4567 ucnv_reset(cnv); 4568 { 4569 static const uint8_t source2[]={0x0e,0x24,0x053}; 4570 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4571 } 4572 4573 ucnv_close(cnv); 4574 free(uBuf); 4575 free(cBuf); 4576 free(offsets); 4577 } 4578 4579 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4580 typedef struct { 4581 const char * converterName; 4582 const char * inputText; 4583 int inputTextLength; 4584 } EmptySegmentTest; 4585 4586 /* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4587 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4588 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4589 if (reason > UCNV_IRREGULAR) { 4590 return; 4591 } 4592 if (reason != UCNV_IRREGULAR) { 4593 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4594 } 4595 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4596 *err = U_ZERO_ERROR; 4597 ucnv_cbToUWriteSub(toArgs,0,err); 4598 } 4599 4600 enum { kEmptySegmentToUCharsMax = 64 }; 4601 static void TestJitterbug6175(void) { 4602 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4603 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4604 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4605 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4606 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4607 static const EmptySegmentTest emptySegmentTests[] = { 4608 /* converterName inputText inputTextLength */ 4609 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, 4610 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4611 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4612 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4613 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, 4614 /* terminator: */ 4615 { NULL, NULL, 0, } 4616 }; 4617 const EmptySegmentTest * testPtr; 4618 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4619 UErrorCode err = U_ZERO_ERROR; 4620 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4621 if (U_FAILURE(err)) { 4622 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4623 return; 4624 } 4625 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4626 if (U_FAILURE(err)) { 4627 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4628 ucnv_close(cnv); 4629 return; 4630 } 4631 { 4632 UChar toUChars[kEmptySegmentToUCharsMax]; 4633 UChar * toUCharsPtr = toUChars; 4634 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4635 const char * inCharsPtr = testPtr->inputText; 4636 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4637 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4638 } 4639 ucnv_close(cnv); 4640 } 4641 } 4642 4643 static void 4644 TestEBCDIC_STATEFUL() { 4645 /* test input */ 4646 static const uint8_t in[]={ 4647 0x61, 4648 0x1a, 4649 0x0f, 0x4b, 4650 0x42, 4651 0x40, 4652 0x36, 4653 }; 4654 4655 /* expected test results */ 4656 static const int32_t results[]={ 4657 /* number of bytes read, code point */ 4658 1, 0x002f, 4659 1, 0x0092, 4660 2, 0x002e, 4661 1, 0xff62, 4662 1, 0x0020, 4663 1, 0x0096, 4664 4665 }; 4666 static const uint8_t in2[]={ 4667 0x0f, 4668 0xa1, 4669 0x01 4670 }; 4671 4672 /* expected test results */ 4673 static const int32_t results2[]={ 4674 /* number of bytes read, code point */ 4675 2, 0x203E, 4676 1, 0x0001, 4677 }; 4678 4679 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4680 UErrorCode errorCode=U_ZERO_ERROR; 4681 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4682 if(U_FAILURE(errorCode)) { 4683 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4684 return; 4685 } 4686 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4687 ucnv_reset(cnv); 4688 /* Test the condition when source >= sourceLimit */ 4689 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4690 ucnv_reset(cnv); 4691 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4692 { 4693 static const uint8_t source1[]={0x0f}; 4694 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4695 } 4696 /*Test for the condition where there is an invalid character*/ 4697 ucnv_reset(cnv); 4698 { 4699 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4700 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4701 } 4702 ucnv_reset(cnv); 4703 source=(const char*)in2; 4704 limit=(const char*)in2+sizeof(in2); 4705 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4706 ucnv_close(cnv); 4707 4708 } 4709 4710 static void 4711 TestGB18030() { 4712 /* test input */ 4713 static const uint8_t in[]={ 4714 0x24, 4715 0x7f, 4716 0x81, 0x30, 0x81, 0x30, 4717 0xa8, 0xbf, 4718 0xa2, 0xe3, 4719 0xd2, 0xbb, 4720 0x82, 0x35, 0x8f, 0x33, 4721 0x84, 0x31, 0xa4, 0x39, 4722 0x90, 0x30, 0x81, 0x30, 4723 0xe3, 0x32, 0x9a, 0x35 4724 #if 0 4725 /* 4726 * Feature removed markus 2000-oct-26 4727 * Only some codepages must match surrogate pairs into supplementary code points - 4728 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . 4729 * GB 18030 provides direct encodings for supplementary code points, therefore 4730 * it must not combine two single-encoded surrogates into one code point. 4731 */ 4732 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4733 #endif 4734 }; 4735 4736 /* expected test results */ 4737 static const int32_t results[]={ 4738 /* number of bytes read, code point */ 4739 1, 0x24, 4740 1, 0x7f, 4741 4, 0x80, 4742 2, 0x1f9, 4743 2, 0x20ac, 4744 2, 0x4e00, 4745 4, 0x9fa6, 4746 4, 0xffff, 4747 4, 0x10000, 4748 4, 0x10ffff 4749 #if 0 4750 /* Feature removed. See comment above. */ 4751 8, 0x10000 4752 #endif 4753 }; 4754 4755 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4756 UErrorCode errorCode=U_ZERO_ERROR; 4757 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4758 if(U_FAILURE(errorCode)) { 4759 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4760 return; 4761 } 4762 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4763 ucnv_close(cnv); 4764 } 4765 4766 static void 4767 TestLMBCS() { 4768 /* LMBCS-1 string */ 4769 static const uint8_t pszLMBCS[]={ 4770 0x61, 4771 0x01, 0x29, 4772 0x81, 4773 0xA0, 4774 0x0F, 0x27, 4775 0x0F, 0x91, 4776 0x14, 0x0a, 0x74, 4777 0x14, 0xF6, 0x02, 4778 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4779 0x10, 0x88, 0xA0, 4780 }; 4781 4782 /* Unicode UChar32 equivalents */ 4783 static const UChar32 pszUnicode32[]={ 4784 /* code point */ 4785 0x00000061, 4786 0x00002013, 4787 0x000000FC, 4788 0x000000E1, 4789 0x00000007, 4790 0x00000091, 4791 0x00000a74, 4792 0x00000200, 4793 0x00023456, /* code point for surrogate pair */ 4794 0x00005516 4795 }; 4796 4797 /* Unicode UChar equivalents */ 4798 static const UChar pszUnicode[]={ 4799 /* code point */ 4800 0x0061, 4801 0x2013, 4802 0x00FC, 4803 0x00E1, 4804 0x0007, 4805 0x0091, 4806 0x0a74, 4807 0x0200, 4808 0xD84D, /* low surrogate */ 4809 0xDC56, /* high surrogate */ 4810 0x5516 4811 }; 4812 4813 /* expected test results */ 4814 static const int offsets32[]={ 4815 /* number of bytes read, code point */ 4816 0, 4817 1, 4818 3, 4819 4, 4820 5, 4821 7, 4822 9, 4823 12, 4824 15, 4825 21, 4826 24 4827 }; 4828 4829 /* expected test results */ 4830 static const int offsets[]={ 4831 /* number of bytes read, code point */ 4832 0, 4833 1, 4834 3, 4835 4, 4836 5, 4837 7, 4838 9, 4839 12, 4840 15, 4841 18, 4842 21, 4843 24 4844 }; 4845 4846 4847 UConverter *cnv; 4848 4849 #define NAME_LMBCS_1 "LMBCS-1" 4850 #define NAME_LMBCS_2 "LMBCS-2" 4851 4852 4853 /* Some basic open/close/property tests on some LMBCS converters */ 4854 { 4855 4856 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4857 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4858 char get_subchars [1]; 4859 const char * get_name; 4860 UConverter *cnv1; 4861 UConverter *cnv2; 4862 4863 int8_t len = sizeof(get_subchars); 4864 4865 UErrorCode errorCode=U_ZERO_ERROR; 4866 4867 /* Open */ 4868 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4869 if(U_FAILURE(errorCode)) { 4870 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4871 return; 4872 } 4873 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4874 if(U_FAILURE(errorCode)) { 4875 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4876 return; 4877 } 4878 4879 /* Name */ 4880 get_name = ucnv_getName (cnv1, &errorCode); 4881 if (strcmp(NAME_LMBCS_1,get_name)){ 4882 log_err("Unexpected converter name: %s\n", get_name); 4883 } 4884 get_name = ucnv_getName (cnv2, &errorCode); 4885 if (strcmp(NAME_LMBCS_2,get_name)){ 4886 log_err("Unexpected converter name: %s\n", get_name); 4887 } 4888 4889 /* substitution chars */ 4890 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4891 if(U_FAILURE(errorCode)) { 4892 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4893 } 4894 if (len!=1){ 4895 log_err("Unexpected length of sub chars\n"); 4896 } 4897 if (get_subchars[0] != expected_subchars[0]){ 4898 log_err("Unexpected value of sub chars\n"); 4899 } 4900 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4901 if(U_FAILURE(errorCode)) { 4902 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4903 } 4904 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4905 if(U_FAILURE(errorCode)) { 4906 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4907 } 4908 if (len!=1){ 4909 log_err("Unexpected length of sub chars\n"); 4910 } 4911 if (get_subchars[0] != new_subchars[0]){ 4912 log_err("Unexpected value of sub chars\n"); 4913 } 4914 ucnv_close(cnv1); 4915 ucnv_close(cnv2); 4916 4917 } 4918 4919 /* LMBCS to Unicode - offsets */ 4920 { 4921 UErrorCode errorCode=U_ZERO_ERROR; 4922 4923 const char * pSource = (const char *)pszLMBCS; 4924 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4925 4926 UChar Out [sizeof(pszUnicode) + 1]; 4927 UChar * pOut = Out; 4928 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode); 4929 4930 int32_t off [sizeof(offsets)]; 4931 4932 /* last 'offset' in expected results is just the final size. 4933 (Makes other tests easier). Compensate here: */ 4934 4935 off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS); 4936 4937 4938 4939 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4940 if(U_FAILURE(errorCode)) { 4941 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4942 return; 4943 } 4944 4945 4946 4947 ucnv_toUnicode (cnv, 4948 &pOut, 4949 OutLimit, 4950 &pSource, 4951 sourceLimit, 4952 off, 4953 TRUE, 4954 &errorCode); 4955 4956 4957 if (memcmp(off,offsets,sizeof(offsets))) 4958 { 4959 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4960 } 4961 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4962 { 4963 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4964 } 4965 ucnv_close(cnv); 4966 } 4967 { 4968 /* LMBCS to Unicode - getNextUChar */ 4969 const char * sourceStart; 4970 const char *source=(const char *)pszLMBCS; 4971 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4972 const UChar32 *results= pszUnicode32; 4973 const int *off = offsets32; 4974 4975 UErrorCode errorCode=U_ZERO_ERROR; 4976 UChar32 uniChar; 4977 4978 cnv=ucnv_open("LMBCS-1", &errorCode); 4979 if(U_FAILURE(errorCode)) { 4980 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4981 return; 4982 } 4983 else 4984 { 4985 4986 while(source<limit) { 4987 sourceStart=source; 4988 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4989 if(U_FAILURE(errorCode)) { 4990 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4991 break; 4992 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4993 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4994 uniChar, (source-sourceStart), *results, *off); 4995 break; 4996 } 4997 results++; 4998 off++; 4999 } 5000 } 5001 ucnv_close(cnv); 5002 } 5003 { /* test locale & optimization group operations: Unicode to LMBCS */ 5004 5005 UErrorCode errorCode=U_ZERO_ERROR; 5006 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 5007 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 5008 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 5009 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 5010 const UChar * pUniOut = uniString; 5011 UChar * pUniIn = uniString; 5012 uint8_t lmbcsString [4]; 5013 const char * pLMBCSOut = (const char *)lmbcsString; 5014 char * pLMBCSIn = (char *)lmbcsString; 5015 5016 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 5017 ucnv_fromUnicode (cnv16he, 5018 &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)), 5019 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString), 5020 NULL, 1, &errorCode); 5021 5022 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 5023 { 5024 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 5025 } 5026 5027 pLMBCSIn= (char *)lmbcsString; 5028 pUniOut = uniString; 5029 ucnv_fromUnicode (cnv01us, 5030 &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)), 5031 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString), 5032 NULL, 1, &errorCode); 5033 5034 if (lmbcsString[0] != 0x9F) 5035 { 5036 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5037 } 5038 5039 /* single byte char from mbcs char set */ 5040 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5041 pLMBCSOut = (const char *)lmbcsString; 5042 pUniIn = uniString; 5043 ucnv_toUnicode (cnv16jp, 5044 &pUniIn, pUniIn + 1, 5045 &pLMBCSOut, (pLMBCSOut + 1), 5046 NULL, 1, &errorCode); 5047 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5048 { 5049 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5050 } 5051 /* convert to group 1: should be 3 bytes */ 5052 pLMBCSIn = (char *)lmbcsString; 5053 pUniOut = uniString; 5054 ucnv_fromUnicode (cnv01us, 5055 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5056 &pUniOut, pUniOut + 1, 5057 NULL, 1, &errorCode); 5058 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5059 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5060 { 5061 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5062 } 5063 pLMBCSOut = (const char *)lmbcsString; 5064 pUniIn = uniString; 5065 ucnv_toUnicode (cnv01us, 5066 &pUniIn, pUniIn + 1, 5067 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5068 NULL, 1, &errorCode); 5069 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5070 { 5071 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5072 } 5073 pLMBCSIn = (char *)lmbcsString; 5074 pUniOut = uniString; 5075 ucnv_fromUnicode (cnv16jp, 5076 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5077 &pUniOut, pUniOut + 1, 5078 NULL, 1, &errorCode); 5079 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5080 { 5081 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5082 } 5083 ucnv_close(cnv16he); 5084 ucnv_close(cnv16jp); 5085 ucnv_close(cnv01us); 5086 } 5087 { 5088 /* Small source buffer testing, LMBCS -> Unicode */ 5089 5090 UErrorCode errorCode=U_ZERO_ERROR; 5091 5092 const char * pSource = (const char *)pszLMBCS; 5093 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5094 int codepointCount = 0; 5095 5096 UChar Out [sizeof(pszUnicode) + 1]; 5097 UChar * pOut = Out; 5098 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode); 5099 5100 5101 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5102 if(U_FAILURE(errorCode)) { 5103 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5104 return; 5105 } 5106 5107 5108 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 5109 { 5110 ucnv_toUnicode (cnv, 5111 &pOut, 5112 OutLimit, 5113 &pSource, 5114 (pSource+1), /* claim that this is a 1- byte buffer */ 5115 NULL, 5116 FALSE, /* FALSE means there might be more chars in the next buffer */ 5117 &errorCode); 5118 5119 if (U_SUCCESS (errorCode)) 5120 { 5121 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5122 { 5123 /* we are on to the next code point: check value */ 5124 5125 if (Out[0] != pszUnicode[codepointCount]){ 5126 log_err("LMBCS->Uni result %lx should have been %lx \n", 5127 Out[0], pszUnicode[codepointCount]); 5128 } 5129 5130 pOut = Out; /* reset for accumulating next code point */ 5131 codepointCount++; 5132 } 5133 } 5134 else 5135 { 5136 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5137 } 5138 } 5139 { 5140 /* limits & surrogate error testing */ 5141 char LIn [sizeof(pszLMBCS)]; 5142 const char * pLIn = LIn; 5143 5144 char LOut [sizeof(pszLMBCS)]; 5145 char * pLOut = LOut; 5146 5147 UChar UOut [sizeof(pszUnicode)]; 5148 UChar * pUOut = UOut; 5149 5150 UChar UIn [sizeof(pszUnicode)]; 5151 const UChar * pUIn = UIn; 5152 5153 int32_t off [sizeof(offsets)]; 5154 UChar32 uniChar; 5155 5156 errorCode=U_ZERO_ERROR; 5157 5158 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5159 pUIn++; 5160 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); 5161 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5162 { 5163 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5164 } 5165 pUIn--; 5166 5167 errorCode=U_ZERO_ERROR; 5168 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5169 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5170 { 5171 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5172 } 5173 errorCode=U_ZERO_ERROR; 5174 5175 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); 5176 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5177 { 5178 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5179 } 5180 errorCode=U_ZERO_ERROR; 5181 5182 /* 0 byte source request - no error, no pointer movement */ 5183 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5184 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5185 if(U_FAILURE(errorCode)) { 5186 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5187 } 5188 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5189 { 5190 log_err("Unexpected pointer move in 0 byte source request \n"); 5191 } 5192 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5193 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5194 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5195 { 5196 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5197 } 5198 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5199 { 5200 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5201 } 5202 errorCode = U_ZERO_ERROR; 5203 5204 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5205 5206 pUIn = pszUnicode; 5207 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode); 5208 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5209 { 5210 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5211 } 5212 5213 errorCode = U_ZERO_ERROR; 5214 5215 pLIn = (const char *)pszLMBCS; 5216 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5217 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) 5218 { 5219 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5220 } 5221 5222 /* unpaired or chopped LMBCS surrogates */ 5223 5224 /* OK high surrogate, Low surrogate is chopped */ 5225 LIn [0] = (char)0x14; 5226 LIn [1] = (char)0xD8; 5227 LIn [2] = (char)0x01; 5228 LIn [3] = (char)0x14; 5229 LIn [4] = (char)0xDC; 5230 pLIn = LIn; 5231 errorCode = U_ZERO_ERROR; 5232 pUOut = UOut; 5233 5234 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5235 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5236 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5237 { 5238 log_err("Unexpected results on chopped low surrogate\n"); 5239 } 5240 5241 /* chopped at surrogate boundary */ 5242 LIn [0] = (char)0x14; 5243 LIn [1] = (char)0xD8; 5244 LIn [2] = (char)0x01; 5245 pLIn = LIn; 5246 errorCode = U_ZERO_ERROR; 5247 pUOut = UOut; 5248 5249 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5250 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5251 { 5252 log_err("Unexpected results on chopped at surrogate boundary \n"); 5253 } 5254 5255 /* unpaired surrogate plus valid Unichar */ 5256 LIn [0] = (char)0x14; 5257 LIn [1] = (char)0xD8; 5258 LIn [2] = (char)0x01; 5259 LIn [3] = (char)0x14; 5260 LIn [4] = (char)0xC9; 5261 LIn [5] = (char)0xD0; 5262 pLIn = LIn; 5263 errorCode = U_ZERO_ERROR; 5264 pUOut = UOut; 5265 5266 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5267 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5268 { 5269 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5270 } 5271 5272 /* unpaired surrogate plus chopped Unichar */ 5273 LIn [0] = (char)0x14; 5274 LIn [1] = (char)0xD8; 5275 LIn [2] = (char)0x01; 5276 LIn [3] = (char)0x14; 5277 LIn [4] = (char)0xC9; 5278 5279 pLIn = LIn; 5280 errorCode = U_ZERO_ERROR; 5281 pUOut = UOut; 5282 5283 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5284 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5285 { 5286 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5287 } 5288 5289 /* unpaired surrogate plus valid non-Unichar */ 5290 LIn [0] = (char)0x14; 5291 LIn [1] = (char)0xD8; 5292 LIn [2] = (char)0x01; 5293 LIn [3] = (char)0x0F; 5294 LIn [4] = (char)0x3B; 5295 5296 pLIn = LIn; 5297 errorCode = U_ZERO_ERROR; 5298 pUOut = UOut; 5299 5300 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5301 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5302 { 5303 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5304 } 5305 5306 /* unpaired surrogate plus chopped non-Unichar */ 5307 LIn [0] = (char)0x14; 5308 LIn [1] = (char)0xD8; 5309 LIn [2] = (char)0x01; 5310 LIn [3] = (char)0x0F; 5311 5312 pLIn = LIn; 5313 errorCode = U_ZERO_ERROR; 5314 pUOut = UOut; 5315 5316 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5317 5318 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5319 { 5320 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5321 } 5322 } 5323 } 5324 ucnv_close(cnv); /* final cleanup */ 5325 } 5326 5327 5328 static void TestJitterbug255() 5329 { 5330 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 5331 const char *testBuffer = (const char *)testBytes; 5332 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5333 UErrorCode status = U_ZERO_ERROR; 5334 /*UChar32 result;*/ 5335 UConverter *cnv = 0; 5336 5337 cnv = ucnv_open("shift-jis", &status); 5338 if (U_FAILURE(status) || cnv == 0) { 5339 log_data_err("Failed to open the converter for SJIS.\n"); 5340 return; 5341 } 5342 while (testBuffer != testEnd) 5343 { 5344 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5345 if (U_FAILURE(status)) 5346 { 5347 log_err("Failed to convert the next UChar for SJIS.\n"); 5348 break; 5349 } 5350 } 5351 ucnv_close(cnv); 5352 } 5353 5354 static void TestEBCDICUS4XML() 5355 { 5356 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5357 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5358 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5359 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5360 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5361 UChar *unicodes = unicodes_x; 5362 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5363 char *target = target_x; 5364 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5365 UErrorCode status = U_ZERO_ERROR; 5366 UConverter *cnv = 0; 5367 5368 cnv = ucnv_open("ebcdic-xml-us", &status); 5369 if (U_FAILURE(status) || cnv == 0) { 5370 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5371 return; 5372 } 5373 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5374 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5375 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", 5376 u_errorName(status)); 5377 printUSeqErr(unicodes_x, 3); 5378 printUSeqErr(toUnicodeMaps, 3); 5379 } 5380 status = U_ZERO_ERROR; 5381 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5382 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5383 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5384 u_errorName(status)); 5385 printSeqErr((const unsigned char*)target_x, 3); 5386 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5387 } 5388 ucnv_close(cnv); 5389 } 5390 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5391 5392 #if !UCONFIG_NO_COLLATION 5393 5394 static void TestJitterbug981(){ 5395 const UChar* rules; 5396 int32_t rules_length, target_cap, bytes_needed, buff_size; 5397 UErrorCode status = U_ZERO_ERROR; 5398 UConverter *utf8cnv; 5399 UCollator* myCollator; 5400 char *buff; 5401 int numNeeded=0; 5402 utf8cnv = ucnv_open ("utf8", &status); 5403 if(U_FAILURE(status)){ 5404 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5405 return; 5406 } 5407 myCollator = ucol_open("zh", &status); 5408 if(U_FAILURE(status)){ 5409 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5410 ucnv_close(utf8cnv); 5411 return; 5412 } 5413 5414 rules = ucol_getRules(myCollator, &rules_length); 5415 if(rules_length == 0) { 5416 log_data_err("missing zh tailoring rule string\n"); 5417 ucol_close(myCollator); 5418 ucnv_close(utf8cnv); 5419 return; 5420 } 5421 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5422 buff = malloc(buff_size); 5423 5424 target_cap = 0; 5425 do { 5426 ucnv_reset(utf8cnv); 5427 status = U_ZERO_ERROR; 5428 if(target_cap >= buff_size) { 5429 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5430 break; 5431 } 5432 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5433 rules, rules_length, &status); 5434 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5435 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5436 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5437 break; 5438 } 5439 numNeeded = bytes_needed; 5440 } while (status == U_BUFFER_OVERFLOW_ERROR); 5441 ucol_close(myCollator); 5442 ucnv_close(utf8cnv); 5443 free(buff); 5444 } 5445 5446 #endif 5447 5448 #if !UCONFIG_NO_LEGACY_CONVERSION 5449 static void TestJitterbug1293(){ 5450 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5451 char target[256]; 5452 UErrorCode status = U_ZERO_ERROR; 5453 UConverter* conv=NULL; 5454 int32_t target_cap, bytes_needed, numNeeded = 0; 5455 conv = ucnv_open("shift-jis",&status); 5456 if(U_FAILURE(status)){ 5457 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5458 return; 5459 } 5460 5461 do{ 5462 target_cap =0; 5463 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5464 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5465 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5466 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5467 } 5468 numNeeded = bytes_needed; 5469 } while (status == U_BUFFER_OVERFLOW_ERROR); 5470 if(U_FAILURE(status)){ 5471 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); 5472 return; 5473 } 5474 ucnv_close(conv); 5475 } 5476 #endif 5477 5478 static void TestJB5275_1(){ 5479 5480 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5481 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5482 /* Switch script: */ 5483 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5484 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5485 "\xEF\x40\x3B\xB3\x0A"; 5486 static const UChar expected[] ={ 5487 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5488 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5489 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5490 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5491 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5492 }; 5493 5494 UErrorCode status = U_ZERO_ERROR; 5495 UConverter* conv = ucnv_open("iscii-gur", &status); 5496 UChar dest[100] = {'\0'}; 5497 UChar* target = dest; 5498 UChar* targetLimit = dest+100; 5499 const char* source = data; 5500 const char* sourceLimit = data+strlen(data); 5501 const UChar* exp = expected; 5502 5503 if (U_FAILURE(status)) { 5504 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5505 return; 5506 } 5507 5508 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5509 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5510 if(U_FAILURE(status)){ 5511 log_err("conversion failed: %s \n", u_errorName(status)); 5512 } 5513 targetLimit = target; 5514 target = dest; 5515 printUSeq(target, targetLimit-target); 5516 while(target<targetLimit){ 5517 if(*exp!=*target){ 5518 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5519 } 5520 target++; 5521 exp++; 5522 } 5523 ucnv_close(conv); 5524 } 5525 5526 static void TestJB5275(){ 5527 static const char* data = 5528 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5529 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5530 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5531 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5532 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5533 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5534 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5535 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5536 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5537 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5538 static const UChar expected[] ={ 5539 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5540 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5541 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5542 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5543 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5544 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5545 }; 5546 5547 UErrorCode status = U_ZERO_ERROR; 5548 UConverter* conv = ucnv_open("iscii", &status); 5549 UChar dest[100] = {'\0'}; 5550 UChar* target = dest; 5551 UChar* targetLimit = dest+100; 5552 const char* source = data; 5553 const char* sourceLimit = data+strlen(data); 5554 const UChar* exp = expected; 5555 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5556 if(U_FAILURE(status)){ 5557 log_data_err("conversion failed: %s \n", u_errorName(status)); 5558 } 5559 targetLimit = target; 5560 target = dest; 5561 5562 printUSeq(target, targetLimit-target); 5563 5564 while(target<targetLimit){ 5565 if(*exp!=*target){ 5566 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5567 } 5568 target++; 5569 exp++; 5570 } 5571 ucnv_close(conv); 5572 } 5573 5574 static void 5575 TestIsFixedWidth() { 5576 UErrorCode status = U_ZERO_ERROR; 5577 UConverter *cnv = NULL; 5578 int32_t i; 5579 5580 const char *fixedWidth[] = { 5581 "US-ASCII", 5582 "UTF32", 5583 "ibm-5478_P100-1995" 5584 }; 5585 5586 const char *notFixedWidth[] = { 5587 "GB18030", 5588 "UTF8", 5589 "windows-949-2000", 5590 "UTF16" 5591 }; 5592 5593 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) { 5594 cnv = ucnv_open(fixedWidth[i], &status); 5595 if (cnv == NULL || U_FAILURE(status)) { 5596 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status)); 5597 continue; 5598 } 5599 5600 if (!ucnv_isFixedWidth(cnv, &status)) { 5601 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]); 5602 } 5603 ucnv_close(cnv); 5604 } 5605 5606 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) { 5607 cnv = ucnv_open(notFixedWidth[i], &status); 5608 if (cnv == NULL || U_FAILURE(status)) { 5609 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status)); 5610 continue; 5611 } 5612 5613 if (ucnv_isFixedWidth(cnv, &status)) { 5614 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]); 5615 } 5616 ucnv_close(cnv); 5617 } 5618 } 5619