1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File CCONVTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Steven R. Loomis 7/8/1999 Adding input buffer test 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "cstring.h" 17 #include "unicode/uloc.h" 18 #include "unicode/ucnv.h" 19 #include "unicode/ucnv_err.h" 20 #include "unicode/ucnv_cb.h" 21 #include "cintltst.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucol.h" 25 #include "cmemory.h" 26 27 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 28 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 29 #if !UCONFIG_NO_COLLATION 30 static void TestJitterbug981(void); 31 #endif 32 static void TestJitterbug1293(void); 33 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 34 static void TestConverterTypesAndStarters(void); 35 static void TestAmbiguous(void); 36 static void TestSignatureDetection(void); 37 static void TestUTF7(void); 38 static void TestIMAP(void); 39 static void TestUTF8(void); 40 static void TestCESU8(void); 41 static void TestUTF16(void); 42 static void TestUTF16BE(void); 43 static void TestUTF16LE(void); 44 static void TestUTF32(void); 45 static void TestUTF32BE(void); 46 static void TestUTF32LE(void); 47 static void TestLATIN1(void); 48 49 #if !UCONFIG_NO_LEGACY_CONVERSION 50 static void TestSBCS(void); 51 static void TestDBCS(void); 52 static void TestMBCS(void); 53 54 #ifdef U_ENABLE_GENERIC_ISO_2022 55 static void TestISO_2022(void); 56 #endif 57 58 static void TestISO_2022_JP(void); 59 static void TestISO_2022_JP_1(void); 60 static void TestISO_2022_JP_2(void); 61 static void TestISO_2022_KR(void); 62 static void TestISO_2022_KR_1(void); 63 static void TestISO_2022_CN(void); 64 static void TestISO_2022_CN_EXT(void); 65 static void TestJIS(void); 66 static void TestHZ(void); 67 #endif 68 69 static void TestSCSU(void); 70 71 #if !UCONFIG_NO_LEGACY_CONVERSION 72 static void TestEBCDIC_STATEFUL(void); 73 static void TestGB18030(void); 74 static void TestLMBCS(void); 75 static void TestJitterbug255(void); 76 static void TestEBCDICUS4XML(void); 77 static void TestJitterbug915(void); 78 static void TestISCII(void); 79 80 static void TestCoverageMBCS(void); 81 static void TestJitterbug2346(void); 82 static void TestJitterbug2411(void); 83 static void TestJB5275(void); 84 static void TestJB5275_1(void); 85 static void TestJitterbug6175(void); 86 #endif 87 88 static void TestRoundTrippingAllUTF(void); 89 static void TestConv(const uint16_t in[], 90 int len, 91 const char* conv, 92 const char* lang, 93 char byteArr[], 94 int byteArrLen); 95 void addTestNewConvert(TestNode** root); 96 97 /* open a converter, using test data if it begins with '@' */ 98 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 99 100 101 #define NEW_MAX_BUFFER 999 102 103 static int32_t gInBufferSize = NEW_MAX_BUFFER; 104 static int32_t gOutBufferSize = NEW_MAX_BUFFER; 105 static char gNuConvTestName[1024]; 106 107 #define nct_min(x,y) ((x<y) ? x : y) 108 109 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 110 { 111 if(cnv && cnv[0] == '@') { 112 return ucnv_openPackage(loadTestData(err), cnv+1, err); 113 } else { 114 return ucnv_open(cnv, err); 115 } 116 } 117 118 static void printSeq(const unsigned char* a, int len) 119 { 120 int i=0; 121 log_verbose("{"); 122 while (i<len) 123 log_verbose("0x%02x ", a[i++]); 124 log_verbose("}\n"); 125 } 126 127 static void printUSeq(const UChar* a, int len) 128 { 129 int i=0; 130 log_verbose("{U+"); 131 while (i<len) log_verbose("0x%04x ", a[i++]); 132 log_verbose("}\n"); 133 } 134 135 static void printSeqErr(const unsigned char* a, int len) 136 { 137 int i=0; 138 fprintf(stderr, "{"); 139 while (i<len) 140 fprintf(stderr, "0x%02x ", a[i++]); 141 fprintf(stderr, "}\n"); 142 } 143 144 static void printUSeqErr(const UChar* a, int len) 145 { 146 int i=0; 147 fprintf(stderr, "{U+"); 148 while (i<len) 149 fprintf(stderr, "0x%04x ", a[i++]); 150 fprintf(stderr,"}\n"); 151 } 152 153 static void 154 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 155 { 156 const char* s0; 157 const char* s=(char*)source; 158 const int32_t *r=results; 159 UErrorCode errorCode=U_ZERO_ERROR; 160 UChar32 c; 161 162 while(s<limit) { 163 s0=s; 164 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 165 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 166 break; /* no more significant input */ 167 } else if(U_FAILURE(errorCode)) { 168 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 169 break; 170 } else if( 171 /* test the expected number of input bytes only if >=0 */ 172 (*r>=0 && (int32_t)(s-s0)!=*r) || 173 c!=*(r+1) 174 ) { 175 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 176 message, c, (s-s0), *(r+1), *r); 177 break; 178 } 179 r+=2; 180 } 181 } 182 183 static void 184 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) 185 { 186 const char* s=(char*)source; 187 UErrorCode errorCode=U_ZERO_ERROR; 188 uint32_t c; 189 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 190 if(errorCode != expected){ 191 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 192 } 193 if(c != 0xFFFD && c != 0xffff){ 194 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 195 } 196 197 } 198 199 static void TestInBufSizes(void) 200 { 201 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 202 #if 1 203 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 204 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 205 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 206 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 207 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 208 TestNewConvertWithBufferSizes(1,1); 209 TestNewConvertWithBufferSizes(2,3); 210 TestNewConvertWithBufferSizes(3,2); 211 #endif 212 } 213 214 static void TestOutBufSizes(void) 215 { 216 #if 1 217 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 218 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 219 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 220 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 221 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 222 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 223 224 #endif 225 } 226 227 228 void addTestNewConvert(TestNode** root) 229 { 230 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 231 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 232 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 233 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 234 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); 235 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 236 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); 237 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 238 239 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 240 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 241 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 242 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 243 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 244 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 245 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 246 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 247 248 #if !UCONFIG_NO_LEGACY_CONVERSION 249 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 250 #endif 251 252 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 253 254 #if !UCONFIG_NO_LEGACY_CONVERSION 255 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 256 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 257 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 258 259 #ifdef U_ENABLE_GENERIC_ISO_2022 260 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 261 #endif 262 263 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 264 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); 265 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 266 addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 267 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 268 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 269 addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 270 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 271 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 272 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 273 #endif 274 275 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 276 277 #if !UCONFIG_NO_LEGACY_CONVERSION 278 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 279 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 280 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 281 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 282 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 283 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 284 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 285 #if !UCONFIG_NO_COLLATION 286 addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 287 #endif 288 289 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 290 #endif 291 292 293 #if !UCONFIG_NO_LEGACY_CONVERSION 294 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 295 #endif 296 297 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 298 299 #if !UCONFIG_NO_LEGACY_CONVERSION 300 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 301 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 302 addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 303 #endif 304 305 } 306 307 308 /* Note that this test already makes use of statics, so it's not really 309 multithread safe. 310 This convenience function lets us make the error messages actually useful. 311 */ 312 313 static void setNuConvTestName(const char *codepage, const char *direction) 314 { 315 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 316 codepage, 317 direction, 318 (int)gInBufferSize, 319 (int)gOutBufferSize); 320 } 321 322 typedef enum 323 { 324 TC_OK = 0, /* test was OK */ 325 TC_MISMATCH = 1, /* Match failed - err was printed */ 326 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 327 } ETestConvertResult; 328 329 /* Note: This function uses global variables and it will not do offset 330 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 331 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 332 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 333 { 334 UErrorCode status = U_ZERO_ERROR; 335 UConverter *conv = 0; 336 char junkout[NEW_MAX_BUFFER]; /* FIX */ 337 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 338 char *p; 339 const UChar *src; 340 char *end; 341 char *targ; 342 int32_t *offs; 343 int i; 344 int32_t realBufferSize; 345 char *realBufferEnd; 346 const UChar *realSourceEnd; 347 const UChar *sourceLimit; 348 UBool checkOffsets = TRUE; 349 UBool doFlush; 350 351 for(i=0;i<NEW_MAX_BUFFER;i++) 352 junkout[i] = (char)0xF0; 353 for(i=0;i<NEW_MAX_BUFFER;i++) 354 junokout[i] = 0xFF; 355 356 setNuConvTestName(codepage, "FROM"); 357 358 log_verbose("\n========= %s\n", gNuConvTestName); 359 360 conv = my_ucnv_open(codepage, &status); 361 362 if(U_FAILURE(status)) 363 { 364 log_data_err("Couldn't open converter %s\n",codepage); 365 return TC_FAIL; 366 } 367 if(useFallback){ 368 ucnv_setFallback(conv,useFallback); 369 } 370 371 log_verbose("Converter opened..\n"); 372 373 src = source; 374 targ = junkout; 375 offs = junokout; 376 377 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 378 realBufferEnd = junkout + realBufferSize; 379 realSourceEnd = source + sourceLen; 380 381 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 382 checkOffsets = FALSE; 383 384 do 385 { 386 end = nct_min(targ + gOutBufferSize, realBufferEnd); 387 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 388 389 doFlush = (UBool)(sourceLimit == realSourceEnd); 390 391 if(targ == realBufferEnd) { 392 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 393 return TC_FAIL; 394 } 395 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 396 397 398 status = U_ZERO_ERROR; 399 400 ucnv_fromUnicode (conv, 401 &targ, 402 end, 403 &src, 404 sourceLimit, 405 checkOffsets ? offs : NULL, 406 doFlush, /* flush if we're at the end of the input data */ 407 &status); 408 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 409 410 if(U_FAILURE(status)) { 411 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 412 return TC_FAIL; 413 } 414 415 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 416 sourceLen, targ-junkout); 417 418 if(VERBOSITY) 419 { 420 char junk[9999]; 421 char offset_str[9999]; 422 char *ptr; 423 424 junk[0] = 0; 425 offset_str[0] = 0; 426 for(ptr = junkout;ptr<targ;ptr++) { 427 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 428 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 429 } 430 431 log_verbose(junk); 432 printSeq((const uint8_t *)expect, expectLen); 433 if ( checkOffsets ) { 434 log_verbose("\nOffsets:"); 435 log_verbose(offset_str); 436 } 437 log_verbose("\n"); 438 } 439 ucnv_close(conv); 440 441 if(expectLen != targ-junkout) { 442 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 443 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 444 printf("\nGot:"); 445 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 446 printf("\nExpected:"); 447 printSeqErr((const unsigned char*)expect, expectLen); 448 return TC_MISMATCH; 449 } 450 451 if (checkOffsets && (expectOffsets != 0) ) { 452 log_verbose("comparing %d offsets..\n", targ-junkout); 453 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 454 log_err("did not get the expected offsets. %s\n", gNuConvTestName); 455 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 456 log_err("\n"); 457 log_err("Got : "); 458 for(p=junkout;p<targ;p++) { 459 log_err("%d,", junokout[p-junkout]); 460 } 461 log_err("\n"); 462 log_err("Expected: "); 463 for(i=0; i<(targ-junkout); i++) { 464 log_err("%d,", expectOffsets[i]); 465 } 466 log_err("\n"); 467 } 468 } 469 470 log_verbose("comparing..\n"); 471 if(!memcmp(junkout, expect, expectLen)) { 472 log_verbose("Matches!\n"); 473 return TC_OK; 474 } else { 475 log_err("String does not match u->%s\n", gNuConvTestName); 476 printUSeqErr(source, sourceLen); 477 printf("\nGot:"); 478 printSeqErr((const unsigned char *)junkout, expectLen); 479 printf("\nExpected:"); 480 printSeqErr((const unsigned char *)expect, expectLen); 481 482 return TC_MISMATCH; 483 } 484 } 485 486 /* Note: This function uses global variables and it will not do offset 487 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 488 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 489 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 490 { 491 UErrorCode status = U_ZERO_ERROR; 492 UConverter *conv = 0; 493 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 494 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 495 const char *src; 496 const char *realSourceEnd; 497 const char *srcLimit; 498 UChar *p; 499 UChar *targ; 500 UChar *end; 501 int32_t *offs; 502 int i; 503 UBool checkOffsets = TRUE; 504 505 int32_t realBufferSize; 506 UChar *realBufferEnd; 507 508 509 for(i=0;i<NEW_MAX_BUFFER;i++) 510 junkout[i] = 0xFFFE; 511 512 for(i=0;i<NEW_MAX_BUFFER;i++) 513 junokout[i] = -1; 514 515 setNuConvTestName(codepage, "TO"); 516 517 log_verbose("\n========= %s\n", gNuConvTestName); 518 519 conv = my_ucnv_open(codepage, &status); 520 521 if(U_FAILURE(status)) 522 { 523 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 524 return TC_FAIL; 525 } 526 if(useFallback){ 527 ucnv_setFallback(conv,useFallback); 528 } 529 log_verbose("Converter opened..\n"); 530 531 src = (const char *)source; 532 targ = junkout; 533 offs = junokout; 534 535 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 536 realBufferEnd = junkout + realBufferSize; 537 realSourceEnd = src + sourcelen; 538 539 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 540 checkOffsets = FALSE; 541 542 do 543 { 544 end = nct_min( targ + gOutBufferSize, realBufferEnd); 545 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 546 547 if(targ == realBufferEnd) 548 { 549 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 550 return TC_FAIL; 551 } 552 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 553 554 /* oldTarg = targ; */ 555 556 status = U_ZERO_ERROR; 557 558 ucnv_toUnicode (conv, 559 &targ, 560 end, 561 &src, 562 srcLimit, 563 checkOffsets ? offs : NULL, 564 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 565 &status); 566 567 /* offs += (targ-oldTarg); */ 568 569 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 570 571 if(U_FAILURE(status)) 572 { 573 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 574 return TC_FAIL; 575 } 576 577 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 578 sourcelen, targ-junkout); 579 if(VERBOSITY) 580 { 581 char junk[9999]; 582 char offset_str[9999]; 583 UChar *ptr; 584 585 junk[0] = 0; 586 offset_str[0] = 0; 587 588 for(ptr = junkout;ptr<targ;ptr++) 589 { 590 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 591 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 592 } 593 594 log_verbose(junk); 595 printUSeq(expect, expectlen); 596 if ( checkOffsets ) 597 { 598 log_verbose("\nOffsets:"); 599 log_verbose(offset_str); 600 } 601 log_verbose("\n"); 602 } 603 ucnv_close(conv); 604 605 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 606 607 if (checkOffsets && (expectOffsets != 0)) 608 { 609 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 610 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 611 log_err("Got: "); 612 for(p=junkout;p<targ;p++) { 613 log_err("%d,", junokout[p-junkout]); 614 } 615 log_err("\n"); 616 log_err("Expected: "); 617 for(i=0; i<(targ-junkout); i++) { 618 log_err("%d,", expectOffsets[i]); 619 } 620 log_err("\n"); 621 log_err("output: "); 622 for(i=0; i<(targ-junkout); i++) { 623 log_err("%X,", junkout[i]); 624 } 625 log_err("\n"); 626 log_err("input: "); 627 for(i=0; i<(src-(const char *)source); i++) { 628 log_err("%X,", (unsigned char)source[i]); 629 } 630 log_err("\n"); 631 } 632 } 633 634 if(!memcmp(junkout, expect, expectlen*2)) 635 { 636 log_verbose("Matches!\n"); 637 return TC_OK; 638 } 639 else 640 { 641 log_err("String does not match. %s\n", gNuConvTestName); 642 log_verbose("String does not match. %s\n", gNuConvTestName); 643 printf("\nGot:"); 644 printUSeqErr(junkout, expectlen); 645 printf("\nExpected:"); 646 printUSeqErr(expect, expectlen); 647 return TC_MISMATCH; 648 } 649 } 650 651 652 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 653 { 654 /** test chars #1 */ 655 /* 1 2 3 1Han 2Han 3Han . */ 656 static const UChar sampleText[] = 657 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 658 static const UChar sampleTextRoundTripUnmappable[] = 659 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 660 661 662 static const uint8_t expectedUTF8[] = 663 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 664 static const int32_t toUTF8Offs[] = 665 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 666 static const int32_t fmUTF8Offs[] = 667 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 668 669 #ifdef U_ENABLE_GENERIC_ISO_2022 670 /* Same as UTF8, but with ^[%B preceeding */ 671 static const const uint8_t expectedISO2022[] = 672 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 673 static const int32_t toISO2022Offs[] = 674 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 675 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 676 static const int32_t fmISO2022Offs[] = 677 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 678 #endif 679 680 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ 681 static const uint8_t expectedIBM930[] = 682 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 683 static const int32_t toIBM930Offs[] = 684 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 685 static const int32_t fmIBM930Offs[] = 686 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 687 688 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 689 static const uint8_t expectedIBM943[] = 690 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 691 static const int32_t toIBM943Offs [] = 692 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 693 static const int32_t fmIBM943Offs[] = 694 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 695 696 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 697 static const uint8_t expectedIBM9027[] = 698 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 699 static const int32_t toIBM9027Offs [] = 700 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 701 702 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 703 static const uint8_t expectedIBM920[] = 704 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 705 static const int32_t toIBM920Offs [] = 706 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 707 708 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 709 static const uint8_t expectedISO88593[] = 710 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 711 static const int32_t toISO88593Offs[] = 712 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 713 714 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ 715 static const uint8_t expectedLATIN1[] = 716 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 717 static const int32_t toLATIN1Offs[] = 718 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 719 720 721 /* etc */ 722 static const uint8_t expectedUTF16BE[] = 723 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 724 static const int32_t toUTF16BEOffs[]= 725 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 726 static const int32_t fmUTF16BEOffs[] = 727 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 728 729 static const uint8_t expectedUTF16LE[] = 730 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 731 static const int32_t toUTF16LEOffs[]= 732 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 733 static const int32_t fmUTF16LEOffs[] = 734 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 735 736 static const uint8_t expectedUTF32BE[] = 737 { 0x00, 0x00, 0x00, 0x31, 738 0x00, 0x00, 0x00, 0x32, 739 0x00, 0x00, 0x00, 0x33, 740 0x00, 0x00, 0x00, 0x00, 741 0x00, 0x00, 0x4e, 0x00, 742 0x00, 0x00, 0x4e, 0x8c, 743 0x00, 0x00, 0x4e, 0x09, 744 0x00, 0x00, 0x00, 0x2e, 745 0x00, 0x02, 0x00, 0x21 }; 746 static const int32_t toUTF32BEOffs[]= 747 { 0x00, 0x00, 0x00, 0x00, 748 0x01, 0x01, 0x01, 0x01, 749 0x02, 0x02, 0x02, 0x02, 750 0x03, 0x03, 0x03, 0x03, 751 0x04, 0x04, 0x04, 0x04, 752 0x05, 0x05, 0x05, 0x05, 753 0x06, 0x06, 0x06, 0x06, 754 0x07, 0x07, 0x07, 0x07, 755 0x08, 0x08, 0x08, 0x08, 756 0x08, 0x08, 0x08, 0x08 }; 757 static const int32_t fmUTF32BEOffs[] = 758 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 759 760 static const uint8_t expectedUTF32LE[] = 761 { 0x31, 0x00, 0x00, 0x00, 762 0x32, 0x00, 0x00, 0x00, 763 0x33, 0x00, 0x00, 0x00, 764 0x00, 0x00, 0x00, 0x00, 765 0x00, 0x4e, 0x00, 0x00, 766 0x8c, 0x4e, 0x00, 0x00, 767 0x09, 0x4e, 0x00, 0x00, 768 0x2e, 0x00, 0x00, 0x00, 769 0x21, 0x00, 0x02, 0x00 }; 770 static const int32_t toUTF32LEOffs[]= 771 { 0x00, 0x00, 0x00, 0x00, 772 0x01, 0x01, 0x01, 0x01, 773 0x02, 0x02, 0x02, 0x02, 774 0x03, 0x03, 0x03, 0x03, 775 0x04, 0x04, 0x04, 0x04, 776 0x05, 0x05, 0x05, 0x05, 777 0x06, 0x06, 0x06, 0x06, 778 0x07, 0x07, 0x07, 0x07, 779 0x08, 0x08, 0x08, 0x08, 780 0x08, 0x08, 0x08, 0x08 }; 781 static const int32_t fmUTF32LEOffs[] = 782 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 783 784 785 786 787 /** Test chars #2 **/ 788 789 /* Sahha [health], slashed h's */ 790 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 791 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 792 793 /* LMBCS */ 794 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 795 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 796 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 797 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 798 /*********************************** START OF CODE finally *************/ 799 800 gInBufferSize = insize; 801 gOutBufferSize = outsize; 802 803 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 804 805 806 /*UTF-8*/ 807 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 808 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 809 810 log_verbose("Test surrogate behaviour for UTF8\n"); 811 { 812 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 813 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 814 0xf0, 0x90, 0x90, 0x81, 815 0xef, 0xbf, 0xbd 816 }; 817 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 818 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 819 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 820 821 822 } 823 824 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 825 /*ISO-2022*/ 826 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 827 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 828 #endif 829 830 /*UTF16 LE*/ 831 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 832 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 833 /*UTF16 BE*/ 834 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 835 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 836 /*UTF32 LE*/ 837 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 838 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 839 /*UTF32 BE*/ 840 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 841 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 842 843 /*LATIN_1*/ 844 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 845 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 846 847 #if !UCONFIG_NO_LEGACY_CONVERSION 848 /*EBCDIC_STATEFUL*/ 849 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 850 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 851 852 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 853 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 854 855 /*MBCS*/ 856 857 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 858 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); 859 /*DBCS*/ 860 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 861 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 862 /*SBCS*/ 863 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 864 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 865 /*SBCS*/ 866 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 867 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 868 #endif 869 870 871 /****/ 872 873 /*UTF-8*/ 874 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 875 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 876 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 877 /*ISO-2022*/ 878 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 879 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 880 #endif 881 882 /*UTF16 LE*/ 883 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 884 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 885 /*UTF16 BE*/ 886 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 887 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 888 /*UTF32 LE*/ 889 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 890 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 891 /*UTF32 BE*/ 892 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 893 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 894 895 #if !UCONFIG_NO_LEGACY_CONVERSION 896 /*EBCDIC_STATEFUL*/ 897 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 898 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 899 /*MBCS*/ 900 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 901 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 902 #endif 903 904 /* Try it again to make sure it still works */ 905 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 906 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 907 908 #if !UCONFIG_NO_LEGACY_CONVERSION 909 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 910 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 911 912 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 913 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 914 915 /*LMBCS*/ 916 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 917 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 918 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 919 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 920 #endif 921 922 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 923 { 924 /* encode directly set D and set O */ 925 static const uint8_t utf7[] = { 926 /* 927 Hi Mom -+Jjo--! 928 A+ImIDkQ. 929 +- 930 +ZeVnLIqe 931 */ 932 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 933 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 934 0x2b, 0x2d, 935 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 936 }; 937 static const UChar unicode[] = { 938 /* 939 Hi Mom -<WHITE SMILING FACE>-! 940 A<NOT IDENTICAL TO><ALPHA>. 941 + 942 [Japanese word "nihongo"] 943 */ 944 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 945 0x41, 0x2262, 0x0391, 0x2e, 946 0x2b, 947 0x65e5, 0x672c, 0x8a9e 948 }; 949 static const int32_t toUnicodeOffsets[] = { 950 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 951 15, 17, 19, 23, 952 24, 953 27, 29, 32 954 }; 955 static const int32_t fromUnicodeOffsets[] = { 956 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 957 11, 12, 12, 12, 13, 13, 13, 13, 14, 958 15, 15, 959 16, 16, 16, 17, 17, 17, 18, 18, 18 960 }; 961 962 /* same but escaping set O (the exclamation mark) */ 963 static const uint8_t utf7Restricted[] = { 964 /* 965 Hi Mom -+Jjo--+ACE- 966 A+ImIDkQ. 967 +- 968 +ZeVnLIqe 969 */ 970 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 971 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 972 0x2b, 0x2d, 973 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65 974 }; 975 static const int32_t toUnicodeOffsetsR[] = { 976 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 977 19, 21, 23, 27, 978 28, 979 31, 33, 36 980 }; 981 static const int32_t fromUnicodeOffsetsR[] = { 982 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 983 11, 12, 12, 12, 13, 13, 13, 13, 14, 984 15, 15, 985 16, 16, 16, 17, 17, 17, 18, 18, 18 986 }; 987 988 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 989 990 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 991 992 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 993 994 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 995 } 996 997 /* 998 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 999 * modified according to RFC 2060, 1000 * and supplemented with the one example in RFC 2060 itself. 1001 */ 1002 { 1003 static const uint8_t imap[] = { 1004 /* Hi Mom -&Jjo--! 1005 A&ImIDkQ-. 1006 &- 1007 &ZeVnLIqe- 1008 \ 1009 ~peter 1010 /mail 1011 /&ZeVnLIqe- 1012 /&U,BTFw- 1013 */ 1014 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1015 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1016 0x26, 0x2d, 1017 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1018 0x5c, 1019 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1020 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1021 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1022 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1023 }; 1024 static const UChar unicode[] = { 1025 /* Hi Mom -<WHITE SMILING FACE>-! 1026 A<NOT IDENTICAL TO><ALPHA>. 1027 & 1028 [Japanese word "nihongo"] 1029 \ 1030 ~peter 1031 /mail 1032 /<65e5, 672c, 8a9e> 1033 /<53f0, 5317> 1034 */ 1035 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1036 0x41, 0x2262, 0x0391, 0x2e, 1037 0x26, 1038 0x65e5, 0x672c, 0x8a9e, 1039 0x5c, 1040 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1041 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1042 0x2f, 0x65e5, 0x672c, 0x8a9e, 1043 0x2f, 0x53f0, 0x5317 1044 }; 1045 static const int32_t toUnicodeOffsets[] = { 1046 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1047 15, 17, 19, 24, 1048 25, 1049 28, 30, 33, 1050 37, 1051 38, 39, 40, 41, 42, 43, 1052 44, 45, 46, 47, 48, 1053 49, 51, 53, 56, 1054 60, 62, 64 1055 }; 1056 static const int32_t fromUnicodeOffsets[] = { 1057 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1058 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1059 15, 15, 1060 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1061 19, 1062 20, 21, 22, 23, 24, 25, 1063 26, 27, 28, 29, 30, 1064 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1065 35, 36, 36, 36, 37, 37, 37, 37, 37 1066 }; 1067 1068 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1069 1070 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1071 } 1072 1073 /* Test UTF-8 bad data handling*/ 1074 { 1075 static const uint8_t utf8[]={ 1076 0x61, 1077 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1078 0x00, 1079 0x62, 1080 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1081 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1082 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1083 0xdf, 0xbf, /* 7ff */ 1084 0xbf, /* truncated tail */ 1085 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1086 0x02 1087 }; 1088 1089 static const uint16_t utf8Expected[]={ 1090 0x0061, 1091 0xfffd, 1092 0x0000, 1093 0x0062, 1094 0xfffd, 1095 0xfffd, 1096 0xdbff, 0xdfff, 1097 0x07ff, 1098 0xfffd, 1099 0xfffd, 1100 0x0002 1101 }; 1102 1103 static const int32_t utf8Offsets[]={ 1104 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1105 }; 1106 testConvertToU(utf8, sizeof(utf8), 1107 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1108 1109 } 1110 1111 /* Test UTF-32BE bad data handling*/ 1112 { 1113 static const uint8_t utf32[]={ 1114 0x00, 0x00, 0x00, 0x61, 1115 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1116 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1117 0x00, 0x00, 0x00, 0x62, 1118 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1119 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1120 0x00, 0x00, 0x01, 0x62, 1121 0x00, 0x00, 0x02, 0x62 1122 }; 1123 static const uint16_t utf32Expected[]={ 1124 0x0061, 1125 0xfffd, /* 0x110000 out of range */ 1126 0xDBFF, /* 0x10FFFF in range */ 1127 0xDFFF, 1128 0x0062, 1129 0xfffd, /* 0xffffffff out of range */ 1130 0xfffd, /* 0x7fffffff out of range */ 1131 0x0162, 1132 0x0262 1133 }; 1134 static const int32_t utf32Offsets[]={ 1135 0, 4, 8, 8, 12, 16, 20, 24, 28 1136 }; 1137 static const uint8_t utf32ExpectedBack[]={ 1138 0x00, 0x00, 0x00, 0x61, 1139 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ 1140 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1141 0x00, 0x00, 0x00, 0x62, 1142 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1143 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1144 0x00, 0x00, 0x01, 0x62, 1145 0x00, 0x00, 0x02, 0x62 1146 }; 1147 static const int32_t utf32OffsetsBack[]={ 1148 0,0,0,0, 1149 1,1,1,1, 1150 2,2,2,2, 1151 4,4,4,4, 1152 5,5,5,5, 1153 6,6,6,6, 1154 7,7,7,7, 1155 8,8,8,8 1156 }; 1157 1158 testConvertToU(utf32, sizeof(utf32), 1159 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1160 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1161 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1162 } 1163 1164 /* Test UTF-32LE bad data handling*/ 1165 { 1166 static const uint8_t utf32[]={ 1167 0x61, 0x00, 0x00, 0x00, 1168 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1169 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1170 0x62, 0x00, 0x00, 0x00, 1171 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1172 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1173 0x62, 0x01, 0x00, 0x00, 1174 0x62, 0x02, 0x00, 0x00, 1175 }; 1176 1177 static const uint16_t utf32Expected[]={ 1178 0x0061, 1179 0xfffd, /* 0x110000 out of range */ 1180 0xDBFF, /* 0x10FFFF in range */ 1181 0xDFFF, 1182 0x0062, 1183 0xfffd, /* 0xffffffff out of range */ 1184 0xfffd, /* 0x7fffffff out of range */ 1185 0x0162, 1186 0x0262 1187 }; 1188 static const int32_t utf32Offsets[]={ 1189 0, 4, 8, 8, 12, 16, 20, 24, 28 1190 }; 1191 static const uint8_t utf32ExpectedBack[]={ 1192 0x61, 0x00, 0x00, 0x00, 1193 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1194 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1195 0x62, 0x00, 0x00, 0x00, 1196 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1197 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1198 0x62, 0x01, 0x00, 0x00, 1199 0x62, 0x02, 0x00, 0x00 1200 }; 1201 static const int32_t utf32OffsetsBack[]={ 1202 0,0,0,0, 1203 1,1,1,1, 1204 2,2,2,2, 1205 4,4,4,4, 1206 5,5,5,5, 1207 6,6,6,6, 1208 7,7,7,7, 1209 8,8,8,8 1210 }; 1211 testConvertToU(utf32, sizeof(utf32), 1212 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1213 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1214 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1215 } 1216 } 1217 1218 static void TestCoverageMBCS(){ 1219 #if 0 1220 UErrorCode status = U_ZERO_ERROR; 1221 const char *directory = loadTestData(&status); 1222 char* tdpath = NULL; 1223 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1224 int len = strlen(directory); 1225 char* index=NULL; 1226 1227 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1228 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1229 log_verbose("Retrieved data directory %s \n",saveDirectory); 1230 uprv_strcpy(tdpath,directory); 1231 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1232 1233 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1234 *(index+1)=0; 1235 } 1236 u_setDataDirectory(tdpath); 1237 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1238 #endif 1239 1240 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1241 which is test file for MBCS conversion with single-byte codepage data.*/ 1242 { 1243 1244 /* MBCS with single byte codepage data test1.ucm*/ 1245 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1246 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1247 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1248 1249 /*from Unicode*/ 1250 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1251 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1252 } 1253 1254 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm 1255 which is test file for MBCS conversion with three-byte codepage data.*/ 1256 { 1257 1258 /* MBCS with three byte codepage data test3.ucm*/ 1259 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1260 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1261 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1262 1263 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1264 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1265 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1266 1267 /*from Unicode*/ 1268 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1269 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1270 1271 /*to Unicode*/ 1272 testConvertToU(test3input, sizeof(test3input), 1273 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1274 1275 } 1276 1277 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm 1278 which is test file for MBCS conversion with four-byte codepage data.*/ 1279 { 1280 1281 /* MBCS with three byte codepage data test4.ucm*/ 1282 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1283 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1284 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1285 1286 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1287 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1288 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1289 1290 /*from Unicode*/ 1291 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1292 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1293 1294 /*to Unicode*/ 1295 testConvertToU(test4input, sizeof(test4input), 1296 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1297 1298 } 1299 #if 0 1300 free(tdpath); 1301 /* restore the original data directory */ 1302 log_verbose("Setting the data directory to %s \n", saveDirectory); 1303 u_setDataDirectory(saveDirectory); 1304 free(saveDirectory); 1305 #endif 1306 1307 } 1308 1309 static void TestConverterType(const char *convName, UConverterType convType) { 1310 UConverter* myConverter; 1311 UErrorCode err = U_ZERO_ERROR; 1312 1313 myConverter = my_ucnv_open(convName, &err); 1314 1315 if (U_FAILURE(err)) { 1316 log_data_err("Failed to create an %s converter\n", convName); 1317 return; 1318 } 1319 else 1320 { 1321 if (ucnv_getType(myConverter)!=convType) { 1322 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", 1323 convName, convType); 1324 } 1325 else { 1326 log_verbose("ucnv_getType %s ok\n", convName); 1327 } 1328 } 1329 ucnv_close(myConverter); 1330 } 1331 1332 static void TestConverterTypesAndStarters() 1333 { 1334 #if !UCONFIG_NO_LEGACY_CONVERSION 1335 UConverter* myConverter; 1336 UErrorCode err = U_ZERO_ERROR; 1337 UBool mystarters[256]; 1338 1339 /* const UBool expectedKSCstarters[256] = { 1340 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1341 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1342 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1343 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1344 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1345 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1346 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1347 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1348 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1349 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1350 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1351 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1352 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1353 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1354 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1355 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1356 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1357 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1358 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1359 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1360 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1361 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1362 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1363 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1364 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1365 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1366 1367 1368 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1369 1370 myConverter = ucnv_open("ksc", &err); 1371 if (U_FAILURE(err)) { 1372 log_data_err("Failed to create an ibm-ksc converter\n"); 1373 return; 1374 } 1375 else 1376 { 1377 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1378 log_err("ucnv_getType Failed for ibm-949\n"); 1379 else 1380 log_verbose("ucnv_getType ibm-949 ok\n"); 1381 1382 if(myConverter!=NULL) 1383 ucnv_getStarters(myConverter, mystarters, &err); 1384 1385 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1386 log_err("Failed ucnv_getStarters for ksc\n"); 1387 else 1388 log_verbose("ucnv_getStarters ok\n");*/ 1389 1390 } 1391 ucnv_close(myConverter); 1392 1393 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1394 TestConverterType("ibm-878", UCNV_SBCS); 1395 #endif 1396 1397 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1398 1399 TestConverterType("ibm-1208", UCNV_UTF8); 1400 1401 TestConverterType("utf-8", UCNV_UTF8); 1402 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1403 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1404 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1405 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1406 1407 #if !UCONFIG_NO_LEGACY_CONVERSION 1408 1409 #if defined(U_ENABLE_GENERIC_ISO_2022) 1410 TestConverterType("iso-2022", UCNV_ISO_2022); 1411 #endif 1412 1413 TestConverterType("hz", UCNV_HZ); 1414 #endif 1415 1416 TestConverterType("scsu", UCNV_SCSU); 1417 1418 #if !UCONFIG_NO_LEGACY_CONVERSION 1419 TestConverterType("x-iscii-de", UCNV_ISCII); 1420 #endif 1421 1422 TestConverterType("ascii", UCNV_US_ASCII); 1423 TestConverterType("utf-7", UCNV_UTF7); 1424 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1425 TestConverterType("bocu-1", UCNV_BOCU1); 1426 } 1427 1428 static void 1429 TestAmbiguousConverter(UConverter *cnv) { 1430 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1431 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1432 1433 const char *s; 1434 UChar *u; 1435 UErrorCode errorCode; 1436 UBool isAmbiguous; 1437 1438 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1439 errorCode=U_ZERO_ERROR; 1440 s=inBytes; 1441 u=outUnicode; 1442 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1443 if(U_FAILURE(errorCode)) { 1444 /* we do not care about general failures in this test; the input may just not be mappable */ 1445 return; 1446 } 1447 1448 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1449 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1450 /* There are some encodings that are partially ASCII based, 1451 like the ISO-7 and GSM series of codepages, which we ignore. */ 1452 return; 1453 } 1454 1455 isAmbiguous=ucnv_isAmbiguous(cnv); 1456 1457 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1458 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1459 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1460 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1461 return; 1462 } 1463 1464 if(outUnicode[2]!=0x5c) { 1465 /* needs fixup, fix it */ 1466 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1467 if(outUnicode[2]!=0x5c) { 1468 /* the fix failed */ 1469 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1470 return; 1471 } 1472 } 1473 } 1474 1475 static void TestAmbiguous() 1476 { 1477 UErrorCode status = U_ZERO_ERROR; 1478 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1479 static const char target[] = { 1480 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1481 0x5c, 0x75, 0x73, 0x72, 1482 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1483 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1484 0x5c, 0x64, 0x61, 0x74, 0x61, 1485 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1486 0 1487 }; 1488 UChar asciiResult[200], sjisResult[200]; 1489 int32_t asciiLength = 0, sjisLength = 0, i; 1490 const char *name; 1491 1492 /* enumerate all converters */ 1493 status=U_ZERO_ERROR; 1494 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1495 cnv=ucnv_open(name, &status); 1496 if(U_SUCCESS(status)) { 1497 TestAmbiguousConverter(cnv); 1498 ucnv_close(cnv); 1499 } else { 1500 log_err("error: unable to open available converter \"%s\"\n", name); 1501 status=U_ZERO_ERROR; 1502 } 1503 } 1504 1505 #if !UCONFIG_NO_LEGACY_CONVERSION 1506 sjis_cnv = ucnv_open("ibm-943", &status); 1507 if (U_FAILURE(status)) 1508 { 1509 log_data_err("Failed to create a SJIS converter\n"); 1510 return; 1511 } 1512 ascii_cnv = ucnv_open("LATIN-1", &status); 1513 if (U_FAILURE(status)) 1514 { 1515 log_data_err("Failed to create a LATIN-1 converter\n"); 1516 ucnv_close(sjis_cnv); 1517 return; 1518 } 1519 /* convert target from SJIS to Unicode */ 1520 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1521 if (U_FAILURE(status)) 1522 { 1523 log_err("Failed to convert the SJIS string.\n"); 1524 ucnv_close(sjis_cnv); 1525 ucnv_close(ascii_cnv); 1526 return; 1527 } 1528 /* convert target from Latin-1 to Unicode */ 1529 asciiLength = ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1530 if (U_FAILURE(status)) 1531 { 1532 log_err("Failed to convert the Latin-1 string.\n"); 1533 ucnv_close(sjis_cnv); 1534 ucnv_close(ascii_cnv); 1535 return; 1536 } 1537 if (!ucnv_isAmbiguous(sjis_cnv)) 1538 { 1539 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1540 ucnv_close(sjis_cnv); 1541 ucnv_close(ascii_cnv); 1542 return; 1543 } 1544 if (u_strcmp(sjisResult, asciiResult) == 0) 1545 { 1546 log_err("File separators for SJIS don't need to be fixed.\n"); 1547 } 1548 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1549 if (u_strcmp(sjisResult, asciiResult) != 0) 1550 { 1551 log_err("Fixing file separator for SJIS failed.\n"); 1552 } 1553 ucnv_close(sjis_cnv); 1554 ucnv_close(ascii_cnv); 1555 #endif 1556 } 1557 1558 static void 1559 TestSignatureDetection(){ 1560 /* with null terminated strings */ 1561 { 1562 static const char* data[] = { 1563 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1564 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1565 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1566 "\x0E\xFE\xFF\x00", /* SCSU */ 1567 1568 "\xFE\xFF", /* UTF-16BE */ 1569 "\xFF\xFE", /* UTF-16LE */ 1570 "\xEF\xBB\xBF", /* UTF-8 */ 1571 "\x0E\xFE\xFF", /* SCSU */ 1572 1573 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1574 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1575 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1576 "\x0E\xFE\xFF\x41", /* SCSU */ 1577 1578 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1579 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1580 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1581 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1582 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1583 1584 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1585 }; 1586 static const char* expected[] = { 1587 "UTF-16BE", 1588 "UTF-16LE", 1589 "UTF-8", 1590 "SCSU", 1591 1592 "UTF-16BE", 1593 "UTF-16LE", 1594 "UTF-8", 1595 "SCSU", 1596 1597 "UTF-16BE", 1598 "UTF-16LE", 1599 "UTF-8", 1600 "SCSU", 1601 1602 "UTF-7", 1603 "UTF-7", 1604 "UTF-7", 1605 "UTF-7", 1606 "UTF-7", 1607 "UTF-EBCDIC" 1608 }; 1609 static const int32_t expectedLength[] ={ 1610 2, 1611 2, 1612 3, 1613 3, 1614 1615 2, 1616 2, 1617 3, 1618 3, 1619 1620 2, 1621 2, 1622 3, 1623 3, 1624 1625 5, 1626 4, 1627 4, 1628 4, 1629 4, 1630 4 1631 }; 1632 int i=0; 1633 UErrorCode err; 1634 int32_t signatureLength = -1; 1635 const char* source = NULL; 1636 const char* enc = NULL; 1637 for( ; i<sizeof(data)/sizeof(char*); i++){ 1638 err = U_ZERO_ERROR; 1639 source = data[i]; 1640 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1641 if(U_FAILURE(err)){ 1642 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1643 continue; 1644 } 1645 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1646 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1647 continue; 1648 } 1649 if(signatureLength != expectedLength[i]){ 1650 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1651 } 1652 } 1653 } 1654 { 1655 static const char* data[] = { 1656 "\xFE\xFF\x00", /* UTF-16BE */ 1657 "\xFF\xFE\x00", /* UTF-16LE */ 1658 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1659 "\x0E\xFE\xFF\x00", /* SCSU */ 1660 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1661 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1662 "\xFE\xFF", /* UTF-16BE */ 1663 "\xFF\xFE", /* UTF-16LE */ 1664 "\xEF\xBB\xBF", /* UTF-8 */ 1665 "\x0E\xFE\xFF", /* SCSU */ 1666 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1667 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1668 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1669 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1670 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1671 "\x0E\xFE\xFF\x41", /* SCSU */ 1672 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1673 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1674 "\xFB\xEE\x28", /* BOCU-1 */ 1675 "\xFF\x41\x42" /* NULL */ 1676 }; 1677 static const int len[] = { 1678 3, 1679 3, 1680 4, 1681 4, 1682 4, 1683 4, 1684 2, 1685 2, 1686 3, 1687 3, 1688 4, 1689 4, 1690 4, 1691 4, 1692 4, 1693 4, 1694 5, 1695 5, 1696 3, 1697 3 1698 }; 1699 1700 static const char* expected[] = { 1701 "UTF-16BE", 1702 "UTF-16LE", 1703 "UTF-8", 1704 "SCSU", 1705 "UTF-32BE", 1706 "UTF-32LE", 1707 "UTF-16BE", 1708 "UTF-16LE", 1709 "UTF-8", 1710 "SCSU", 1711 "UTF-32BE", 1712 "UTF-32LE", 1713 "UTF-16BE", 1714 "UTF-16LE", 1715 "UTF-8", 1716 "SCSU", 1717 "UTF-32BE", 1718 "UTF-32LE", 1719 "BOCU-1", 1720 NULL 1721 }; 1722 static const int32_t expectedLength[] ={ 1723 2, 1724 2, 1725 3, 1726 3, 1727 4, 1728 4, 1729 2, 1730 2, 1731 3, 1732 3, 1733 4, 1734 4, 1735 2, 1736 2, 1737 3, 1738 3, 1739 4, 1740 4, 1741 3, 1742 0 1743 }; 1744 int i=0; 1745 UErrorCode err; 1746 int32_t signatureLength = -1; 1747 int32_t sourceLength=-1; 1748 const char* source = NULL; 1749 const char* enc = NULL; 1750 for( ; i<sizeof(data)/sizeof(char*); i++){ 1751 err = U_ZERO_ERROR; 1752 source = data[i]; 1753 sourceLength = len[i]; 1754 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); 1755 if(U_FAILURE(err)){ 1756 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1757 continue; 1758 } 1759 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1760 if(expected[i] !=NULL){ 1761 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1762 continue; 1763 } 1764 } 1765 if(signatureLength != expectedLength[i]){ 1766 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1767 } 1768 } 1769 } 1770 } 1771 1772 void 1773 static TestUTF7() { 1774 /* test input */ 1775 static const uint8_t in[]={ 1776 /* H - +Jjo- - ! +- +2AHcAQ */ 1777 0x48, 1778 0x2d, 1779 0x2b, 0x4a, 0x6a, 0x6f, 1780 0x2d, 0x2d, 1781 0x21, 1782 0x2b, 0x2d, 1783 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1784 }; 1785 1786 /* expected test results */ 1787 static const int32_t results[]={ 1788 /* number of bytes read, code point */ 1789 1, 0x48, 1790 1, 0x2d, 1791 4, 0x263a, /* <WHITE SMILING FACE> */ 1792 2, 0x2d, 1793 1, 0x21, 1794 2, 0x2b, 1795 7, 0x10401 1796 }; 1797 1798 const char *cnvName; 1799 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1800 UErrorCode errorCode=U_ZERO_ERROR; 1801 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1802 if(U_FAILURE(errorCode)) { 1803 log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1804 return; 1805 } 1806 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1807 /* Test the condition when source >= sourceLimit */ 1808 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1809 cnvName = ucnv_getName(cnv, &errorCode); 1810 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1811 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1812 } 1813 ucnv_close(cnv); 1814 } 1815 1816 void 1817 static TestIMAP() { 1818 /* test input */ 1819 static const uint8_t in[]={ 1820 /* H - &Jjo- - ! &- &2AHcAQ- \ */ 1821 0x48, 1822 0x2d, 1823 0x26, 0x4a, 0x6a, 0x6f, 1824 0x2d, 0x2d, 1825 0x21, 1826 0x26, 0x2d, 1827 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1828 }; 1829 1830 /* expected test results */ 1831 static const int32_t results[]={ 1832 /* number of bytes read, code point */ 1833 1, 0x48, 1834 1, 0x2d, 1835 4, 0x263a, /* <WHITE SMILING FACE> */ 1836 2, 0x2d, 1837 1, 0x21, 1838 2, 0x26, 1839 7, 0x10401 1840 }; 1841 1842 const char *cnvName; 1843 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1844 UErrorCode errorCode=U_ZERO_ERROR; 1845 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1846 if(U_FAILURE(errorCode)) { 1847 log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */ 1848 return; 1849 } 1850 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1851 /* Test the condition when source >= sourceLimit */ 1852 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1853 cnvName = ucnv_getName(cnv, &errorCode); 1854 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1855 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1856 } 1857 ucnv_close(cnv); 1858 } 1859 1860 void 1861 static TestUTF8() { 1862 /* test input */ 1863 static const uint8_t in[]={ 1864 0x61, 1865 0xc2, 0x80, 1866 0xe0, 0xa0, 0x80, 1867 0xf0, 0x90, 0x80, 0x80, 1868 0xf4, 0x84, 0x8c, 0xa1, 1869 0xf0, 0x90, 0x90, 0x81 1870 }; 1871 1872 /* expected test results */ 1873 static const int32_t results[]={ 1874 /* number of bytes read, code point */ 1875 1, 0x61, 1876 2, 0x80, 1877 3, 0x800, 1878 4, 0x10000, 1879 4, 0x104321, 1880 4, 0x10401 1881 }; 1882 1883 /* error test input */ 1884 static const uint8_t in2[]={ 1885 0x61, 1886 0xc0, 0x80, /* illegal non-shortest form */ 1887 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1888 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1889 0xc0, 0xc0, /* illegal trail byte */ 1890 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1891 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1892 0xfe, /* illegal byte altogether */ 1893 0x62 1894 }; 1895 1896 /* expected error test results */ 1897 static const int32_t results2[]={ 1898 /* number of bytes read, code point */ 1899 1, 0x61, 1900 22, 0x62 1901 }; 1902 1903 UConverterToUCallback cb; 1904 const void *p; 1905 1906 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1907 UErrorCode errorCode=U_ZERO_ERROR; 1908 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1909 if(U_FAILURE(errorCode)) { 1910 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1911 return; 1912 } 1913 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1914 /* Test the condition when source >= sourceLimit */ 1915 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1916 1917 /* test error behavior with a skip callback */ 1918 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1919 source=(const char *)in2; 1920 limit=(const char *)(in2+sizeof(in2)); 1921 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1922 1923 ucnv_close(cnv); 1924 } 1925 1926 void 1927 static TestCESU8() { 1928 /* test input */ 1929 static const uint8_t in[]={ 1930 0x61, 1931 0xc2, 0x80, 1932 0xe0, 0xa0, 0x80, 1933 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1934 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1935 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1936 0xef, 0xbf, 0xbc 1937 }; 1938 1939 /* expected test results */ 1940 static const int32_t results[]={ 1941 /* number of bytes read, code point */ 1942 1, 0x61, 1943 2, 0x80, 1944 3, 0x800, 1945 6, 0x10000, 1946 3, 0xdc01, 1947 -1,0xd802, /* may read 3 or 6 bytes */ 1948 -1,0x10ffff,/* may read 0 or 3 bytes */ 1949 3, 0xfffc 1950 }; 1951 1952 /* error test input */ 1953 static const uint8_t in2[]={ 1954 0x61, 1955 0xc0, 0x80, /* illegal non-shortest form */ 1956 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1957 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1958 0xc0, 0xc0, /* illegal trail byte */ 1959 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 1960 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 1961 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 1962 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1963 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1964 0xfe, /* illegal byte altogether */ 1965 0x62 1966 }; 1967 1968 /* expected error test results */ 1969 static const int32_t results2[]={ 1970 /* number of bytes read, code point */ 1971 1, 0x61, 1972 34, 0x62 1973 }; 1974 1975 UConverterToUCallback cb; 1976 const void *p; 1977 1978 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1979 UErrorCode errorCode=U_ZERO_ERROR; 1980 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 1981 if(U_FAILURE(errorCode)) { 1982 log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 1983 return; 1984 } 1985 TestNextUChar(cnv, source, limit, results, "CESU-8"); 1986 /* Test the condition when source >= sourceLimit */ 1987 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1988 1989 /* test error behavior with a skip callback */ 1990 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1991 source=(const char *)in2; 1992 limit=(const char *)(in2+sizeof(in2)); 1993 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 1994 1995 ucnv_close(cnv); 1996 } 1997 1998 void 1999 static TestUTF16() { 2000 /* test input */ 2001 static const uint8_t in1[]={ 2002 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2003 }; 2004 static const uint8_t in2[]={ 2005 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2006 }; 2007 static const uint8_t in3[]={ 2008 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2009 }; 2010 2011 /* expected test results */ 2012 static const int32_t results1[]={ 2013 /* number of bytes read, code point */ 2014 4, 0x4e00, 2015 2, 0xfeff 2016 }; 2017 static const int32_t results2[]={ 2018 /* number of bytes read, code point */ 2019 4, 0x004e, 2020 2, 0xfffe 2021 }; 2022 static const int32_t results3[]={ 2023 /* number of bytes read, code point */ 2024 2, 0xfefe, 2025 2, 0x4e00, 2026 2, 0xfeff, 2027 4, 0x20001 2028 }; 2029 2030 const char *source, *limit; 2031 2032 UErrorCode errorCode=U_ZERO_ERROR; 2033 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2034 if(U_FAILURE(errorCode)) { 2035 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2036 return; 2037 } 2038 2039 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2040 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2041 2042 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2043 ucnv_resetToUnicode(cnv); 2044 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2045 2046 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2047 ucnv_resetToUnicode(cnv); 2048 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2049 2050 /* Test the condition when source >= sourceLimit */ 2051 ucnv_resetToUnicode(cnv); 2052 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2053 2054 ucnv_close(cnv); 2055 } 2056 2057 void 2058 static TestUTF16BE() { 2059 /* test input */ 2060 static const uint8_t in[]={ 2061 0x00, 0x61, 2062 0x00, 0xc0, 2063 0x00, 0x31, 2064 0x00, 0xf4, 2065 0xce, 0xfe, 2066 0xd8, 0x01, 0xdc, 0x01 2067 }; 2068 2069 /* expected test results */ 2070 static const int32_t results[]={ 2071 /* number of bytes read, code point */ 2072 2, 0x61, 2073 2, 0xc0, 2074 2, 0x31, 2075 2, 0xf4, 2076 2, 0xcefe, 2077 4, 0x10401 2078 }; 2079 2080 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2081 UErrorCode errorCode=U_ZERO_ERROR; 2082 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2083 if(U_FAILURE(errorCode)) { 2084 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); 2085 return; 2086 } 2087 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2088 /* Test the condition when source >= sourceLimit */ 2089 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2090 /*Test for the condition where there is an invalid character*/ 2091 { 2092 static const uint8_t source2[]={0x61}; 2093 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2094 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2095 } 2096 #if 0 2097 /* 2098 * Test disabled because currently the UTF-16BE/LE converters are supposed 2099 * to not set errors for unpaired surrogates. 2100 * This may change with 2101 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2102 */ 2103 2104 /*Test for the condition where there is a surrogate pair*/ 2105 { 2106 const uint8_t source2[]={0xd8, 0x01}; 2107 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2108 } 2109 #endif 2110 ucnv_close(cnv); 2111 } 2112 2113 static void 2114 TestUTF16LE() { 2115 /* test input */ 2116 static const uint8_t in[]={ 2117 0x61, 0x00, 2118 0x31, 0x00, 2119 0x4e, 0x2e, 2120 0x4e, 0x00, 2121 0x01, 0xd8, 0x01, 0xdc 2122 }; 2123 2124 /* expected test results */ 2125 static const int32_t results[]={ 2126 /* number of bytes read, code point */ 2127 2, 0x61, 2128 2, 0x31, 2129 2, 0x2e4e, 2130 2, 0x4e, 2131 4, 0x10401 2132 }; 2133 2134 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2135 UErrorCode errorCode=U_ZERO_ERROR; 2136 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2137 if(U_FAILURE(errorCode)) { 2138 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2139 return; 2140 } 2141 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2142 /* Test the condition when source >= sourceLimit */ 2143 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2144 /*Test for the condition where there is an invalid character*/ 2145 { 2146 static const uint8_t source2[]={0x61}; 2147 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2148 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2149 } 2150 #if 0 2151 /* 2152 * Test disabled because currently the UTF-16BE/LE converters are supposed 2153 * to not set errors for unpaired surrogates. 2154 * This may change with 2155 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2156 */ 2157 2158 /*Test for the condition where there is a surrogate character*/ 2159 { 2160 static const uint8_t source2[]={0x01, 0xd8}; 2161 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2162 } 2163 #endif 2164 2165 ucnv_close(cnv); 2166 } 2167 2168 void 2169 static TestUTF32() { 2170 /* test input */ 2171 static const uint8_t in1[]={ 2172 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2173 }; 2174 static const uint8_t in2[]={ 2175 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2176 }; 2177 static const uint8_t in3[]={ 2178 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2179 }; 2180 2181 /* expected test results */ 2182 static const int32_t results1[]={ 2183 /* number of bytes read, code point */ 2184 8, 0x100f00, 2185 4, 0xfeff 2186 }; 2187 static const int32_t results2[]={ 2188 /* number of bytes read, code point */ 2189 8, 0x0f1000, 2190 4, 0xfffe 2191 }; 2192 static const int32_t results3[]={ 2193 /* number of bytes read, code point */ 2194 4, 0xfefe, 2195 4, 0x100f00, 2196 4, 0xfffd, /* unmatched surrogate */ 2197 4, 0xfffd /* unmatched surrogate */ 2198 }; 2199 2200 const char *source, *limit; 2201 2202 UErrorCode errorCode=U_ZERO_ERROR; 2203 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2204 if(U_FAILURE(errorCode)) { 2205 log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2206 return; 2207 } 2208 2209 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2210 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2211 2212 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2213 ucnv_resetToUnicode(cnv); 2214 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2215 2216 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2217 ucnv_resetToUnicode(cnv); 2218 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2219 2220 /* Test the condition when source >= sourceLimit */ 2221 ucnv_resetToUnicode(cnv); 2222 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2223 2224 ucnv_close(cnv); 2225 } 2226 2227 static void 2228 TestUTF32BE() { 2229 /* test input */ 2230 static const uint8_t in[]={ 2231 0x00, 0x00, 0x00, 0x61, 2232 0x00, 0x00, 0x30, 0x61, 2233 0x00, 0x00, 0xdc, 0x00, 2234 0x00, 0x00, 0xd8, 0x00, 2235 0x00, 0x00, 0xdf, 0xff, 2236 0x00, 0x00, 0xff, 0xfe, 2237 0x00, 0x10, 0xab, 0xcd, 2238 0x00, 0x10, 0xff, 0xff 2239 }; 2240 2241 /* expected test results */ 2242 static const int32_t results[]={ 2243 /* number of bytes read, code point */ 2244 4, 0x61, 2245 4, 0x3061, 2246 4, 0xfffd, 2247 4, 0xfffd, 2248 4, 0xfffd, 2249 4, 0xfffe, 2250 4, 0x10abcd, 2251 4, 0x10ffff 2252 }; 2253 2254 /* error test input */ 2255 static const uint8_t in2[]={ 2256 0x00, 0x00, 0x00, 0x61, 2257 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2258 0x00, 0x00, 0x00, 0x62, 2259 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2260 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2261 0x00, 0x00, 0x01, 0x62, 2262 0x00, 0x00, 0x02, 0x62 2263 }; 2264 2265 /* expected error test results */ 2266 static const int32_t results2[]={ 2267 /* number of bytes read, code point */ 2268 4, 0x61, 2269 8, 0x62, 2270 12, 0x162, 2271 4, 0x262 2272 }; 2273 2274 UConverterToUCallback cb; 2275 const void *p; 2276 2277 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2278 UErrorCode errorCode=U_ZERO_ERROR; 2279 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2280 if(U_FAILURE(errorCode)) { 2281 log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2282 return; 2283 } 2284 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2285 2286 /* Test the condition when source >= sourceLimit */ 2287 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2288 2289 /* test error behavior with a skip callback */ 2290 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2291 source=(const char *)in2; 2292 limit=(const char *)(in2+sizeof(in2)); 2293 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2294 2295 ucnv_close(cnv); 2296 } 2297 2298 static void 2299 TestUTF32LE() { 2300 /* test input */ 2301 static const uint8_t in[]={ 2302 0x61, 0x00, 0x00, 0x00, 2303 0x61, 0x30, 0x00, 0x00, 2304 0x00, 0xdc, 0x00, 0x00, 2305 0x00, 0xd8, 0x00, 0x00, 2306 0xff, 0xdf, 0x00, 0x00, 2307 0xfe, 0xff, 0x00, 0x00, 2308 0xcd, 0xab, 0x10, 0x00, 2309 0xff, 0xff, 0x10, 0x00 2310 }; 2311 2312 /* expected test results */ 2313 static const int32_t results[]={ 2314 /* number of bytes read, code point */ 2315 4, 0x61, 2316 4, 0x3061, 2317 4, 0xfffd, 2318 4, 0xfffd, 2319 4, 0xfffd, 2320 4, 0xfffe, 2321 4, 0x10abcd, 2322 4, 0x10ffff 2323 }; 2324 2325 /* error test input */ 2326 static const uint8_t in2[]={ 2327 0x61, 0x00, 0x00, 0x00, 2328 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2329 0x62, 0x00, 0x00, 0x00, 2330 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2331 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2332 0x62, 0x01, 0x00, 0x00, 2333 0x62, 0x02, 0x00, 0x00, 2334 }; 2335 2336 /* expected error test results */ 2337 static const int32_t results2[]={ 2338 /* number of bytes read, code point */ 2339 4, 0x61, 2340 8, 0x62, 2341 12, 0x162, 2342 4, 0x262, 2343 }; 2344 2345 UConverterToUCallback cb; 2346 const void *p; 2347 2348 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2349 UErrorCode errorCode=U_ZERO_ERROR; 2350 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2351 if(U_FAILURE(errorCode)) { 2352 log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2353 return; 2354 } 2355 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2356 2357 /* Test the condition when source >= sourceLimit */ 2358 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2359 2360 /* test error behavior with a skip callback */ 2361 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2362 source=(const char *)in2; 2363 limit=(const char *)(in2+sizeof(in2)); 2364 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2365 2366 ucnv_close(cnv); 2367 } 2368 2369 static void 2370 TestLATIN1() { 2371 /* test input */ 2372 static const uint8_t in[]={ 2373 0x61, 2374 0x31, 2375 0x32, 2376 0xc0, 2377 0xf0, 2378 0xf4, 2379 }; 2380 2381 /* expected test results */ 2382 static const int32_t results[]={ 2383 /* number of bytes read, code point */ 2384 1, 0x61, 2385 1, 0x31, 2386 1, 0x32, 2387 1, 0xc0, 2388 1, 0xf0, 2389 1, 0xf4, 2390 }; 2391 static const uint16_t in1[] = { 2392 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2393 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2394 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2395 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2396 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2397 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2398 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2399 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2400 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2401 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2402 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2403 0xcb, 0x82 2404 }; 2405 static const uint8_t out1[] = { 2406 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2407 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2408 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2409 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2410 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2411 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2412 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2413 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2414 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2415 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2416 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2417 0xcb, 0x82 2418 }; 2419 static const uint16_t in2[]={ 2420 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2421 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2422 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2423 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2424 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2425 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2426 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2427 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2428 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2429 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2430 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2431 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2432 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2433 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2434 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2435 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2436 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2437 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2438 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2439 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2440 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2441 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2442 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2443 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2444 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2445 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2446 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2447 0x37, 0x20, 0x2A, 0x2F, 2448 }; 2449 static const unsigned char out2[]={ 2450 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2451 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2452 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2453 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2454 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2455 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2456 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2457 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2458 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2459 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2460 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2461 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2462 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2463 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2464 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2465 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2466 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2467 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2468 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2469 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2470 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2471 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2472 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2473 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2474 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2475 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2476 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2477 0x37, 0x20, 0x2A, 0x2F, 2478 }; 2479 const char *source=(const char *)in; 2480 const char *limit=(const char *)in+sizeof(in); 2481 2482 UErrorCode errorCode=U_ZERO_ERROR; 2483 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2484 if(U_FAILURE(errorCode)) { 2485 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2486 return; 2487 } 2488 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2489 /* Test the condition when source >= sourceLimit */ 2490 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2491 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2492 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2493 2494 ucnv_close(cnv); 2495 } 2496 2497 static void 2498 TestSBCS() { 2499 /* test input */ 2500 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2501 /* expected test results */ 2502 static const int32_t results[]={ 2503 /* number of bytes read, code point */ 2504 1, 0x61, 2505 1, 0xbf, 2506 1, 0xc4, 2507 1, 0x2021, 2508 1, 0xf8ff, 2509 1, 0x00d9 2510 }; 2511 2512 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2513 UErrorCode errorCode=U_ZERO_ERROR; 2514 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2515 if(U_FAILURE(errorCode)) { 2516 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2517 return; 2518 } 2519 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2520 /* Test the condition when source >= sourceLimit */ 2521 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2522 /*Test for Illegal character */ /* 2523 { 2524 static const uint8_t input1[]={ 0xA1 }; 2525 const char* illegalsource=(const char*)input1; 2526 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2527 } 2528 */ 2529 ucnv_close(cnv); 2530 } 2531 2532 static void 2533 TestDBCS() { 2534 /* test input */ 2535 static const uint8_t in[]={ 2536 0x44, 0x6a, 2537 0xc4, 0x9c, 2538 0x7a, 0x74, 2539 0x46, 0xab, 2540 0x42, 0x5b, 2541 2542 }; 2543 2544 /* expected test results */ 2545 static const int32_t results[]={ 2546 /* number of bytes read, code point */ 2547 2, 0x00a7, 2548 2, 0xe1d2, 2549 2, 0x6962, 2550 2, 0xf842, 2551 2, 0xffe5, 2552 }; 2553 2554 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2555 UErrorCode errorCode=U_ZERO_ERROR; 2556 2557 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2558 if(U_FAILURE(errorCode)) { 2559 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2560 return; 2561 } 2562 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2563 /* Test the condition when source >= sourceLimit */ 2564 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2565 /*Test for the condition where there is an invalid character*/ 2566 { 2567 static const uint8_t source2[]={0x1a, 0x1b}; 2568 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2569 } 2570 /*Test for the condition where we have a truncated char*/ 2571 { 2572 static const uint8_t source1[]={0xc4}; 2573 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2574 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2575 } 2576 ucnv_close(cnv); 2577 } 2578 2579 static void 2580 TestMBCS() { 2581 /* test input */ 2582 static const uint8_t in[]={ 2583 0x01, 2584 0xa6, 0xa3, 2585 0x00, 2586 0xa6, 0xa1, 2587 0x08, 2588 0xc2, 0x76, 2589 0xc2, 0x78, 2590 2591 }; 2592 2593 /* expected test results */ 2594 static const int32_t results[]={ 2595 /* number of bytes read, code point */ 2596 1, 0x0001, 2597 2, 0x250c, 2598 1, 0x0000, 2599 2, 0x2500, 2600 1, 0x0008, 2601 2, 0xd60c, 2602 2, 0xd60e, 2603 }; 2604 2605 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2606 UErrorCode errorCode=U_ZERO_ERROR; 2607 2608 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2609 if(U_FAILURE(errorCode)) { 2610 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2611 return; 2612 } 2613 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2614 /* Test the condition when source >= sourceLimit */ 2615 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2616 /*Test for the condition where there is an invalid character*/ 2617 { 2618 static const uint8_t source2[]={0xa1, 0x80}; 2619 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2620 } 2621 /*Test for the condition where we have a truncated char*/ 2622 { 2623 static const uint8_t source1[]={0xc4}; 2624 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2625 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2626 } 2627 ucnv_close(cnv); 2628 2629 } 2630 2631 #ifdef U_ENABLE_GENERIC_ISO_2022 2632 2633 static void 2634 TestISO_2022() { 2635 /* test input */ 2636 static const uint8_t in[]={ 2637 0x1b, 0x25, 0x42, 2638 0x31, 2639 0x32, 2640 0x61, 2641 0xc2, 0x80, 2642 0xe0, 0xa0, 0x80, 2643 0xf0, 0x90, 0x80, 0x80 2644 }; 2645 2646 2647 2648 /* expected test results */ 2649 static const int32_t results[]={ 2650 /* number of bytes read, code point */ 2651 4, 0x0031, /* 4 bytes including the escape sequence */ 2652 1, 0x0032, 2653 1, 0x61, 2654 2, 0x80, 2655 3, 0x800, 2656 4, 0x10000 2657 }; 2658 2659 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2660 UErrorCode errorCode=U_ZERO_ERROR; 2661 UConverter *cnv; 2662 2663 cnv=ucnv_open("ISO_2022", &errorCode); 2664 if(U_FAILURE(errorCode)) { 2665 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2666 return; 2667 } 2668 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2669 2670 /* Test the condition when source >= sourceLimit */ 2671 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2672 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2673 /*Test for the condition where we have a truncated char*/ 2674 { 2675 static const uint8_t source1[]={0xc4}; 2676 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2677 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2678 } 2679 /*Test for the condition where there is an invalid character*/ 2680 { 2681 static const uint8_t source2[]={0xa1, 0x01}; 2682 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2683 } 2684 ucnv_close(cnv); 2685 } 2686 2687 #endif 2688 2689 static void 2690 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2691 const UChar* uSource; 2692 const UChar* uSourceLimit; 2693 const char* cSource; 2694 const char* cSourceLimit; 2695 UChar *uTargetLimit =NULL; 2696 UChar *uTarget; 2697 char *cTarget; 2698 const char *cTargetLimit; 2699 char *cBuf; 2700 UChar *uBuf,*test; 2701 int32_t uBufSize = 120; 2702 int len=0; 2703 int i=2; 2704 UErrorCode errorCode=U_ZERO_ERROR; 2705 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2706 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2707 ucnv_reset(cnv); 2708 for(;--i>0; ){ 2709 uSource = (UChar*) source; 2710 uSourceLimit=(const UChar*)sourceLimit; 2711 cTarget = cBuf; 2712 uTarget = uBuf; 2713 cSource = cBuf; 2714 cTargetLimit = cBuf; 2715 uTargetLimit = uBuf; 2716 2717 do{ 2718 2719 cTargetLimit = cTargetLimit+ i; 2720 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2721 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2722 errorCode=U_ZERO_ERROR; 2723 continue; 2724 } 2725 2726 if(U_FAILURE(errorCode)){ 2727 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2728 return; 2729 } 2730 2731 }while (uSource<uSourceLimit); 2732 2733 cSourceLimit =cTarget; 2734 do{ 2735 uTargetLimit=uTargetLimit+i; 2736 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2737 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2738 errorCode=U_ZERO_ERROR; 2739 continue; 2740 } 2741 if(U_FAILURE(errorCode)){ 2742 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2743 return; 2744 } 2745 }while(cSource<cSourceLimit); 2746 2747 uSource = source; 2748 test =uBuf; 2749 for(len=0;len<(int)(source - sourceLimit);len++){ 2750 if(uBuf[len]!=uSource[len]){ 2751 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2752 } 2753 } 2754 } 2755 free(uBuf); 2756 free(cBuf); 2757 } 2758 /* Test for Jitterbug 778 */ 2759 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2760 const UChar* uSource; 2761 const UChar* uSourceLimit; 2762 const char* cSource; 2763 UChar *uTargetLimit =NULL; 2764 UChar *uTarget; 2765 char *cTarget; 2766 const char *cTargetLimit; 2767 char *cBuf; 2768 UChar *uBuf,*test; 2769 int32_t uBufSize = 120; 2770 int numCharsInTarget=0; 2771 UErrorCode errorCode=U_ZERO_ERROR; 2772 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2773 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2774 uSource = source; 2775 uSourceLimit=sourceLimit; 2776 cTarget = cBuf; 2777 cTargetLimit = cBuf +uBufSize*5; 2778 uTarget = uBuf; 2779 uTargetLimit = uBuf+ uBufSize*5; 2780 ucnv_reset(cnv); 2781 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2782 if(U_FAILURE(errorCode)){ 2783 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2784 return; 2785 } 2786 cSource = cBuf; 2787 test =uBuf; 2788 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2789 if(U_FAILURE(errorCode)){ 2790 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2791 return; 2792 } 2793 uSource = source; 2794 while(uSource<uSourceLimit){ 2795 if(*test!=*uSource){ 2796 2797 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 2798 } 2799 uSource++; 2800 test++; 2801 } 2802 free(uBuf); 2803 free(cBuf); 2804 } 2805 2806 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2807 const UChar* uSource; 2808 const UChar* uSourceLimit; 2809 const char* cSource; 2810 const char* cSourceLimit; 2811 UChar *uTargetLimit =NULL; 2812 UChar *uTarget; 2813 char *cTarget; 2814 const char *cTargetLimit; 2815 char *cBuf; 2816 UChar *uBuf,*test; 2817 int32_t uBufSize = 120; 2818 int len=0; 2819 int i=2; 2820 const UChar *temp = sourceLimit; 2821 UErrorCode errorCode=U_ZERO_ERROR; 2822 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2823 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2824 2825 ucnv_reset(cnv); 2826 for(;--i>0;){ 2827 uSource = (UChar*) source; 2828 cTarget = cBuf; 2829 uTarget = uBuf; 2830 cSource = cBuf; 2831 cTargetLimit = cBuf; 2832 uTargetLimit = uBuf+uBufSize*5; 2833 cTargetLimit = cTargetLimit+uBufSize*10; 2834 uSourceLimit=uSource; 2835 do{ 2836 2837 if (uSourceLimit < sourceLimit) { 2838 uSourceLimit = uSourceLimit+1; 2839 } 2840 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2841 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2842 errorCode=U_ZERO_ERROR; 2843 continue; 2844 } 2845 2846 if(U_FAILURE(errorCode)){ 2847 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2848 return; 2849 } 2850 2851 }while (uSource<temp); 2852 2853 cSourceLimit =cBuf; 2854 do{ 2855 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2856 cSourceLimit = cSourceLimit+1; 2857 } 2858 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2859 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2860 errorCode=U_ZERO_ERROR; 2861 continue; 2862 } 2863 if(U_FAILURE(errorCode)){ 2864 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2865 return; 2866 } 2867 }while(cSource<cTarget); 2868 2869 uSource = source; 2870 test =uBuf; 2871 for(;len<(int)(source - sourceLimit);len++){ 2872 if(uBuf[len]!=uSource[len]){ 2873 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2874 } 2875 } 2876 } 2877 free(uBuf); 2878 free(cBuf); 2879 } 2880 static void 2881 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2882 const uint16_t results[], const char* message){ 2883 const char* s0; 2884 const char* s=(char*)source; 2885 const uint16_t *r=results; 2886 UErrorCode errorCode=U_ZERO_ERROR; 2887 uint32_t c,exC; 2888 ucnv_reset(cnv); 2889 while(s<limit) { 2890 s0=s; 2891 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2892 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2893 break; /* no more significant input */ 2894 } else if(U_FAILURE(errorCode)) { 2895 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2896 break; 2897 } else { 2898 if(UTF_IS_FIRST_SURROGATE(*r)){ 2899 int i =0, len = 2; 2900 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE); 2901 r++; 2902 }else{ 2903 exC = *r; 2904 } 2905 if(c!=(uint32_t)(exC)) 2906 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 2907 } 2908 r++; 2909 } 2910 } 2911 2912 static int TestJitterbug930(const char* enc){ 2913 UErrorCode err = U_ZERO_ERROR; 2914 UConverter*converter; 2915 char out[80]; 2916 char*target = out; 2917 UChar in[4]; 2918 const UChar*source = in; 2919 int32_t off[80]; 2920 int32_t* offsets = off; 2921 int numOffWritten=0; 2922 UBool flush = 0; 2923 converter = my_ucnv_open(enc, &err); 2924 2925 in[0] = 0x41; /* 0x4E00;*/ 2926 in[1] = 0x4E01; 2927 in[2] = 0x4E02; 2928 in[3] = 0x4E03; 2929 2930 memset(off, '*', sizeof(off)); 2931 2932 ucnv_fromUnicode (converter, 2933 &target, 2934 target+2, 2935 &source, 2936 source+3, 2937 offsets, 2938 flush, 2939 &err); 2940 2941 /* writes three bytes into the output buffer: 41 1B 24 2942 * but offsets contains 0 1 1 2943 */ 2944 while(*offsets< off[10]){ 2945 numOffWritten++; 2946 offsets++; 2947 } 2948 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 2949 if(numOffWritten!= (int)(target-out)){ 2950 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 2951 } 2952 2953 err = U_ZERO_ERROR; 2954 2955 memset(off,'*' , sizeof(off)); 2956 2957 flush = 1; 2958 offsets=off; 2959 ucnv_fromUnicode (converter, 2960 &target, 2961 target+4, 2962 &source, 2963 source, 2964 offsets, 2965 flush, 2966 &err); 2967 numOffWritten=0; 2968 while(*offsets< off[10]){ 2969 numOffWritten++; 2970 if(*offsets!= -1){ 2971 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 2972 } 2973 offsets++; 2974 } 2975 2976 /* writes 42 43 7A into output buffer, 2977 * offsets contains -1 -1 -1 2978 */ 2979 ucnv_close(converter); 2980 return 0; 2981 } 2982 2983 static void 2984 TestHZ() { 2985 /* test input */ 2986 static const uint16_t in[]={ 2987 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 2988 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 2989 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 2990 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 2991 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 2992 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 2993 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 2994 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 2995 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 2996 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 2997 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 2998 0x005A, 0x005B, 0x005C, 0x000A 2999 }; 3000 const UChar* uSource; 3001 const UChar* uSourceLimit; 3002 const char* cSource; 3003 const char* cSourceLimit; 3004 UChar *uTargetLimit =NULL; 3005 UChar *uTarget; 3006 char *cTarget; 3007 const char *cTargetLimit; 3008 char *cBuf; 3009 UChar *uBuf,*test; 3010 int32_t uBufSize = 120; 3011 UErrorCode errorCode=U_ZERO_ERROR; 3012 UConverter *cnv; 3013 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3014 int32_t* myOff= offsets; 3015 cnv=ucnv_open("HZ", &errorCode); 3016 if(U_FAILURE(errorCode)) { 3017 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3018 return; 3019 } 3020 3021 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3022 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3023 uSource = (const UChar*)in; 3024 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3025 cTarget = cBuf; 3026 cTargetLimit = cBuf +uBufSize*5; 3027 uTarget = uBuf; 3028 uTargetLimit = uBuf+ uBufSize*5; 3029 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3030 if(U_FAILURE(errorCode)){ 3031 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3032 return; 3033 } 3034 cSource = cBuf; 3035 cSourceLimit =cTarget; 3036 test =uBuf; 3037 myOff=offsets; 3038 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3039 if(U_FAILURE(errorCode)){ 3040 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3041 return; 3042 } 3043 uSource = (const UChar*)in; 3044 while(uSource<uSourceLimit){ 3045 if(*test!=*uSource){ 3046 3047 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3048 } 3049 uSource++; 3050 test++; 3051 } 3052 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3053 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3054 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3055 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3056 TestJitterbug930("csISO2022JP"); 3057 ucnv_close(cnv); 3058 free(offsets); 3059 free(uBuf); 3060 free(cBuf); 3061 } 3062 3063 static void 3064 TestISCII(){ 3065 /* test input */ 3066 static const uint16_t in[]={ 3067 /* test full range of Devanagari */ 3068 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3069 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3070 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3071 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3072 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3073 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3074 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3075 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3076 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3077 0x096D,0x096E,0x096F, 3078 /* test Soft halant*/ 3079 0x0915,0x094d, 0x200D, 3080 /* test explicit halant */ 3081 0x0915,0x094d, 0x200c, 3082 /* test double danda */ 3083 0x965, 3084 /* test ASCII */ 3085 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3086 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3087 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3088 /* tests from Lotus */ 3089 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3090 0x0930,0x094D,0x200D, 3091 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3092 0x0915,0x0921,0x002B,0x095F, 3093 /* tamil range */ 3094 0x0B86, 0xB87, 0xB88, 3095 /* telugu range */ 3096 0x0C05, 0x0C02, 0x0C03,0x0c31, 3097 /* kannada range */ 3098 0x0C85, 0xC82, 0x0C83, 3099 /* test Abbr sign and Anudatta */ 3100 0x0970, 0x952, 3101 /* 0x0958, 3102 0x0959, 3103 0x095A, 3104 0x095B, 3105 0x095C, 3106 0x095D, 3107 0x095E, 3108 0x095F,*/ 3109 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3110 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3111 0x090C , 3112 0x0962, 3113 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3114 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3115 0x0950 /* OM Symbol 0xa1, 0xE9,*/, 3116 0x093D /* Avagraha 0xEA, 0xE9*/, 3117 0x0958, 3118 0x0959, 3119 0x095A, 3120 0x095B, 3121 0x095C, 3122 0x095D, 3123 0x095E, 3124 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3125 }; 3126 static const unsigned char byteArr[]={ 3127 3128 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3129 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3130 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3131 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3132 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3133 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3134 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3135 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3136 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3137 0xf8,0xf9,0xfa, 3138 /* test soft halant */ 3139 0xb3, 0xE8, 0xE9, 3140 /* test explicit halant */ 3141 0xb3, 0xE8, 0xE8, 3142 /* test double danda */ 3143 0xea, 0xea, 3144 /* test ASCII */ 3145 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3146 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3147 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3148 /* test ATR code */ 3149 3150 /* tests from Lotus */ 3151 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3152 0xEF,0x42,0xCF,0xE8,0xD9, 3153 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3154 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3155 /* tamil range */ 3156 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3157 /* telugu range */ 3158 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3159 /* kannada range */ 3160 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3161 /* anudatta and abbreviation sign */ 3162 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3163 3164 3165 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3166 3167 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3168 3169 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3170 3171 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3172 3173 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3174 3175 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3176 3177 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3178 3179 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ 3180 3181 0xB3, 0xE9, /* Ka + NUKTA */ 3182 3183 0xB4, 0xE9, /* Kha + NUKTA */ 3184 3185 0xB5, 0xE9, /* Ga + NUKTA */ 3186 3187 0xBA, 0xE9, 3188 3189 0xBF, 0xE9, 3190 3191 0xC0, 0xE9, 3192 3193 0xC9, 0xE9, 3194 /* INV halant RA */ 3195 0xD9, 0xE8, 0xCF, 3196 0x00, 0x00A0, 3197 /* just consume unhandled codepoints */ 3198 0xEF, 0x30, 3199 3200 }; 3201 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3202 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3203 3204 } 3205 3206 static void 3207 TestISO_2022_JP() { 3208 /* test input */ 3209 static const uint16_t in[]={ 3210 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3211 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3212 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3213 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3214 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3215 0x201D, 0x3014, 0x000D, 0x000A, 3216 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3217 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3218 }; 3219 const UChar* uSource; 3220 const UChar* uSourceLimit; 3221 const char* cSource; 3222 const char* cSourceLimit; 3223 UChar *uTargetLimit =NULL; 3224 UChar *uTarget; 3225 char *cTarget; 3226 const char *cTargetLimit; 3227 char *cBuf; 3228 UChar *uBuf,*test; 3229 int32_t uBufSize = 120; 3230 UErrorCode errorCode=U_ZERO_ERROR; 3231 UConverter *cnv; 3232 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3233 int32_t* myOff= offsets; 3234 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3235 if(U_FAILURE(errorCode)) { 3236 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3237 return; 3238 } 3239 3240 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3241 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3242 uSource = (const UChar*)in; 3243 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3244 cTarget = cBuf; 3245 cTargetLimit = cBuf +uBufSize*5; 3246 uTarget = uBuf; 3247 uTargetLimit = uBuf+ uBufSize*5; 3248 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3249 if(U_FAILURE(errorCode)){ 3250 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3251 return; 3252 } 3253 cSource = cBuf; 3254 cSourceLimit =cTarget; 3255 test =uBuf; 3256 myOff=offsets; 3257 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3258 if(U_FAILURE(errorCode)){ 3259 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3260 return; 3261 } 3262 3263 uSource = (const UChar*)in; 3264 while(uSource<uSourceLimit){ 3265 if(*test!=*uSource){ 3266 3267 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3268 } 3269 uSource++; 3270 test++; 3271 } 3272 3273 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3274 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3275 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3276 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3277 TestJitterbug930("csISO2022JP"); 3278 ucnv_close(cnv); 3279 free(uBuf); 3280 free(cBuf); 3281 free(offsets); 3282 } 3283 3284 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3285 const UChar* uSource; 3286 const UChar* uSourceLimit; 3287 const char* cSource; 3288 const char* cSourceLimit; 3289 UChar *uTargetLimit =NULL; 3290 UChar *uTarget; 3291 char *cTarget; 3292 const char *cTargetLimit; 3293 char *cBuf; 3294 UChar *uBuf,*test; 3295 int32_t uBufSize = 120*10; 3296 UErrorCode errorCode=U_ZERO_ERROR; 3297 UConverter *cnv; 3298 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3299 int32_t* myOff= offsets; 3300 cnv=my_ucnv_open(conv, &errorCode); 3301 if(U_FAILURE(errorCode)) { 3302 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3303 return; 3304 } 3305 3306 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3307 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3308 uSource = (const UChar*)in; 3309 uSourceLimit=uSource+len; 3310 cTarget = cBuf; 3311 cTargetLimit = cBuf +uBufSize; 3312 uTarget = uBuf; 3313 uTargetLimit = uBuf+ uBufSize; 3314 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3315 if(U_FAILURE(errorCode)){ 3316 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3317 return; 3318 } 3319 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3320 cSource = cBuf; 3321 cSourceLimit =cTarget; 3322 test =uBuf; 3323 myOff=offsets; 3324 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3325 if(U_FAILURE(errorCode)){ 3326 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3327 return; 3328 } 3329 3330 uSource = (const UChar*)in; 3331 while(uSource<uSourceLimit){ 3332 if(*test!=*uSource){ 3333 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3334 } 3335 uSource++; 3336 test++; 3337 } 3338 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3339 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3340 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3341 if(byteArr && byteArrLen!=0){ 3342 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3343 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3344 { 3345 cSource = byteArr; 3346 cSourceLimit = cSource+byteArrLen; 3347 test=uBuf; 3348 myOff = offsets; 3349 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3350 if(U_FAILURE(errorCode)){ 3351 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3352 return; 3353 } 3354 3355 uSource = (const UChar*)in; 3356 while(uSource<uSourceLimit){ 3357 if(*test!=*uSource){ 3358 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3359 } 3360 uSource++; 3361 test++; 3362 } 3363 } 3364 } 3365 3366 ucnv_close(cnv); 3367 free(uBuf); 3368 free(cBuf); 3369 free(offsets); 3370 } 3371 static UChar U_CALLCONV 3372 _charAt(int32_t offset, void *context) { 3373 return ((char*)context)[offset]; 3374 } 3375 3376 static int32_t 3377 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3378 int32_t srcIndex=0; 3379 int32_t dstIndex=0; 3380 if(U_FAILURE(*status)){ 3381 return 0; 3382 } 3383 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3384 *status = U_ILLEGAL_ARGUMENT_ERROR; 3385 return 0; 3386 } 3387 if(srcLen==-1){ 3388 srcLen = (int32_t)uprv_strlen(src); 3389 } 3390 3391 for (; srcIndex<srcLen; ) { 3392 UChar32 c = src[srcIndex++]; 3393 if (c == 0x005C /*'\\'*/) { 3394 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3395 if (c == (UChar32)0xFFFFFFFF) { 3396 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3397 break; /* invalid escape sequence */ 3398 } 3399 } 3400 if(dstIndex < dstLen){ 3401 if(c>0xFFFF){ 3402 dst[dstIndex++] = UTF16_LEAD(c); 3403 if(dstIndex<dstLen){ 3404 dst[dstIndex]=UTF16_TRAIL(c); 3405 }else{ 3406 *status=U_BUFFER_OVERFLOW_ERROR; 3407 } 3408 }else{ 3409 dst[dstIndex]=(UChar)c; 3410 } 3411 3412 }else{ 3413 *status = U_BUFFER_OVERFLOW_ERROR; 3414 } 3415 dstIndex++; /* for preflighting */ 3416 } 3417 return dstIndex; 3418 } 3419 3420 static void 3421 TestFullRoundtrip(const char* cp){ 3422 UChar usource[10] ={0}; 3423 UChar nsrc[10] = {0}; 3424 uint32_t i=1; 3425 int len=0, ulen; 3426 nsrc[0]=0x0061; 3427 /* Test codepoint 0 */ 3428 TestConv(usource,1,cp,"",NULL,0); 3429 TestConv(usource,2,cp,"",NULL,0); 3430 nsrc[2]=0x5555; 3431 TestConv(nsrc,3,cp,"",NULL,0); 3432 3433 for(;i<=0x10FFFF;i++){ 3434 if(i==0xD800){ 3435 i=0xDFFF; 3436 continue; 3437 } 3438 if(i<=0xFFFF){ 3439 usource[0] =(UChar) i; 3440 len=1; 3441 }else{ 3442 usource[0]=UTF16_LEAD(i); 3443 usource[1]=UTF16_TRAIL(i); 3444 len=2; 3445 } 3446 ulen=len; 3447 if(i==0x80) { 3448 usource[2]=0; 3449 } 3450 /* Test only single code points */ 3451 TestConv(usource,ulen,cp,"",NULL,0); 3452 /* Test codepoint repeated twice */ 3453 usource[ulen]=usource[0]; 3454 usource[ulen+1]=usource[1]; 3455 ulen+=len; 3456 TestConv(usource,ulen,cp,"",NULL,0); 3457 /* Test codepoint repeated 3 times */ 3458 usource[ulen]=usource[0]; 3459 usource[ulen+1]=usource[1]; 3460 ulen+=len; 3461 TestConv(usource,ulen,cp,"",NULL,0); 3462 /* Test codepoint in between 2 codepoints */ 3463 nsrc[1]=usource[0]; 3464 nsrc[2]=usource[1]; 3465 nsrc[len+1]=0x5555; 3466 TestConv(nsrc,len+2,cp,"",NULL,0); 3467 uprv_memset(usource,0,sizeof(UChar)*10); 3468 } 3469 } 3470 3471 static void 3472 TestRoundTrippingAllUTF(void){ 3473 if(!QUICK){ 3474 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3475 TestFullRoundtrip("BOCU-1"); 3476 log_verbose("Running exhaustive round trip test for SCSU\n"); 3477 TestFullRoundtrip("SCSU"); 3478 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3479 TestFullRoundtrip("UTF-8"); 3480 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3481 TestFullRoundtrip("CESU-8"); 3482 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3483 TestFullRoundtrip("UTF-16BE"); 3484 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3485 TestFullRoundtrip("UTF-16LE"); 3486 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3487 TestFullRoundtrip("UTF-16"); 3488 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3489 TestFullRoundtrip("UTF-32BE"); 3490 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3491 TestFullRoundtrip("UTF-32LE"); 3492 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3493 TestFullRoundtrip("UTF-32"); 3494 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3495 TestFullRoundtrip("UTF-7"); 3496 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3497 TestFullRoundtrip("UTF-7,version=1"); 3498 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3499 TestFullRoundtrip("IMAP-mailbox-name"); 3500 log_verbose("Running exhaustive round trip test for GB18030\n"); 3501 TestFullRoundtrip("GB18030"); 3502 } 3503 } 3504 3505 static void 3506 TestSCSU() { 3507 3508 static const uint16_t germanUTF16[]={ 3509 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3510 }; 3511 3512 static const uint8_t germanSCSU[]={ 3513 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3514 }; 3515 3516 static const uint16_t russianUTF16[]={ 3517 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3518 }; 3519 3520 static const uint8_t russianSCSU[]={ 3521 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3522 }; 3523 3524 static const uint16_t japaneseUTF16[]={ 3525 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3526 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3527 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3528 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3529 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3530 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 3531 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3532 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3533 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3534 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3535 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3536 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3537 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3538 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3539 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3540 }; 3541 3542 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3543 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3544 static const uint8_t japaneseSCSU[]={ 3545 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3546 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3547 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3548 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3549 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3550 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3551 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3552 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3553 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3554 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3555 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3556 0xcb, 0x82 3557 }; 3558 3559 static const uint16_t allFeaturesUTF16[]={ 3560 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3561 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3562 0x01df, 0xf000, 0xdbff, 0xdfff 3563 }; 3564 3565 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3566 * result here (34B vs. 35B) 3567 */ 3568 static const uint8_t allFeaturesSCSU[]={ 3569 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3570 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3571 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3572 0xdf, 0x14, 0x80, 0x15, 0xff 3573 }; 3574 static const uint16_t monkeyIn[]={ 3575 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3576 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3577 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3578 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3579 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3580 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3581 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3582 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3583 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3584 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3585 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3586 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3587 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3588 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3589 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3590 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3591 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3592 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3593 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3594 /* test non-BMP code points */ 3595 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3596 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3597 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3598 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3599 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3600 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3601 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3602 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3603 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3604 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3605 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3606 3607 3608 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3609 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3610 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3611 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3612 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3613 }; 3614 static const char *fTestCases [] = { 3615 "\\ud800\\udc00", /* smallest surrogate*/ 3616 "\\ud8ff\\udcff", 3617 "\\udBff\\udFff", /* largest surrogate pair*/ 3618 "\\ud834\\udc00", 3619 "\\U0010FFFF", 3620 "Hello \\u9292 \\u9192 World!", 3621 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3622 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3623 3624 "\\u0648\\u06c8", /* catch missing reset*/ 3625 "\\u0648\\u06c8", 3626 3627 "\\u4444\\uE001", /* lowest quotable*/ 3628 "\\u4444\\uf2FF", /* highest quotable*/ 3629 "\\u4444\\uf188\\u4444", 3630 "\\u4444\\uf188\\uf288", 3631 "\\u4444\\uf188abc\\u0429\\uf288", 3632 "\\u9292\\u2222", 3633 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3634 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3635 "Hello World!123456", 3636 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3637 3638 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3639 "abc\\u4411d", /* uses SQU*/ 3640 "abc\\u4411\\u4412d",/* uses SCU*/ 3641 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3642 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3643 "\\u9292\\u2222", 3644 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3645 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3646 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3647 3648 "", /* empty input*/ 3649 "\\u0000", /* smallest BMP character*/ 3650 "\\uFFFF", /* largest BMP character*/ 3651 3652 /* regression tests*/ 3653 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3654 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3655 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3656 "\\u0041\\u00df\\u0401\\u015f", 3657 "\\u9066\\u2123abc", 3658 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3659 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3660 }; 3661 int i=0; 3662 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3663 const char* cSrc = fTestCases[i]; 3664 UErrorCode status = U_ZERO_ERROR; 3665 int32_t cSrcLen,srcLen; 3666 UChar* src; 3667 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3668 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3669 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3670 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3671 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3672 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3673 free(src); 3674 } 3675 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3676 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3677 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3678 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3679 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3680 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3681 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3682 } 3683 3684 #if !UCONFIG_NO_LEGACY_CONVERSION 3685 static void TestJitterbug2346(){ 3686 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3687 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3688 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3689 3690 UChar uTarget[500]={'\0'}; 3691 UChar* utarget=uTarget; 3692 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3693 3694 char cTarget[500]={'\0'}; 3695 char* ctarget=cTarget; 3696 char* ctargetLimit=cTarget+sizeof(cTarget); 3697 const char* csource=source; 3698 UChar* temp = expected; 3699 UErrorCode err=U_ZERO_ERROR; 3700 3701 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3702 if(U_FAILURE(err)) { 3703 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3704 return; 3705 } 3706 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3707 if(U_FAILURE(err)) { 3708 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3709 return; 3710 } 3711 utargetLimit=utarget; 3712 utarget = uTarget; 3713 while(utarget<utargetLimit){ 3714 if(*temp!=*utarget){ 3715 3716 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3717 } 3718 utarget++; 3719 temp++; 3720 } 3721 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3722 if(U_FAILURE(err)) { 3723 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3724 return; 3725 } 3726 ctargetLimit=ctarget; 3727 ctarget =cTarget; 3728 ucnv_close(conv); 3729 3730 3731 } 3732 3733 static void 3734 TestISO_2022_JP_1() { 3735 /* test input */ 3736 static const uint16_t in[]={ 3737 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3738 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3739 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3740 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3741 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3742 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3743 0x201D, 0x000D, 0x000A, 3744 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3745 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3746 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3747 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3748 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3749 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3750 }; 3751 const UChar* uSource; 3752 const UChar* uSourceLimit; 3753 const char* cSource; 3754 const char* cSourceLimit; 3755 UChar *uTargetLimit =NULL; 3756 UChar *uTarget; 3757 char *cTarget; 3758 const char *cTargetLimit; 3759 char *cBuf; 3760 UChar *uBuf,*test; 3761 int32_t uBufSize = 120; 3762 UErrorCode errorCode=U_ZERO_ERROR; 3763 UConverter *cnv; 3764 3765 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3766 if(U_FAILURE(errorCode)) { 3767 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3768 return; 3769 } 3770 3771 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3772 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3773 uSource = (const UChar*)in; 3774 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3775 cTarget = cBuf; 3776 cTargetLimit = cBuf +uBufSize*5; 3777 uTarget = uBuf; 3778 uTargetLimit = uBuf+ uBufSize*5; 3779 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3780 if(U_FAILURE(errorCode)){ 3781 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3782 return; 3783 } 3784 cSource = cBuf; 3785 cSourceLimit =cTarget; 3786 test =uBuf; 3787 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3788 if(U_FAILURE(errorCode)){ 3789 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3790 return; 3791 } 3792 uSource = (const UChar*)in; 3793 while(uSource<uSourceLimit){ 3794 if(*test!=*uSource){ 3795 3796 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3797 } 3798 uSource++; 3799 test++; 3800 } 3801 /*ucnv_close(cnv); 3802 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3803 /*Test for the condition where there is an invalid character*/ 3804 ucnv_reset(cnv); 3805 { 3806 static const uint8_t source2[]={0x0e,0x24,0x053}; 3807 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3808 } 3809 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3810 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3811 ucnv_close(cnv); 3812 free(uBuf); 3813 free(cBuf); 3814 } 3815 3816 static void 3817 TestISO_2022_JP_2() { 3818 /* test input */ 3819 static const uint16_t in[]={ 3820 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3821 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3822 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3823 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3824 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3825 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3826 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3827 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3828 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3829 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3830 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3831 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3832 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3833 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3834 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3835 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3836 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3837 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3838 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3839 }; 3840 const UChar* uSource; 3841 const UChar* uSourceLimit; 3842 const char* cSource; 3843 const char* cSourceLimit; 3844 UChar *uTargetLimit =NULL; 3845 UChar *uTarget; 3846 char *cTarget; 3847 const char *cTargetLimit; 3848 char *cBuf; 3849 UChar *uBuf,*test; 3850 int32_t uBufSize = 120; 3851 UErrorCode errorCode=U_ZERO_ERROR; 3852 UConverter *cnv; 3853 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3854 int32_t* myOff= offsets; 3855 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3856 if(U_FAILURE(errorCode)) { 3857 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3858 return; 3859 } 3860 3861 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3862 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3863 uSource = (const UChar*)in; 3864 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3865 cTarget = cBuf; 3866 cTargetLimit = cBuf +uBufSize*5; 3867 uTarget = uBuf; 3868 uTargetLimit = uBuf+ uBufSize*5; 3869 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3870 if(U_FAILURE(errorCode)){ 3871 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3872 return; 3873 } 3874 cSource = cBuf; 3875 cSourceLimit =cTarget; 3876 test =uBuf; 3877 myOff=offsets; 3878 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3879 if(U_FAILURE(errorCode)){ 3880 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3881 return; 3882 } 3883 uSource = (const UChar*)in; 3884 while(uSource<uSourceLimit){ 3885 if(*test!=*uSource){ 3886 3887 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3888 } 3889 uSource++; 3890 test++; 3891 } 3892 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3893 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3894 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3895 /*Test for the condition where there is an invalid character*/ 3896 ucnv_reset(cnv); 3897 { 3898 static const uint8_t source2[]={0x0e,0x24,0x053}; 3899 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 3900 } 3901 ucnv_close(cnv); 3902 free(uBuf); 3903 free(cBuf); 3904 free(offsets); 3905 } 3906 3907 static void 3908 TestISO_2022_KR() { 3909 /* test input */ 3910 static const uint16_t in[]={ 3911 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 3912 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 3913 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 3914 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 3915 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 3916 ,0x53E3,0x53E4,0x000A,0x000D}; 3917 const UChar* uSource; 3918 const UChar* uSourceLimit; 3919 const char* cSource; 3920 const char* cSourceLimit; 3921 UChar *uTargetLimit =NULL; 3922 UChar *uTarget; 3923 char *cTarget; 3924 const char *cTargetLimit; 3925 char *cBuf; 3926 UChar *uBuf,*test; 3927 int32_t uBufSize = 120; 3928 UErrorCode errorCode=U_ZERO_ERROR; 3929 UConverter *cnv; 3930 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3931 int32_t* myOff= offsets; 3932 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 3933 if(U_FAILURE(errorCode)) { 3934 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3935 return; 3936 } 3937 3938 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3939 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3940 uSource = (const UChar*)in; 3941 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3942 cTarget = cBuf; 3943 cTargetLimit = cBuf +uBufSize*5; 3944 uTarget = uBuf; 3945 uTargetLimit = uBuf+ uBufSize*5; 3946 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3947 if(U_FAILURE(errorCode)){ 3948 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3949 return; 3950 } 3951 cSource = cBuf; 3952 cSourceLimit =cTarget; 3953 test =uBuf; 3954 myOff=offsets; 3955 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3956 if(U_FAILURE(errorCode)){ 3957 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3958 return; 3959 } 3960 uSource = (const UChar*)in; 3961 while(uSource<uSourceLimit){ 3962 if(*test!=*uSource){ 3963 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 3964 } 3965 uSource++; 3966 test++; 3967 } 3968 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 3969 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3970 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3971 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3972 TestJitterbug930("csISO2022KR"); 3973 /*Test for the condition where there is an invalid character*/ 3974 ucnv_reset(cnv); 3975 { 3976 static const uint8_t source2[]={0x1b,0x24,0x053}; 3977 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 3978 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 3979 } 3980 ucnv_close(cnv); 3981 free(uBuf); 3982 free(cBuf); 3983 free(offsets); 3984 } 3985 3986 static void 3987 TestISO_2022_KR_1() { 3988 /* test input */ 3989 static const uint16_t in[]={ 3990 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 3991 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 3992 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 3993 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 3994 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 3995 ,0x53E3,0x53E4,0x000A,0x000D}; 3996 const UChar* uSource; 3997 const UChar* uSourceLimit; 3998 const char* cSource; 3999 const char* cSourceLimit; 4000 UChar *uTargetLimit =NULL; 4001 UChar *uTarget; 4002 char *cTarget; 4003 const char *cTargetLimit; 4004 char *cBuf; 4005 UChar *uBuf,*test; 4006 int32_t uBufSize = 120; 4007 UErrorCode errorCode=U_ZERO_ERROR; 4008 UConverter *cnv; 4009 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4010 int32_t* myOff= offsets; 4011 cnv=ucnv_open("ibm-25546", &errorCode); 4012 if(U_FAILURE(errorCode)) { 4013 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4014 return; 4015 } 4016 4017 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4018 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4019 uSource = (const UChar*)in; 4020 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4021 cTarget = cBuf; 4022 cTargetLimit = cBuf +uBufSize*5; 4023 uTarget = uBuf; 4024 uTargetLimit = uBuf+ uBufSize*5; 4025 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4026 if(U_FAILURE(errorCode)){ 4027 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4028 return; 4029 } 4030 cSource = cBuf; 4031 cSourceLimit =cTarget; 4032 test =uBuf; 4033 myOff=offsets; 4034 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4035 if(U_FAILURE(errorCode)){ 4036 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4037 return; 4038 } 4039 uSource = (const UChar*)in; 4040 while(uSource<uSourceLimit){ 4041 if(*test!=*uSource){ 4042 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4043 } 4044 uSource++; 4045 test++; 4046 } 4047 ucnv_reset(cnv); 4048 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4049 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4050 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4051 ucnv_reset(cnv); 4052 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4053 /*Test for the condition where there is an invalid character*/ 4054 ucnv_reset(cnv); 4055 { 4056 static const uint8_t source2[]={0x1b,0x24,0x053}; 4057 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4058 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4059 } 4060 ucnv_close(cnv); 4061 free(uBuf); 4062 free(cBuf); 4063 free(offsets); 4064 } 4065 4066 static void TestJitterbug2411(){ 4067 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4068 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4069 UConverter* kr=NULL, *kr1=NULL; 4070 UErrorCode errorCode = U_ZERO_ERROR; 4071 UChar tgt[100]={'\0'}; 4072 UChar* target = tgt; 4073 UChar* targetLimit = target+100; 4074 kr=ucnv_open("iso-2022-kr", &errorCode); 4075 if(U_FAILURE(errorCode)) { 4076 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4077 return; 4078 } 4079 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4080 if(U_FAILURE(errorCode)) { 4081 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4082 return; 4083 } 4084 kr1 = ucnv_open("ibm-25546", &errorCode); 4085 if(U_FAILURE(errorCode)) { 4086 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4087 return; 4088 } 4089 target = tgt; 4090 targetLimit = target+100; 4091 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4092 4093 if(U_FAILURE(errorCode)) { 4094 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4095 return; 4096 } 4097 4098 ucnv_close(kr); 4099 ucnv_close(kr1); 4100 4101 } 4102 4103 static void 4104 TestJIS(){ 4105 /* From Unicode moved to testdata/conversion.txt */ 4106 /*To Unicode*/ 4107 { 4108 static const uint8_t sampleTextJIS[] = { 4109 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4110 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4111 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4112 }; 4113 static const uint16_t expectedISO2022JIS[] = { 4114 0x0041, 0x0042, 4115 0xFF81, 0xFF82, 4116 0x3000 4117 }; 4118 static const int32_t toISO2022JISOffs[]={ 4119 3,4, 4120 8,9, 4121 16 4122 }; 4123 4124 static const uint8_t sampleTextJIS7[] = { 4125 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4126 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4127 0x1b,0x24,0x42,0x21,0x21, 4128 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4129 0x21,0x22, 4130 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4131 }; 4132 static const uint16_t expectedISO2022JIS7[] = { 4133 0x0041, 0x0042, 4134 0xFF81, 0xFF82, 4135 0x3000, 4136 0xFF81, 0xFF82, 4137 0x3001, 4138 0x3000 4139 }; 4140 static const int32_t toISO2022JIS7Offs[]={ 4141 3,4, 4142 8,9, 4143 13,16, 4144 17, 4145 19,27 4146 }; 4147 static const uint8_t sampleTextJIS8[] = { 4148 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4149 0xa1,0xc8,0xd9,/*Katakana Set*/ 4150 0x1b,0x28,0x42, 4151 0x41,0x42, 4152 0xb1,0xc3, /*Katakana Set*/ 4153 0x1b,0x24,0x42,0x21,0x21 4154 }; 4155 static const uint16_t expectedISO2022JIS8[] = { 4156 0x0041, 0x0042, 4157 0xff61, 0xff88, 0xff99, 4158 0x0041, 0x0042, 4159 0xff71, 0xff83, 4160 0x3000 4161 }; 4162 static const int32_t toISO2022JIS8Offs[]={ 4163 3, 4, 5, 6, 4164 7, 11, 12, 13, 4165 14, 18, 4166 }; 4167 4168 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4169 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4170 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4171 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4172 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4173 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4174 } 4175 4176 } 4177 4178 static void TestJitterbug915(){ 4179 /* tests for roundtripping of the below sequence 4180 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4181 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4182 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4183 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4184 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4185 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4186 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4187 */ 4188 static const char cSource[]={ 4189 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4190 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4191 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4192 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4193 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 4194 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4195 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4196 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4197 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4198 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4199 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4200 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4201 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4202 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4203 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4204 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4205 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4206 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4207 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4208 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4209 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4210 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4211 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4212 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4213 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4214 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4215 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4216 0x37, 0x20, 0x2A, 0x2F 4217 }; 4218 UChar uTarget[500]={'\0'}; 4219 UChar* utarget=uTarget; 4220 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4221 4222 char cTarget[500]={'\0'}; 4223 char* ctarget=cTarget; 4224 char* ctargetLimit=cTarget+sizeof(cTarget); 4225 const char* csource=cSource; 4226 const char* tempSrc = cSource; 4227 UErrorCode err=U_ZERO_ERROR; 4228 4229 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4230 if(U_FAILURE(err)) { 4231 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4232 return; 4233 } 4234 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4235 if(U_FAILURE(err)) { 4236 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4237 return; 4238 } 4239 utargetLimit=utarget; 4240 utarget = uTarget; 4241 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4242 if(U_FAILURE(err)) { 4243 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4244 return; 4245 } 4246 ctargetLimit=ctarget; 4247 ctarget =cTarget; 4248 while(ctarget<ctargetLimit){ 4249 if(*ctarget != *tempSrc){ 4250 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4251 } 4252 ++ctarget; 4253 ++tempSrc; 4254 } 4255 4256 ucnv_close(conv); 4257 } 4258 4259 static void 4260 TestISO_2022_CN_EXT() { 4261 /* test input */ 4262 static const uint16_t in[]={ 4263 /* test Non-BMP code points */ 4264 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4265 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4266 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4267 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4268 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4269 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4270 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4271 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4272 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4273 0xD869, 0xDED5, 4274 4275 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4276 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4277 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 4278 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4279 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4280 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4281 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4282 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4283 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4284 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4285 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4286 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4287 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4288 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4289 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4290 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4291 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4292 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4293 4294 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4295 4296 }; 4297 4298 const UChar* uSource; 4299 const UChar* uSourceLimit; 4300 const char* cSource; 4301 const char* cSourceLimit; 4302 UChar *uTargetLimit =NULL; 4303 UChar *uTarget; 4304 char *cTarget; 4305 const char *cTargetLimit; 4306 char *cBuf; 4307 UChar *uBuf,*test; 4308 int32_t uBufSize = 180; 4309 UErrorCode errorCode=U_ZERO_ERROR; 4310 UConverter *cnv; 4311 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4312 int32_t* myOff= offsets; 4313 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4314 if(U_FAILURE(errorCode)) { 4315 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4316 return; 4317 } 4318 4319 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4320 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4321 uSource = (const UChar*)in; 4322 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4323 cTarget = cBuf; 4324 cTargetLimit = cBuf +uBufSize*5; 4325 uTarget = uBuf; 4326 uTargetLimit = uBuf+ uBufSize*5; 4327 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4328 if(U_FAILURE(errorCode)){ 4329 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4330 return; 4331 } 4332 cSource = cBuf; 4333 cSourceLimit =cTarget; 4334 test =uBuf; 4335 myOff=offsets; 4336 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4337 if(U_FAILURE(errorCode)){ 4338 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4339 return; 4340 } 4341 uSource = (const UChar*)in; 4342 while(uSource<uSourceLimit){ 4343 if(*test!=*uSource){ 4344 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4345 } 4346 else{ 4347 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4348 } 4349 uSource++; 4350 test++; 4351 } 4352 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4353 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4354 /*Test for the condition where there is an invalid character*/ 4355 ucnv_reset(cnv); 4356 { 4357 static const uint8_t source2[]={0x0e,0x24,0x053}; 4358 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4359 } 4360 ucnv_close(cnv); 4361 free(uBuf); 4362 free(cBuf); 4363 free(offsets); 4364 } 4365 4366 static void 4367 TestISO_2022_CN() { 4368 /* test input */ 4369 static const uint16_t in[]={ 4370 /* jitterbug 951 */ 4371 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4372 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4373 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4374 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4375 0x0020, 0x0045, 0x004e, 0x0044, 4376 /**/ 4377 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4378 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4379 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4380 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4381 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4382 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4383 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4384 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4385 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4386 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4387 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4388 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4389 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4390 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4391 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4392 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4393 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4394 4395 }; 4396 const UChar* uSource; 4397 const UChar* uSourceLimit; 4398 const char* cSource; 4399 const char* cSourceLimit; 4400 UChar *uTargetLimit =NULL; 4401 UChar *uTarget; 4402 char *cTarget; 4403 const char *cTargetLimit; 4404 char *cBuf; 4405 UChar *uBuf,*test; 4406 int32_t uBufSize = 180; 4407 UErrorCode errorCode=U_ZERO_ERROR; 4408 UConverter *cnv; 4409 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4410 int32_t* myOff= offsets; 4411 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4412 if(U_FAILURE(errorCode)) { 4413 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4414 return; 4415 } 4416 4417 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4418 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4419 uSource = (const UChar*)in; 4420 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4421 cTarget = cBuf; 4422 cTargetLimit = cBuf +uBufSize*5; 4423 uTarget = uBuf; 4424 uTargetLimit = uBuf+ uBufSize*5; 4425 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4426 if(U_FAILURE(errorCode)){ 4427 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4428 return; 4429 } 4430 cSource = cBuf; 4431 cSourceLimit =cTarget; 4432 test =uBuf; 4433 myOff=offsets; 4434 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4435 if(U_FAILURE(errorCode)){ 4436 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4437 return; 4438 } 4439 uSource = (const UChar*)in; 4440 while(uSource<uSourceLimit){ 4441 if(*test!=*uSource){ 4442 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4443 } 4444 else{ 4445 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4446 } 4447 uSource++; 4448 test++; 4449 } 4450 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4451 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4452 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4453 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4454 TestJitterbug930("csISO2022CN"); 4455 /*Test for the condition where there is an invalid character*/ 4456 ucnv_reset(cnv); 4457 { 4458 static const uint8_t source2[]={0x0e,0x24,0x053}; 4459 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4460 } 4461 4462 ucnv_close(cnv); 4463 free(uBuf); 4464 free(cBuf); 4465 free(offsets); 4466 } 4467 4468 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4469 typedef struct { 4470 const char * converterName; 4471 const char * inputText; 4472 int inputTextLength; 4473 } EmptySegmentTest; 4474 4475 /* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4476 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4477 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4478 if (reason > UCNV_IRREGULAR) { 4479 return; 4480 } 4481 if (reason != UCNV_IRREGULAR) { 4482 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4483 } 4484 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4485 *err = U_ZERO_ERROR; 4486 ucnv_cbToUWriteSub(toArgs,0,err); 4487 } 4488 4489 enum { kEmptySegmentToUCharsMax = 64 }; 4490 static void TestJitterbug6175(void) { 4491 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4492 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4493 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4494 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4495 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4496 static const EmptySegmentTest emptySegmentTests[] = { 4497 /* converterName inputText inputTextLength */ 4498 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, 4499 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4500 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4501 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4502 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, 4503 /* terminator: */ 4504 { NULL, NULL, 0, } 4505 }; 4506 const EmptySegmentTest * testPtr; 4507 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4508 UErrorCode err = U_ZERO_ERROR; 4509 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4510 if (U_FAILURE(err)) { 4511 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4512 return; 4513 } 4514 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4515 if (U_FAILURE(err)) { 4516 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4517 ucnv_close(cnv); 4518 return; 4519 } 4520 { 4521 UChar toUChars[kEmptySegmentToUCharsMax]; 4522 UChar * toUCharsPtr = toUChars; 4523 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4524 const char * inCharsPtr = testPtr->inputText; 4525 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4526 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4527 } 4528 ucnv_close(cnv); 4529 } 4530 } 4531 4532 static void 4533 TestEBCDIC_STATEFUL() { 4534 /* test input */ 4535 static const uint8_t in[]={ 4536 0x61, 4537 0x1a, 4538 0x0f, 0x4b, 4539 0x42, 4540 0x40, 4541 0x36, 4542 }; 4543 4544 /* expected test results */ 4545 static const int32_t results[]={ 4546 /* number of bytes read, code point */ 4547 1, 0x002f, 4548 1, 0x0092, 4549 2, 0x002e, 4550 1, 0xff62, 4551 1, 0x0020, 4552 1, 0x0096, 4553 4554 }; 4555 static const uint8_t in2[]={ 4556 0x0f, 4557 0xa1, 4558 0x01 4559 }; 4560 4561 /* expected test results */ 4562 static const int32_t results2[]={ 4563 /* number of bytes read, code point */ 4564 2, 0x203E, 4565 1, 0x0001, 4566 }; 4567 4568 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4569 UErrorCode errorCode=U_ZERO_ERROR; 4570 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4571 if(U_FAILURE(errorCode)) { 4572 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4573 return; 4574 } 4575 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4576 ucnv_reset(cnv); 4577 /* Test the condition when source >= sourceLimit */ 4578 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4579 ucnv_reset(cnv); 4580 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4581 { 4582 static const uint8_t source1[]={0x0f}; 4583 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4584 } 4585 /*Test for the condition where there is an invalid character*/ 4586 ucnv_reset(cnv); 4587 { 4588 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4589 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4590 } 4591 ucnv_reset(cnv); 4592 source=(const char*)in2; 4593 limit=(const char*)in2+sizeof(in2); 4594 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4595 ucnv_close(cnv); 4596 4597 } 4598 4599 static void 4600 TestGB18030() { 4601 /* test input */ 4602 static const uint8_t in[]={ 4603 0x24, 4604 0x7f, 4605 0x81, 0x30, 0x81, 0x30, 4606 0xa8, 0xbf, 4607 0xa2, 0xe3, 4608 0xd2, 0xbb, 4609 0x82, 0x35, 0x8f, 0x33, 4610 0x84, 0x31, 0xa4, 0x39, 4611 0x90, 0x30, 0x81, 0x30, 4612 0xe3, 0x32, 0x9a, 0x35 4613 #if 0 4614 /* 4615 * Feature removed markus 2000-oct-26 4616 * Only some codepages must match surrogate pairs into supplementary code points - 4617 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . 4618 * GB 18030 provides direct encodings for supplementary code points, therefore 4619 * it must not combine two single-encoded surrogates into one code point. 4620 */ 4621 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4622 #endif 4623 }; 4624 4625 /* expected test results */ 4626 static const int32_t results[]={ 4627 /* number of bytes read, code point */ 4628 1, 0x24, 4629 1, 0x7f, 4630 4, 0x80, 4631 2, 0x1f9, 4632 2, 0x20ac, 4633 2, 0x4e00, 4634 4, 0x9fa6, 4635 4, 0xffff, 4636 4, 0x10000, 4637 4, 0x10ffff 4638 #if 0 4639 /* Feature removed. See comment above. */ 4640 8, 0x10000 4641 #endif 4642 }; 4643 4644 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4645 UErrorCode errorCode=U_ZERO_ERROR; 4646 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4647 if(U_FAILURE(errorCode)) { 4648 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4649 return; 4650 } 4651 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4652 ucnv_close(cnv); 4653 } 4654 4655 static void 4656 TestLMBCS() { 4657 /* LMBCS-1 string */ 4658 static const uint8_t pszLMBCS[]={ 4659 0x61, 4660 0x01, 0x29, 4661 0x81, 4662 0xA0, 4663 0x0F, 0x27, 4664 0x0F, 0x91, 4665 0x14, 0x0a, 0x74, 4666 0x14, 0xF6, 0x02, 4667 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4668 0x10, 0x88, 0xA0, 4669 }; 4670 4671 /* Unicode UChar32 equivalents */ 4672 static const UChar32 pszUnicode32[]={ 4673 /* code point */ 4674 0x00000061, 4675 0x00002013, 4676 0x000000FC, 4677 0x000000E1, 4678 0x00000007, 4679 0x00000091, 4680 0x00000a74, 4681 0x00000200, 4682 0x00023456, /* code point for surrogate pair */ 4683 0x00005516 4684 }; 4685 4686 /* Unicode UChar equivalents */ 4687 static const UChar pszUnicode[]={ 4688 /* code point */ 4689 0x0061, 4690 0x2013, 4691 0x00FC, 4692 0x00E1, 4693 0x0007, 4694 0x0091, 4695 0x0a74, 4696 0x0200, 4697 0xD84D, /* low surrogate */ 4698 0xDC56, /* high surrogate */ 4699 0x5516 4700 }; 4701 4702 /* expected test results */ 4703 static const int offsets32[]={ 4704 /* number of bytes read, code point */ 4705 0, 4706 1, 4707 3, 4708 4, 4709 5, 4710 7, 4711 9, 4712 12, 4713 15, 4714 21, 4715 24 4716 }; 4717 4718 /* expected test results */ 4719 static const int offsets[]={ 4720 /* number of bytes read, code point */ 4721 0, 4722 1, 4723 3, 4724 4, 4725 5, 4726 7, 4727 9, 4728 12, 4729 15, 4730 18, 4731 21, 4732 24 4733 }; 4734 4735 4736 UConverter *cnv; 4737 4738 #define NAME_LMBCS_1 "LMBCS-1" 4739 #define NAME_LMBCS_2 "LMBCS-2" 4740 4741 4742 /* Some basic open/close/property tests on some LMBCS converters */ 4743 { 4744 4745 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4746 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4747 char get_subchars [1]; 4748 const char * get_name; 4749 UConverter *cnv1; 4750 UConverter *cnv2; 4751 4752 int8_t len = sizeof(get_subchars); 4753 4754 UErrorCode errorCode=U_ZERO_ERROR; 4755 4756 /* Open */ 4757 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4758 if(U_FAILURE(errorCode)) { 4759 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4760 return; 4761 } 4762 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4763 if(U_FAILURE(errorCode)) { 4764 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4765 return; 4766 } 4767 4768 /* Name */ 4769 get_name = ucnv_getName (cnv1, &errorCode); 4770 if (strcmp(NAME_LMBCS_1,get_name)){ 4771 log_err("Unexpected converter name: %s\n", get_name); 4772 } 4773 get_name = ucnv_getName (cnv2, &errorCode); 4774 if (strcmp(NAME_LMBCS_2,get_name)){ 4775 log_err("Unexpected converter name: %s\n", get_name); 4776 } 4777 4778 /* substitution chars */ 4779 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4780 if(U_FAILURE(errorCode)) { 4781 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4782 } 4783 if (len!=1){ 4784 log_err("Unexpected length of sub chars\n"); 4785 } 4786 if (get_subchars[0] != expected_subchars[0]){ 4787 log_err("Unexpected value of sub chars\n"); 4788 } 4789 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4790 if(U_FAILURE(errorCode)) { 4791 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4792 } 4793 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4794 if(U_FAILURE(errorCode)) { 4795 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4796 } 4797 if (len!=1){ 4798 log_err("Unexpected length of sub chars\n"); 4799 } 4800 if (get_subchars[0] != new_subchars[0]){ 4801 log_err("Unexpected value of sub chars\n"); 4802 } 4803 ucnv_close(cnv1); 4804 ucnv_close(cnv2); 4805 4806 } 4807 4808 /* LMBCS to Unicode - offsets */ 4809 { 4810 UErrorCode errorCode=U_ZERO_ERROR; 4811 4812 const char * pSource = (const char *)pszLMBCS; 4813 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4814 4815 UChar Out [sizeof(pszUnicode) + 1]; 4816 UChar * pOut = Out; 4817 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4818 4819 int32_t off [sizeof(offsets)]; 4820 4821 /* last 'offset' in expected results is just the final size. 4822 (Makes other tests easier). Compensate here: */ 4823 4824 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4825 4826 4827 4828 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4829 if(U_FAILURE(errorCode)) { 4830 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4831 return; 4832 } 4833 4834 4835 4836 ucnv_toUnicode (cnv, 4837 &pOut, 4838 OutLimit, 4839 &pSource, 4840 sourceLimit, 4841 off, 4842 TRUE, 4843 &errorCode); 4844 4845 4846 if (memcmp(off,offsets,sizeof(offsets))) 4847 { 4848 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4849 } 4850 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4851 { 4852 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4853 } 4854 ucnv_close(cnv); 4855 } 4856 { 4857 /* LMBCS to Unicode - getNextUChar */ 4858 const char * sourceStart; 4859 const char *source=(const char *)pszLMBCS; 4860 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4861 const UChar32 *results= pszUnicode32; 4862 const int *off = offsets32; 4863 4864 UErrorCode errorCode=U_ZERO_ERROR; 4865 UChar32 uniChar; 4866 4867 cnv=ucnv_open("LMBCS-1", &errorCode); 4868 if(U_FAILURE(errorCode)) { 4869 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4870 return; 4871 } 4872 else 4873 { 4874 4875 while(source<limit) { 4876 sourceStart=source; 4877 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4878 if(U_FAILURE(errorCode)) { 4879 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4880 break; 4881 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4882 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4883 uniChar, (source-sourceStart), *results, *off); 4884 break; 4885 } 4886 results++; 4887 off++; 4888 } 4889 } 4890 ucnv_close(cnv); 4891 } 4892 { /* test locale & optimization group operations: Unicode to LMBCS */ 4893 4894 UErrorCode errorCode=U_ZERO_ERROR; 4895 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 4896 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 4897 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 4898 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 4899 const UChar * pUniOut = uniString; 4900 UChar * pUniIn = uniString; 4901 uint8_t lmbcsString [4]; 4902 const char * pLMBCSOut = (const char *)lmbcsString; 4903 char * pLMBCSIn = (char *)lmbcsString; 4904 4905 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 4906 ucnv_fromUnicode (cnv16he, 4907 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 4908 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 4909 NULL, 1, &errorCode); 4910 4911 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 4912 { 4913 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 4914 } 4915 4916 pLMBCSIn= (char *)lmbcsString; 4917 pUniOut = uniString; 4918 ucnv_fromUnicode (cnv01us, 4919 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 4920 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 4921 NULL, 1, &errorCode); 4922 4923 if (lmbcsString[0] != 0x9F) 4924 { 4925 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 4926 } 4927 4928 /* single byte char from mbcs char set */ 4929 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 4930 pLMBCSOut = (const char *)lmbcsString; 4931 pUniIn = uniString; 4932 ucnv_toUnicode (cnv16jp, 4933 &pUniIn, pUniIn + 1, 4934 &pLMBCSOut, (pLMBCSOut + 1), 4935 NULL, 1, &errorCode); 4936 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 4937 { 4938 log_err("Unexpected results from LMBCS-16 single byte char\n"); 4939 } 4940 /* convert to group 1: should be 3 bytes */ 4941 pLMBCSIn = (char *)lmbcsString; 4942 pUniOut = uniString; 4943 ucnv_fromUnicode (cnv01us, 4944 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 4945 &pUniOut, pUniOut + 1, 4946 NULL, 1, &errorCode); 4947 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 4948 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 4949 { 4950 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 4951 } 4952 pLMBCSOut = (const char *)lmbcsString; 4953 pUniIn = uniString; 4954 ucnv_toUnicode (cnv01us, 4955 &pUniIn, pUniIn + 1, 4956 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 4957 NULL, 1, &errorCode); 4958 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 4959 { 4960 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 4961 } 4962 pLMBCSIn = (char *)lmbcsString; 4963 pUniOut = uniString; 4964 ucnv_fromUnicode (cnv16jp, 4965 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 4966 &pUniOut, pUniOut + 1, 4967 NULL, 1, &errorCode); 4968 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 4969 { 4970 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 4971 } 4972 ucnv_close(cnv16he); 4973 ucnv_close(cnv16jp); 4974 ucnv_close(cnv01us); 4975 } 4976 { 4977 /* Small source buffer testing, LMBCS -> Unicode */ 4978 4979 UErrorCode errorCode=U_ZERO_ERROR; 4980 4981 const char * pSource = (const char *)pszLMBCS; 4982 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4983 int codepointCount = 0; 4984 4985 UChar Out [sizeof(pszUnicode) + 1]; 4986 UChar * pOut = Out; 4987 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4988 4989 4990 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 4991 if(U_FAILURE(errorCode)) { 4992 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4993 return; 4994 } 4995 4996 4997 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 4998 { 4999 ucnv_toUnicode (cnv, 5000 &pOut, 5001 OutLimit, 5002 &pSource, 5003 (pSource+1), /* claim that this is a 1- byte buffer */ 5004 NULL, 5005 FALSE, /* FALSE means there might be more chars in the next buffer */ 5006 &errorCode); 5007 5008 if (U_SUCCESS (errorCode)) 5009 { 5010 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5011 { 5012 /* we are on to the next code point: check value */ 5013 5014 if (Out[0] != pszUnicode[codepointCount]){ 5015 log_err("LMBCS->Uni result %lx should have been %lx \n", 5016 Out[0], pszUnicode[codepointCount]); 5017 } 5018 5019 pOut = Out; /* reset for accumulating next code point */ 5020 codepointCount++; 5021 } 5022 } 5023 else 5024 { 5025 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5026 } 5027 } 5028 { 5029 /* limits & surrogate error testing */ 5030 char LIn [sizeof(pszLMBCS)]; 5031 const char * pLIn = LIn; 5032 5033 char LOut [sizeof(pszLMBCS)]; 5034 char * pLOut = LOut; 5035 5036 UChar UOut [sizeof(pszUnicode)]; 5037 UChar * pUOut = UOut; 5038 5039 UChar UIn [sizeof(pszUnicode)]; 5040 const UChar * pUIn = UIn; 5041 5042 int32_t off [sizeof(offsets)]; 5043 UChar32 uniChar; 5044 5045 errorCode=U_ZERO_ERROR; 5046 5047 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5048 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn-1,off,FALSE, &errorCode); 5049 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5050 { 5051 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5052 } 5053 errorCode=U_ZERO_ERROR; 5054 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5055 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5056 { 5057 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5058 } 5059 errorCode=U_ZERO_ERROR; 5060 5061 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); 5062 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5063 { 5064 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5065 } 5066 errorCode=U_ZERO_ERROR; 5067 5068 /* 0 byte source request - no error, no pointer movement */ 5069 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5070 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5071 if(U_FAILURE(errorCode)) { 5072 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5073 } 5074 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5075 { 5076 log_err("Unexpected pointer move in 0 byte source request \n"); 5077 } 5078 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5079 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5080 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5081 { 5082 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5083 } 5084 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5085 { 5086 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5087 } 5088 errorCode = U_ZERO_ERROR; 5089 5090 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5091 5092 pUIn = pszUnicode; 5093 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5094 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5095 { 5096 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5097 } 5098 5099 errorCode = U_ZERO_ERROR; 5100 5101 pLIn = (const char *)pszLMBCS; 5102 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5103 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) 5104 { 5105 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5106 } 5107 5108 /* unpaired or chopped LMBCS surrogates */ 5109 5110 /* OK high surrogate, Low surrogate is chopped */ 5111 LIn [0] = (char)0x14; 5112 LIn [1] = (char)0xD8; 5113 LIn [2] = (char)0x01; 5114 LIn [3] = (char)0x14; 5115 LIn [4] = (char)0xDC; 5116 pLIn = LIn; 5117 errorCode = U_ZERO_ERROR; 5118 pUOut = UOut; 5119 5120 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5121 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5122 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5123 { 5124 log_err("Unexpected results on chopped low surrogate\n"); 5125 } 5126 5127 /* chopped at surrogate boundary */ 5128 LIn [0] = (char)0x14; 5129 LIn [1] = (char)0xD8; 5130 LIn [2] = (char)0x01; 5131 pLIn = LIn; 5132 errorCode = U_ZERO_ERROR; 5133 pUOut = UOut; 5134 5135 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5136 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5137 { 5138 log_err("Unexpected results on chopped at surrogate boundary \n"); 5139 } 5140 5141 /* unpaired surrogate plus valid Unichar */ 5142 LIn [0] = (char)0x14; 5143 LIn [1] = (char)0xD8; 5144 LIn [2] = (char)0x01; 5145 LIn [3] = (char)0x14; 5146 LIn [4] = (char)0xC9; 5147 LIn [5] = (char)0xD0; 5148 pLIn = LIn; 5149 errorCode = U_ZERO_ERROR; 5150 pUOut = UOut; 5151 5152 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5153 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5154 { 5155 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5156 } 5157 5158 /* unpaired surrogate plus chopped Unichar */ 5159 LIn [0] = (char)0x14; 5160 LIn [1] = (char)0xD8; 5161 LIn [2] = (char)0x01; 5162 LIn [3] = (char)0x14; 5163 LIn [4] = (char)0xC9; 5164 5165 pLIn = LIn; 5166 errorCode = U_ZERO_ERROR; 5167 pUOut = UOut; 5168 5169 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5170 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5171 { 5172 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5173 } 5174 5175 /* unpaired surrogate plus valid non-Unichar */ 5176 LIn [0] = (char)0x14; 5177 LIn [1] = (char)0xD8; 5178 LIn [2] = (char)0x01; 5179 LIn [3] = (char)0x0F; 5180 LIn [4] = (char)0x3B; 5181 5182 pLIn = LIn; 5183 errorCode = U_ZERO_ERROR; 5184 pUOut = UOut; 5185 5186 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5187 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5188 { 5189 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5190 } 5191 5192 /* unpaired surrogate plus chopped non-Unichar */ 5193 LIn [0] = (char)0x14; 5194 LIn [1] = (char)0xD8; 5195 LIn [2] = (char)0x01; 5196 LIn [3] = (char)0x0F; 5197 5198 pLIn = LIn; 5199 errorCode = U_ZERO_ERROR; 5200 pUOut = UOut; 5201 5202 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5203 5204 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5205 { 5206 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5207 } 5208 } 5209 } 5210 ucnv_close(cnv); /* final cleanup */ 5211 } 5212 5213 5214 static void TestJitterbug255() 5215 { 5216 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 5217 const char *testBuffer = (const char *)testBytes; 5218 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5219 UErrorCode status = U_ZERO_ERROR; 5220 UChar32 result; 5221 UConverter *cnv = 0; 5222 5223 cnv = ucnv_open("shift-jis", &status); 5224 if (U_FAILURE(status) || cnv == 0) { 5225 log_data_err("Failed to open the converter for SJIS.\n"); 5226 return; 5227 } 5228 while (testBuffer != testEnd) 5229 { 5230 result = ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5231 if (U_FAILURE(status)) 5232 { 5233 log_err("Failed to convert the next UChar for SJIS.\n"); 5234 break; 5235 } 5236 } 5237 ucnv_close(cnv); 5238 } 5239 5240 static void TestEBCDICUS4XML() 5241 { 5242 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5243 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5244 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5245 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5246 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5247 UChar *unicodes = unicodes_x; 5248 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5249 char *target = target_x; 5250 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5251 UErrorCode status = U_ZERO_ERROR; 5252 UConverter *cnv = 0; 5253 5254 cnv = ucnv_open("ebcdic-xml-us", &status); 5255 if (U_FAILURE(status) || cnv == 0) { 5256 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5257 return; 5258 } 5259 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5260 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5261 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", 5262 u_errorName(status)); 5263 printUSeqErr(unicodes_x, 3); 5264 printUSeqErr(toUnicodeMaps, 3); 5265 } 5266 status = U_ZERO_ERROR; 5267 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5268 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5269 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5270 u_errorName(status)); 5271 printSeqErr((const unsigned char*)target_x, 3); 5272 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5273 } 5274 ucnv_close(cnv); 5275 } 5276 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5277 5278 #if !UCONFIG_NO_COLLATION 5279 5280 static void TestJitterbug981(){ 5281 const UChar* rules; 5282 int32_t rules_length, target_cap, bytes_needed, buff_size; 5283 UErrorCode status = U_ZERO_ERROR; 5284 UConverter *utf8cnv; 5285 UCollator* myCollator; 5286 char *buff; 5287 int numNeeded=0; 5288 utf8cnv = ucnv_open ("utf8", &status); 5289 if(U_FAILURE(status)){ 5290 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5291 return; 5292 } 5293 myCollator = ucol_open("zh", &status); 5294 if(U_FAILURE(status)){ 5295 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5296 ucnv_close(utf8cnv); 5297 return; 5298 } 5299 5300 rules = ucol_getRules(myCollator, &rules_length); 5301 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5302 buff = malloc(buff_size); 5303 5304 target_cap = 0; 5305 do { 5306 ucnv_reset(utf8cnv); 5307 status = U_ZERO_ERROR; 5308 if(target_cap >= buff_size) { 5309 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5310 break; 5311 } 5312 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5313 rules, rules_length, &status); 5314 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5315 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5316 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5317 break; 5318 } 5319 numNeeded = bytes_needed; 5320 } while (status == U_BUFFER_OVERFLOW_ERROR); 5321 ucol_close(myCollator); 5322 ucnv_close(utf8cnv); 5323 free(buff); 5324 } 5325 5326 #endif 5327 5328 static void TestJitterbug1293(){ 5329 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5330 char target[256]; 5331 UErrorCode status = U_ZERO_ERROR; 5332 UConverter* conv=NULL; 5333 int32_t target_cap, bytes_needed, numNeeded = 0; 5334 conv = ucnv_open("shift-jis",&status); 5335 if(U_FAILURE(status)){ 5336 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5337 return; 5338 } 5339 5340 do{ 5341 target_cap =0; 5342 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5343 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5344 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5345 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5346 } 5347 numNeeded = bytes_needed; 5348 } while (status == U_BUFFER_OVERFLOW_ERROR); 5349 if(U_FAILURE(status)){ 5350 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); 5351 return; 5352 } 5353 ucnv_close(conv); 5354 } 5355 static void TestJB5275_1(){ 5356 5357 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5358 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5359 /* Switch script: */ 5360 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5361 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5362 "\xEF\x40\x3B\xB3\x0A"; 5363 static const UChar expected[] ={ 5364 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5365 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5366 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5367 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5368 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5369 }; 5370 5371 UErrorCode status = U_ZERO_ERROR; 5372 UConverter* conv = ucnv_open("iscii-gur", &status); 5373 UChar dest[100] = {'\0'}; 5374 UChar* target = dest; 5375 UChar* targetLimit = dest+100; 5376 const char* source = data; 5377 const char* sourceLimit = data+strlen(data); 5378 const UChar* exp = expected; 5379 5380 if (U_FAILURE(status)) { 5381 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5382 return; 5383 } 5384 5385 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5386 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5387 if(U_FAILURE(status)){ 5388 log_err("conversion failed: %s \n", u_errorName(status)); 5389 } 5390 targetLimit = target; 5391 target = dest; 5392 printUSeq(target, targetLimit-target); 5393 while(target<targetLimit){ 5394 if(*exp!=*target){ 5395 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5396 } 5397 target++; 5398 exp++; 5399 } 5400 ucnv_close(conv); 5401 } 5402 5403 static void TestJB5275(){ 5404 static const char* data = 5405 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5406 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5407 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5408 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5409 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5410 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5411 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5412 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5413 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5414 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5415 static const UChar expected[] ={ 5416 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5417 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5418 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5419 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5420 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5421 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5422 }; 5423 5424 UErrorCode status = U_ZERO_ERROR; 5425 UConverter* conv = ucnv_open("iscii", &status); 5426 UChar dest[100] = {'\0'}; 5427 UChar* target = dest; 5428 UChar* targetLimit = dest+100; 5429 const char* source = data; 5430 const char* sourceLimit = data+strlen(data); 5431 const UChar* exp = expected; 5432 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5433 if(U_FAILURE(status)){ 5434 log_err("conversion failed: %s \n", u_errorName(status)); 5435 } 5436 targetLimit = target; 5437 target = dest; 5438 5439 printUSeq(target, targetLimit-target); 5440 5441 while(target<targetLimit){ 5442 if(*exp!=*target){ 5443 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5444 } 5445 target++; 5446 exp++; 5447 } 5448 ucnv_close(conv); 5449 } 5450