1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File nucnvtst.c 9 * 10 * Modification History: 11 * Name Description 12 * Steven R. Loomis 7/8/1999 Adding input buffer test 13 ******************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "cstring.h" 17 #include "unicode/uloc.h" 18 #include "unicode/ucnv.h" 19 #include "unicode/ucnv_err.h" 20 #include "unicode/ucnv_cb.h" 21 #include "cintltst.h" 22 #include "unicode/utypes.h" 23 #include "unicode/ustring.h" 24 #include "unicode/ucol.h" 25 #include "unicode/utf16.h" 26 #include "cmemory.h" 27 #include "nucnvtst.h" 28 29 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message); 30 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message); 31 #if !UCONFIG_NO_COLLATION 32 static void TestJitterbug981(void); 33 #endif 34 #if !UCONFIG_NO_LEGACY_CONVERSION 35 static void TestJitterbug1293(void); 36 #endif 37 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ; 38 static void TestConverterTypesAndStarters(void); 39 static void TestAmbiguous(void); 40 static void TestSignatureDetection(void); 41 static void TestUTF7(void); 42 static void TestIMAP(void); 43 static void TestUTF8(void); 44 static void TestCESU8(void); 45 static void TestUTF16(void); 46 static void TestUTF16BE(void); 47 static void TestUTF16LE(void); 48 static void TestUTF32(void); 49 static void TestUTF32BE(void); 50 static void TestUTF32LE(void); 51 static void TestLATIN1(void); 52 53 #if !UCONFIG_NO_LEGACY_CONVERSION 54 static void TestSBCS(void); 55 static void TestDBCS(void); 56 static void TestMBCS(void); 57 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 58 static void TestICCRunout(void); 59 #endif 60 61 #ifdef U_ENABLE_GENERIC_ISO_2022 62 static void TestISO_2022(void); 63 #endif 64 65 static void TestISO_2022_JP(void); 66 static void TestISO_2022_JP_1(void); 67 static void TestISO_2022_JP_2(void); 68 static void TestISO_2022_KR(void); 69 static void TestISO_2022_KR_1(void); 70 static void TestISO_2022_CN(void); 71 #if 0 72 /* 73 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 74 */ 75 static void TestISO_2022_CN_EXT(void); 76 #endif 77 static void TestJIS(void); 78 static void TestHZ(void); 79 #endif 80 81 static void TestSCSU(void); 82 83 #if !UCONFIG_NO_LEGACY_CONVERSION 84 static void TestEBCDIC_STATEFUL(void); 85 static void TestGB18030(void); 86 static void TestLMBCS(void); 87 static void TestJitterbug255(void); 88 static void TestEBCDICUS4XML(void); 89 #if 0 90 /* 91 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 92 */ 93 static void TestJitterbug915(void); 94 #endif 95 static void TestISCII(void); 96 97 static void TestCoverageMBCS(void); 98 static void TestJitterbug2346(void); 99 static void TestJitterbug2411(void); 100 static void TestJB5275(void); 101 static void TestJB5275_1(void); 102 static void TestJitterbug6175(void); 103 104 static void TestIsFixedWidth(void); 105 #endif 106 107 static void TestInBufSizes(void); 108 109 static void TestRoundTrippingAllUTF(void); 110 static void TestConv(const uint16_t in[], 111 int len, 112 const char* conv, 113 const char* lang, 114 char byteArr[], 115 int byteArrLen); 116 117 /* open a converter, using test data if it begins with '@' */ 118 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err); 119 120 121 #define NEW_MAX_BUFFER 999 122 123 static int32_t gInBufferSize = NEW_MAX_BUFFER; 124 static int32_t gOutBufferSize = NEW_MAX_BUFFER; 125 static char gNuConvTestName[1024]; 126 127 #define nct_min(x,y) ((x<y) ? x : y) 128 129 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 130 { 131 if(cnv && cnv[0] == '@') { 132 return ucnv_openPackage(loadTestData(err), cnv+1, err); 133 } else { 134 return ucnv_open(cnv, err); 135 } 136 } 137 138 static void printSeq(const unsigned char* a, int len) 139 { 140 int i=0; 141 log_verbose("{"); 142 while (i<len) 143 log_verbose("0x%02x ", a[i++]); 144 log_verbose("}\n"); 145 } 146 147 static void printUSeq(const UChar* a, int len) 148 { 149 int i=0; 150 log_verbose("{U+"); 151 while (i<len) log_verbose("0x%04x ", a[i++]); 152 log_verbose("}\n"); 153 } 154 155 static void printSeqErr(const unsigned char* a, int len) 156 { 157 int i=0; 158 fprintf(stderr, "{"); 159 while (i<len) 160 fprintf(stderr, "0x%02x ", a[i++]); 161 fprintf(stderr, "}\n"); 162 } 163 164 static void printUSeqErr(const UChar* a, int len) 165 { 166 int i=0; 167 fprintf(stderr, "{U+"); 168 while (i<len) 169 fprintf(stderr, "0x%04x ", a[i++]); 170 fprintf(stderr,"}\n"); 171 } 172 173 static void 174 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message) 175 { 176 const char* s0; 177 const char* s=(char*)source; 178 const int32_t *r=results; 179 UErrorCode errorCode=U_ZERO_ERROR; 180 UChar32 c; 181 182 while(s<limit) { 183 s0=s; 184 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 185 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 186 break; /* no more significant input */ 187 } else if(U_FAILURE(errorCode)) { 188 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 189 break; 190 } else if( 191 /* test the expected number of input bytes only if >=0 */ 192 (*r>=0 && (int32_t)(s-s0)!=*r) || 193 c!=*(r+1) 194 ) { 195 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 196 message, c, (s-s0), *(r+1), *r); 197 break; 198 } 199 r+=2; 200 } 201 } 202 203 static void 204 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message) 205 { 206 const char* s=(char*)source; 207 UErrorCode errorCode=U_ZERO_ERROR; 208 uint32_t c; 209 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 210 if(errorCode != expected){ 211 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode)); 212 } 213 if(c != 0xFFFD && c != 0xffff){ 214 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c); 215 } 216 217 } 218 219 static void TestInBufSizes(void) 220 { 221 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1); 222 #if 1 223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2); 224 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3); 225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4); 226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5); 227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6); 228 TestNewConvertWithBufferSizes(1,1); 229 TestNewConvertWithBufferSizes(2,3); 230 TestNewConvertWithBufferSizes(3,2); 231 #endif 232 } 233 234 static void TestOutBufSizes(void) 235 { 236 #if 1 237 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 238 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER); 239 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER); 240 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER); 241 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER); 242 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER); 243 244 #endif 245 } 246 247 248 void addTestNewConvert(TestNode** root) 249 { 250 #if !UCONFIG_NO_FILE_IO 251 addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes"); 252 addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes"); 253 #endif 254 addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters"); 255 addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous"); 256 addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection"); 257 addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7"); 258 addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP"); 259 addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8"); 260 261 /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */ 262 addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8"); 263 addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16"); 264 addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE"); 265 addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE"); 266 addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32"); 267 addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE"); 268 addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE"); 269 270 #if !UCONFIG_NO_LEGACY_CONVERSION 271 addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS"); 272 #endif 273 274 addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1"); 275 276 #if !UCONFIG_NO_LEGACY_CONVERSION 277 addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS"); 278 #if !UCONFIG_NO_FILE_IO 279 addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS"); 280 addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout"); 281 #endif 282 addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS"); 283 284 #ifdef U_ENABLE_GENERIC_ISO_2022 285 addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022"); 286 #endif 287 288 addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP"); 289 addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS"); 290 addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1"); 291 // android-changed (no have ISO_2022_JP_2) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2"); 292 addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR"); 293 addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1"); 294 // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN"); 295 /* 296 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 297 addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT"); 298 addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915"); 299 */ 300 addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ"); 301 #endif 302 303 addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU"); 304 305 #if !UCONFIG_NO_LEGACY_CONVERSION 306 addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL"); 307 addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030"); 308 addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255"); 309 addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML"); 310 addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII"); 311 addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275"); 312 addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1"); 313 #if !UCONFIG_NO_COLLATION 314 // android-removed (no collation tailoring rules) -- addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981"); 315 #endif 316 317 addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293"); 318 #endif 319 320 321 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 322 addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS"); 323 #endif 324 325 addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF"); 326 327 #if !UCONFIG_NO_LEGACY_CONVERSION 328 addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346"); 329 addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411"); 330 // android-removed (no full ISO2022 CJK tables) -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175"); 331 addTest(root, &TestIsFixedWidth, "tsconv/nucnvtst/TestIsFixedWidth"); 332 #endif 333 } 334 335 336 /* Note that this test already makes use of statics, so it's not really 337 multithread safe. 338 This convenience function lets us make the error messages actually useful. 339 */ 340 341 static void setNuConvTestName(const char *codepage, const char *direction) 342 { 343 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 344 codepage, 345 direction, 346 (int)gInBufferSize, 347 (int)gOutBufferSize); 348 } 349 350 typedef enum 351 { 352 TC_OK = 0, /* test was OK */ 353 TC_MISMATCH = 1, /* Match failed - err was printed */ 354 TC_FAIL = 2 /* Test failed, don't print an err because it was already printed. */ 355 } ETestConvertResult; 356 357 /* Note: This function uses global variables and it will not do offset 358 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 359 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 360 const char *codepage, const int32_t *expectOffsets , UBool useFallback) 361 { 362 UErrorCode status = U_ZERO_ERROR; 363 UConverter *conv = 0; 364 char junkout[NEW_MAX_BUFFER]; /* FIX */ 365 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 366 char *p; 367 const UChar *src; 368 char *end; 369 char *targ; 370 int32_t *offs; 371 int i; 372 int32_t realBufferSize; 373 char *realBufferEnd; 374 const UChar *realSourceEnd; 375 const UChar *sourceLimit; 376 UBool checkOffsets = TRUE; 377 UBool doFlush; 378 379 for(i=0;i<NEW_MAX_BUFFER;i++) 380 junkout[i] = (char)0xF0; 381 for(i=0;i<NEW_MAX_BUFFER;i++) 382 junokout[i] = 0xFF; 383 384 setNuConvTestName(codepage, "FROM"); 385 386 log_verbose("\n========= %s\n", gNuConvTestName); 387 388 conv = my_ucnv_open(codepage, &status); 389 390 if(U_FAILURE(status)) 391 { 392 log_data_err("Couldn't open converter %s\n",codepage); 393 return TC_FAIL; 394 } 395 if(useFallback){ 396 ucnv_setFallback(conv,useFallback); 397 } 398 399 log_verbose("Converter opened..\n"); 400 401 src = source; 402 targ = junkout; 403 offs = junokout; 404 405 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 406 realBufferEnd = junkout + realBufferSize; 407 realSourceEnd = source + sourceLen; 408 409 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 410 checkOffsets = FALSE; 411 412 do 413 { 414 end = nct_min(targ + gOutBufferSize, realBufferEnd); 415 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 416 417 doFlush = (UBool)(sourceLimit == realSourceEnd); 418 419 if(targ == realBufferEnd) { 420 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 421 return TC_FAIL; 422 } 423 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 424 425 426 status = U_ZERO_ERROR; 427 428 ucnv_fromUnicode (conv, 429 &targ, 430 end, 431 &src, 432 sourceLimit, 433 checkOffsets ? offs : NULL, 434 doFlush, /* flush if we're at the end of the input data */ 435 &status); 436 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) ); 437 438 if(U_FAILURE(status)) { 439 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 440 return TC_FAIL; 441 } 442 443 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 444 sourceLen, targ-junkout); 445 446 if(getTestOption(VERBOSITY_OPTION)) 447 { 448 char junk[9999]; 449 char offset_str[9999]; 450 char *ptr; 451 452 junk[0] = 0; 453 offset_str[0] = 0; 454 for(ptr = junkout;ptr<targ;ptr++) { 455 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr)); 456 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout])); 457 } 458 459 log_verbose(junk); 460 printSeq((const uint8_t *)expect, expectLen); 461 if ( checkOffsets ) { 462 log_verbose("\nOffsets:"); 463 log_verbose(offset_str); 464 } 465 log_verbose("\n"); 466 } 467 ucnv_close(conv); 468 469 if(expectLen != targ-junkout) { 470 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 471 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 472 fprintf(stderr, "Got:\n"); 473 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 474 fprintf(stderr, "Expected:\n"); 475 printSeqErr((const unsigned char*)expect, expectLen); 476 return TC_MISMATCH; 477 } 478 479 if (checkOffsets && (expectOffsets != 0) ) { 480 log_verbose("comparing %d offsets..\n", targ-junkout); 481 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 482 log_err("did not get the expected offsets. %s\n", gNuConvTestName); 483 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 484 log_err("\n"); 485 log_err("Got : "); 486 for(p=junkout;p<targ;p++) { 487 log_err("%d,", junokout[p-junkout]); 488 } 489 log_err("\n"); 490 log_err("Expected: "); 491 for(i=0; i<(targ-junkout); i++) { 492 log_err("%d,", expectOffsets[i]); 493 } 494 log_err("\n"); 495 } 496 } 497 498 log_verbose("comparing..\n"); 499 if(!memcmp(junkout, expect, expectLen)) { 500 log_verbose("Matches!\n"); 501 return TC_OK; 502 } else { 503 log_err("String does not match u->%s\n", gNuConvTestName); 504 printUSeqErr(source, sourceLen); 505 fprintf(stderr, "Got:\n"); 506 printSeqErr((const unsigned char *)junkout, expectLen); 507 fprintf(stderr, "Expected:\n"); 508 printSeqErr((const unsigned char *)expect, expectLen); 509 510 return TC_MISMATCH; 511 } 512 } 513 514 /* Note: This function uses global variables and it will not do offset 515 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */ 516 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 517 const char *codepage, const int32_t *expectOffsets, UBool useFallback) 518 { 519 UErrorCode status = U_ZERO_ERROR; 520 UConverter *conv = 0; 521 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 522 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 523 const char *src; 524 const char *realSourceEnd; 525 const char *srcLimit; 526 UChar *p; 527 UChar *targ; 528 UChar *end; 529 int32_t *offs; 530 int i; 531 UBool checkOffsets = TRUE; 532 533 int32_t realBufferSize; 534 UChar *realBufferEnd; 535 536 537 for(i=0;i<NEW_MAX_BUFFER;i++) 538 junkout[i] = 0xFFFE; 539 540 for(i=0;i<NEW_MAX_BUFFER;i++) 541 junokout[i] = -1; 542 543 setNuConvTestName(codepage, "TO"); 544 545 log_verbose("\n========= %s\n", gNuConvTestName); 546 547 conv = my_ucnv_open(codepage, &status); 548 549 if(U_FAILURE(status)) 550 { 551 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 552 return TC_FAIL; 553 } 554 if(useFallback){ 555 ucnv_setFallback(conv,useFallback); 556 } 557 log_verbose("Converter opened..\n"); 558 559 src = (const char *)source; 560 targ = junkout; 561 offs = junokout; 562 563 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 564 realBufferEnd = junkout + realBufferSize; 565 realSourceEnd = src + sourcelen; 566 567 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER ) 568 checkOffsets = FALSE; 569 570 do 571 { 572 end = nct_min( targ + gOutBufferSize, realBufferEnd); 573 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 574 575 if(targ == realBufferEnd) 576 { 577 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName); 578 return TC_FAIL; 579 } 580 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 581 582 /* oldTarg = targ; */ 583 584 status = U_ZERO_ERROR; 585 586 ucnv_toUnicode (conv, 587 &targ, 588 end, 589 &src, 590 srcLimit, 591 checkOffsets ? offs : NULL, 592 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 593 &status); 594 595 /* offs += (targ-oldTarg); */ 596 597 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */ 598 599 if(U_FAILURE(status)) 600 { 601 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName); 602 return TC_FAIL; 603 } 604 605 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 606 sourcelen, targ-junkout); 607 if(getTestOption(VERBOSITY_OPTION)) 608 { 609 char junk[9999]; 610 char offset_str[9999]; 611 UChar *ptr; 612 613 junk[0] = 0; 614 offset_str[0] = 0; 615 616 for(ptr = junkout;ptr<targ;ptr++) 617 { 618 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr); 619 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]); 620 } 621 622 log_verbose(junk); 623 printUSeq(expect, expectlen); 624 if ( checkOffsets ) 625 { 626 log_verbose("\nOffsets:"); 627 log_verbose(offset_str); 628 } 629 log_verbose("\n"); 630 } 631 ucnv_close(conv); 632 633 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 634 635 if (checkOffsets && (expectOffsets != 0)) 636 { 637 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){ 638 log_err("did not get the expected offsets. %s\n",gNuConvTestName); 639 log_err("Got: "); 640 for(p=junkout;p<targ;p++) { 641 log_err("%d,", junokout[p-junkout]); 642 } 643 log_err("\n"); 644 log_err("Expected: "); 645 for(i=0; i<(targ-junkout); i++) { 646 log_err("%d,", expectOffsets[i]); 647 } 648 log_err("\n"); 649 log_err("output: "); 650 for(i=0; i<(targ-junkout); i++) { 651 log_err("%X,", junkout[i]); 652 } 653 log_err("\n"); 654 log_err("input: "); 655 for(i=0; i<(src-(const char *)source); i++) { 656 log_err("%X,", (unsigned char)source[i]); 657 } 658 log_err("\n"); 659 } 660 } 661 662 if(!memcmp(junkout, expect, expectlen*2)) 663 { 664 log_verbose("Matches!\n"); 665 return TC_OK; 666 } 667 else 668 { 669 log_err("String does not match. %s\n", gNuConvTestName); 670 log_verbose("String does not match. %s\n", gNuConvTestName); 671 printf("\nGot:"); 672 printUSeqErr(junkout, expectlen); 673 printf("\nExpected:"); 674 printUSeqErr(expect, expectlen); 675 return TC_MISMATCH; 676 } 677 } 678 679 680 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize ) 681 { 682 /** test chars #1 */ 683 /* 1 2 3 1Han 2Han 3Han . */ 684 static const UChar sampleText[] = 685 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 }; 686 static const UChar sampleTextRoundTripUnmappable[] = 687 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd }; 688 689 690 static const uint8_t expectedUTF8[] = 691 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 }; 692 static const int32_t toUTF8Offs[] = 693 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 }; 694 static const int32_t fmUTF8Offs[] = 695 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e }; 696 697 #ifdef U_ENABLE_GENERIC_ISO_2022 698 /* Same as UTF8, but with ^[%B preceeding */ 699 static const const uint8_t expectedISO2022[] = 700 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E }; 701 static const int32_t toISO2022Offs[] = 702 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 703 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */ 704 static const int32_t fmISO2022Offs[] = 705 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */ 706 #endif 707 708 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */ 709 static const uint8_t expectedIBM930[] = 710 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f }; 711 static const int32_t toIBM930Offs[] = 712 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 }; 713 static const int32_t fmIBM930Offs[] = 714 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e }; 715 716 /* 1 2 3 0 h1 h2 h3 . MBCS*/ 717 static const uint8_t expectedIBM943[] = 718 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc }; 719 static const int32_t toIBM943Offs [] = 720 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 }; 721 static const int32_t fmIBM943Offs[] = 722 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b }; 723 724 /* 1 2 3 0 h1 h2 h3 . DBCS*/ 725 static const uint8_t expectedIBM9027[] = 726 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe }; 727 static const int32_t toIBM9027Offs [] = 728 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 }; 729 730 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 731 static const uint8_t expectedIBM920[] = 732 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a }; 733 static const int32_t toIBM920Offs [] = 734 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 735 736 /* 1 2 3 0 <?> <?> <?> . SBCS*/ 737 static const uint8_t expectedISO88593[] = 738 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 739 static const int32_t toISO88593Offs[] = 740 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 741 742 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/ 743 static const uint8_t expectedLATIN1[] = 744 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a }; 745 static const int32_t toLATIN1Offs[] = 746 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }; 747 748 749 /* etc */ 750 static const uint8_t expectedUTF16BE[] = 751 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 }; 752 static const int32_t toUTF16BEOffs[]= 753 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 754 static const int32_t fmUTF16BEOffs[] = 755 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 756 757 static const uint8_t expectedUTF16LE[] = 758 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc }; 759 static const int32_t toUTF16LEOffs[]= 760 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 }; 761 static const int32_t fmUTF16LEOffs[] = 762 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 }; 763 764 static const uint8_t expectedUTF32BE[] = 765 { 0x00, 0x00, 0x00, 0x31, 766 0x00, 0x00, 0x00, 0x32, 767 0x00, 0x00, 0x00, 0x33, 768 0x00, 0x00, 0x00, 0x00, 769 0x00, 0x00, 0x4e, 0x00, 770 0x00, 0x00, 0x4e, 0x8c, 771 0x00, 0x00, 0x4e, 0x09, 772 0x00, 0x00, 0x00, 0x2e, 773 0x00, 0x02, 0x00, 0x21 }; 774 static const int32_t toUTF32BEOffs[]= 775 { 0x00, 0x00, 0x00, 0x00, 776 0x01, 0x01, 0x01, 0x01, 777 0x02, 0x02, 0x02, 0x02, 778 0x03, 0x03, 0x03, 0x03, 779 0x04, 0x04, 0x04, 0x04, 780 0x05, 0x05, 0x05, 0x05, 781 0x06, 0x06, 0x06, 0x06, 782 0x07, 0x07, 0x07, 0x07, 783 0x08, 0x08, 0x08, 0x08, 784 0x08, 0x08, 0x08, 0x08 }; 785 static const int32_t fmUTF32BEOffs[] = 786 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 787 788 static const uint8_t expectedUTF32LE[] = 789 { 0x31, 0x00, 0x00, 0x00, 790 0x32, 0x00, 0x00, 0x00, 791 0x33, 0x00, 0x00, 0x00, 792 0x00, 0x00, 0x00, 0x00, 793 0x00, 0x4e, 0x00, 0x00, 794 0x8c, 0x4e, 0x00, 0x00, 795 0x09, 0x4e, 0x00, 0x00, 796 0x2e, 0x00, 0x00, 0x00, 797 0x21, 0x00, 0x02, 0x00 }; 798 static const int32_t toUTF32LEOffs[]= 799 { 0x00, 0x00, 0x00, 0x00, 800 0x01, 0x01, 0x01, 0x01, 801 0x02, 0x02, 0x02, 0x02, 802 0x03, 0x03, 0x03, 0x03, 803 0x04, 0x04, 0x04, 0x04, 804 0x05, 0x05, 0x05, 0x05, 805 0x06, 0x06, 0x06, 0x06, 806 0x07, 0x07, 0x07, 0x07, 807 0x08, 0x08, 0x08, 0x08, 808 0x08, 0x08, 0x08, 0x08 }; 809 static const int32_t fmUTF32LEOffs[] = 810 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 }; 811 812 813 814 815 /** Test chars #2 **/ 816 817 /* Sahha [health], slashed h's */ 818 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 }; 819 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 }; 820 821 /* LMBCS */ 822 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 }; 823 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 }; 824 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 }; 825 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008}; 826 /*********************************** START OF CODE finally *************/ 827 828 gInBufferSize = insize; 829 gOutBufferSize = outsize; 830 831 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize); 832 833 834 /*UTF-8*/ 835 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 836 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE ); 837 838 log_verbose("Test surrogate behaviour for UTF8\n"); 839 { 840 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 }; 841 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac, 842 0xf0, 0x90, 0x90, 0x81, 843 0xef, 0xbf, 0xbd 844 }; 845 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 }; 846 testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]), 847 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE ); 848 849 850 } 851 852 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 853 /*ISO-2022*/ 854 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 855 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE ); 856 #endif 857 858 /*UTF16 LE*/ 859 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 860 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE ); 861 /*UTF16 BE*/ 862 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 863 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE ); 864 /*UTF32 LE*/ 865 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 866 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE ); 867 /*UTF32 BE*/ 868 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 869 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE ); 870 871 /*LATIN_1*/ 872 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 873 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE ); 874 875 #if !UCONFIG_NO_LEGACY_CONVERSION 876 /*EBCDIC_STATEFUL*/ 877 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 878 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE ); 879 880 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 881 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 882 883 /*MBCS*/ 884 885 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 886 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE ); 887 /*DBCS*/ 888 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 889 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE ); 890 /*SBCS*/ 891 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 892 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE ); 893 /*SBCS*/ 894 testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]), 895 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE ); 896 #endif 897 898 899 /****/ 900 901 /*UTF-8*/ 902 testConvertToU(expectedUTF8, sizeof(expectedUTF8), 903 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE); 904 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022) 905 /*ISO-2022*/ 906 testConvertToU(expectedISO2022, sizeof(expectedISO2022), 907 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE); 908 #endif 909 910 /*UTF16 LE*/ 911 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 912 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 913 /*UTF16 BE*/ 914 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE), 915 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE); 916 /*UTF32 LE*/ 917 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE), 918 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE); 919 /*UTF32 BE*/ 920 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE), 921 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE); 922 923 #if !UCONFIG_NO_LEGACY_CONVERSION 924 /*EBCDIC_STATEFUL*/ 925 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable, 926 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE); 927 /*MBCS*/ 928 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable, 929 sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE); 930 #endif 931 932 /* Try it again to make sure it still works */ 933 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE), 934 sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE); 935 936 #if !UCONFIG_NO_LEGACY_CONVERSION 937 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913), 938 malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE); 939 940 testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), 941 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE ); 942 943 /*LMBCS*/ 944 testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), 945 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE ); 946 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS), 947 LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE); 948 #endif 949 950 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */ 951 { 952 /* encode directly set D and set O */ 953 static const uint8_t utf7[] = { 954 /* 955 Hi Mom -+Jjo--! 956 A+ImIDkQ. 957 +- 958 +ZeVnLIqe- 959 */ 960 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 961 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 962 0x2b, 0x2d, 963 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 964 }; 965 static const UChar unicode[] = { 966 /* 967 Hi Mom -<WHITE SMILING FACE>-! 968 A<NOT IDENTICAL TO><ALPHA>. 969 + 970 [Japanese word "nihongo"] 971 */ 972 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 973 0x41, 0x2262, 0x0391, 0x2e, 974 0x2b, 975 0x65e5, 0x672c, 0x8a9e 976 }; 977 static const int32_t toUnicodeOffsets[] = { 978 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 979 15, 17, 19, 23, 980 24, 981 27, 29, 32 982 }; 983 static const int32_t fromUnicodeOffsets[] = { 984 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 985 11, 12, 12, 12, 13, 13, 13, 13, 14, 986 15, 15, 987 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 988 }; 989 990 /* same but escaping set O (the exclamation mark) */ 991 static const uint8_t utf7Restricted[] = { 992 /* 993 Hi Mom -+Jjo--+ACE- 994 A+ImIDkQ. 995 +- 996 +ZeVnLIqe- 997 */ 998 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d, 999 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e, 1000 0x2b, 0x2d, 1001 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d 1002 }; 1003 static const int32_t toUnicodeOffsetsR[] = { 1004 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15, 1005 19, 21, 23, 27, 1006 28, 1007 31, 33, 36 1008 }; 1009 static const int32_t fromUnicodeOffsetsR[] = { 1010 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10, 1011 11, 12, 12, 12, 13, 13, 13, 13, 14, 1012 15, 15, 1013 16, 16, 16, 17, 17, 17, 18, 18, 18, 18 1014 }; 1015 1016 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE); 1017 1018 testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE); 1019 1020 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE); 1021 1022 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE); 1023 } 1024 1025 /* 1026 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152, 1027 * modified according to RFC 2060, 1028 * and supplemented with the one example in RFC 2060 itself. 1029 */ 1030 { 1031 static const uint8_t imap[] = { 1032 /* Hi Mom -&Jjo--! 1033 A&ImIDkQ-. 1034 &- 1035 &ZeVnLIqe- 1036 \ 1037 ~peter 1038 /mail 1039 /&ZeVnLIqe- 1040 /&U,BTFw- 1041 */ 1042 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21, 1043 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e, 1044 0x26, 0x2d, 1045 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1046 0x5c, 1047 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1048 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1049 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d, 1050 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d 1051 }; 1052 static const UChar unicode[] = { 1053 /* Hi Mom -<WHITE SMILING FACE>-! 1054 A<NOT IDENTICAL TO><ALPHA>. 1055 & 1056 [Japanese word "nihongo"] 1057 \ 1058 ~peter 1059 /mail 1060 /<65e5, 672c, 8a9e> 1061 /<53f0, 5317> 1062 */ 1063 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21, 1064 0x41, 0x2262, 0x0391, 0x2e, 1065 0x26, 1066 0x65e5, 0x672c, 0x8a9e, 1067 0x5c, 1068 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72, 1069 0x2f, 0x6d, 0x61, 0x69, 0x6c, 1070 0x2f, 0x65e5, 0x672c, 0x8a9e, 1071 0x2f, 0x53f0, 0x5317 1072 }; 1073 static const int32_t toUnicodeOffsets[] = { 1074 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14, 1075 15, 17, 19, 24, 1076 25, 1077 28, 30, 33, 1078 37, 1079 38, 39, 40, 41, 42, 43, 1080 44, 45, 46, 47, 48, 1081 49, 51, 53, 56, 1082 60, 62, 64 1083 }; 1084 static const int32_t fromUnicodeOffsets[] = { 1085 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 1086 11, 12, 12, 12, 13, 13, 13, 13, 13, 14, 1087 15, 15, 1088 16, 16, 16, 17, 17, 17, 18, 18, 18, 18, 1089 19, 1090 20, 21, 22, 23, 24, 25, 1091 26, 27, 28, 29, 30, 1092 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34, 1093 35, 36, 36, 36, 37, 37, 37, 37, 37 1094 }; 1095 1096 testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE); 1097 1098 testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE); 1099 } 1100 1101 /* Test UTF-8 bad data handling*/ 1102 { 1103 static const uint8_t utf8[]={ 1104 0x61, 1105 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1106 0x00, 1107 0x62, 1108 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1109 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */ 1110 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */ 1111 0xdf, 0xbf, /* 7ff */ 1112 0xbf, /* truncated tail */ 1113 0xf4, 0x90, 0x80, 0x80, /* 11FFFF */ 1114 0x02 1115 }; 1116 1117 static const uint16_t utf8Expected[]={ 1118 0x0061, 1119 0xfffd, 1120 0x0000, 1121 0x0062, 1122 0xfffd, 1123 0xfffd, 1124 0xdbff, 0xdfff, 1125 0x07ff, 1126 0xfffd, 1127 0xfffd, 1128 0x0002 1129 }; 1130 1131 static const int32_t utf8Offsets[]={ 1132 0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28 1133 }; 1134 testConvertToU(utf8, sizeof(utf8), 1135 utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE); 1136 1137 } 1138 1139 /* Test UTF-32BE bad data handling*/ 1140 { 1141 static const uint8_t utf32[]={ 1142 0x00, 0x00, 0x00, 0x61, 1143 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 1144 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1145 0x00, 0x00, 0x00, 0x62, 1146 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1147 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 1148 0x00, 0x00, 0x01, 0x62, 1149 0x00, 0x00, 0x02, 0x62 1150 }; 1151 static const uint16_t utf32Expected[]={ 1152 0x0061, 1153 0xfffd, /* 0x110000 out of range */ 1154 0xDBFF, /* 0x10FFFF in range */ 1155 0xDFFF, 1156 0x0062, 1157 0xfffd, /* 0xffffffff out of range */ 1158 0xfffd, /* 0x7fffffff out of range */ 1159 0x0162, 1160 0x0262 1161 }; 1162 static const int32_t utf32Offsets[]={ 1163 0, 4, 8, 8, 12, 16, 20, 24, 28 1164 }; 1165 static const uint8_t utf32ExpectedBack[]={ 1166 0x00, 0x00, 0x00, 0x61, 1167 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */ 1168 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */ 1169 0x00, 0x00, 0x00, 0x62, 1170 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */ 1171 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */ 1172 0x00, 0x00, 0x01, 0x62, 1173 0x00, 0x00, 0x02, 0x62 1174 }; 1175 static const int32_t utf32OffsetsBack[]={ 1176 0,0,0,0, 1177 1,1,1,1, 1178 2,2,2,2, 1179 4,4,4,4, 1180 5,5,5,5, 1181 6,6,6,6, 1182 7,7,7,7, 1183 8,8,8,8 1184 }; 1185 1186 testConvertToU(utf32, sizeof(utf32), 1187 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE); 1188 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1189 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE); 1190 } 1191 1192 /* Test UTF-32LE bad data handling*/ 1193 { 1194 static const uint8_t utf32[]={ 1195 0x61, 0x00, 0x00, 0x00, 1196 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 1197 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1198 0x62, 0x00, 0x00, 0x00, 1199 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 1200 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 1201 0x62, 0x01, 0x00, 0x00, 1202 0x62, 0x02, 0x00, 0x00, 1203 }; 1204 1205 static const uint16_t utf32Expected[]={ 1206 0x0061, 1207 0xfffd, /* 0x110000 out of range */ 1208 0xDBFF, /* 0x10FFFF in range */ 1209 0xDFFF, 1210 0x0062, 1211 0xfffd, /* 0xffffffff out of range */ 1212 0xfffd, /* 0x7fffffff out of range */ 1213 0x0162, 1214 0x0262 1215 }; 1216 static const int32_t utf32Offsets[]={ 1217 0, 4, 8, 8, 12, 16, 20, 24, 28 1218 }; 1219 static const uint8_t utf32ExpectedBack[]={ 1220 0x61, 0x00, 0x00, 0x00, 1221 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */ 1222 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */ 1223 0x62, 0x00, 0x00, 0x00, 1224 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */ 1225 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */ 1226 0x62, 0x01, 0x00, 0x00, 1227 0x62, 0x02, 0x00, 0x00 1228 }; 1229 static const int32_t utf32OffsetsBack[]={ 1230 0,0,0,0, 1231 1,1,1,1, 1232 2,2,2,2, 1233 4,4,4,4, 1234 5,5,5,5, 1235 6,6,6,6, 1236 7,7,7,7, 1237 8,8,8,8 1238 }; 1239 testConvertToU(utf32, sizeof(utf32), 1240 utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE ); 1241 testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), 1242 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE); 1243 } 1244 } 1245 1246 static void TestCoverageMBCS(){ 1247 #if 0 1248 UErrorCode status = U_ZERO_ERROR; 1249 const char *directory = loadTestData(&status); 1250 char* tdpath = NULL; 1251 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1)); 1252 int len = strlen(directory); 1253 char* index=NULL; 1254 1255 tdpath = (char*) malloc(sizeof(char) * (len * 2)); 1256 uprv_strcpy(saveDirectory,u_getDataDirectory()); 1257 log_verbose("Retrieved data directory %s \n",saveDirectory); 1258 uprv_strcpy(tdpath,directory); 1259 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR); 1260 1261 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){ 1262 *(index+1)=0; 1263 } 1264 u_setDataDirectory(tdpath); 1265 log_verbose("ICU data directory is set to: %s \n" ,tdpath); 1266 #endif 1267 1268 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 1269 which is test file for MBCS conversion with single-byte codepage data.*/ 1270 { 1271 1272 /* MBCS with single byte codepage data test1.ucm*/ 1273 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003}; 1274 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,}; 1275 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, }; 1276 1277 /*from Unicode*/ 1278 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1279 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE ); 1280 } 1281 1282 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm 1283 which is test file for MBCS conversion with three-byte codepage data.*/ 1284 { 1285 1286 /* MBCS with three byte codepage data test3.ucm*/ 1287 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1288 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,}; 1289 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8}; 1290 1291 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,}; 1292 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1293 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 }; 1294 1295 /*from Unicode*/ 1296 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1297 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE ); 1298 1299 /*to Unicode*/ 1300 testConvertToU(test3input, sizeof(test3input), 1301 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE); 1302 1303 } 1304 1305 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm 1306 which is test file for MBCS conversion with four-byte codepage data.*/ 1307 { 1308 1309 /* MBCS with three byte codepage data test4.ucm*/ 1310 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e}; 1311 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,}; 1312 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,}; 1313 1314 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,}; 1315 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; 1316 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,}; 1317 1318 /*from Unicode*/ 1319 testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 1320 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE ); 1321 1322 /*to Unicode*/ 1323 testConvertToU(test4input, sizeof(test4input), 1324 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE ); 1325 1326 } 1327 #if 0 1328 free(tdpath); 1329 /* restore the original data directory */ 1330 log_verbose("Setting the data directory to %s \n", saveDirectory); 1331 u_setDataDirectory(saveDirectory); 1332 free(saveDirectory); 1333 #endif 1334 1335 } 1336 1337 static void TestConverterType(const char *convName, UConverterType convType) { 1338 UConverter* myConverter; 1339 UErrorCode err = U_ZERO_ERROR; 1340 1341 myConverter = my_ucnv_open(convName, &err); 1342 1343 if (U_FAILURE(err)) { 1344 log_data_err("Failed to create an %s converter\n", convName); 1345 return; 1346 } 1347 else 1348 { 1349 if (ucnv_getType(myConverter)!=convType) { 1350 log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n", 1351 convName, convType); 1352 } 1353 else { 1354 log_verbose("ucnv_getType %s ok\n", convName); 1355 } 1356 } 1357 ucnv_close(myConverter); 1358 } 1359 1360 static void TestConverterTypesAndStarters() 1361 { 1362 #if !UCONFIG_NO_LEGACY_CONVERSION 1363 UConverter* myConverter; 1364 UErrorCode err = U_ZERO_ERROR; 1365 UBool mystarters[256]; 1366 1367 /* const UBool expectedKSCstarters[256] = { 1368 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1369 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1370 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1371 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1372 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1373 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1374 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1375 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1376 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1377 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1378 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1379 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 1382 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1383 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1384 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1385 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, 1386 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1387 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1388 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1389 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1390 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1391 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1392 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, 1393 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/ 1394 1395 1396 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types."); 1397 1398 myConverter = ucnv_open("ksc", &err); 1399 if (U_FAILURE(err)) { 1400 log_data_err("Failed to create an ibm-ksc converter\n"); 1401 return; 1402 } 1403 else 1404 { 1405 if (ucnv_getType(myConverter)!=UCNV_MBCS) 1406 log_err("ucnv_getType Failed for ibm-949\n"); 1407 else 1408 log_verbose("ucnv_getType ibm-949 ok\n"); 1409 1410 if(myConverter!=NULL) 1411 ucnv_getStarters(myConverter, mystarters, &err); 1412 1413 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters))) 1414 log_err("Failed ucnv_getStarters for ksc\n"); 1415 else 1416 log_verbose("ucnv_getStarters ok\n");*/ 1417 1418 } 1419 ucnv_close(myConverter); 1420 1421 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL); 1422 TestConverterType("ibm-878", UCNV_SBCS); 1423 #endif 1424 1425 TestConverterType("iso-8859-1", UCNV_LATIN_1); 1426 1427 TestConverterType("ibm-1208", UCNV_UTF8); 1428 1429 TestConverterType("utf-8", UCNV_UTF8); 1430 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian); 1431 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian); 1432 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian); 1433 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian); 1434 1435 #if !UCONFIG_NO_LEGACY_CONVERSION 1436 1437 #if defined(U_ENABLE_GENERIC_ISO_2022) 1438 TestConverterType("iso-2022", UCNV_ISO_2022); 1439 #endif 1440 1441 TestConverterType("hz", UCNV_HZ); 1442 #endif 1443 1444 TestConverterType("scsu", UCNV_SCSU); 1445 1446 #if !UCONFIG_NO_LEGACY_CONVERSION 1447 TestConverterType("x-iscii-de", UCNV_ISCII); 1448 #endif 1449 1450 TestConverterType("ascii", UCNV_US_ASCII); 1451 TestConverterType("utf-7", UCNV_UTF7); 1452 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX); 1453 TestConverterType("bocu-1", UCNV_BOCU1); 1454 } 1455 1456 static void 1457 TestAmbiguousConverter(UConverter *cnv) { 1458 static const char inBytes[3]={ 0x61, 0x5B, 0x5c }; 1459 UChar outUnicode[20]={ 0, 0, 0, 0 }; 1460 1461 const char *s; 1462 UChar *u; 1463 UErrorCode errorCode; 1464 UBool isAmbiguous; 1465 1466 /* try to convert an 'a', a square bracket and a US-ASCII backslash */ 1467 errorCode=U_ZERO_ERROR; 1468 s=inBytes; 1469 u=outUnicode; 1470 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode); 1471 if(U_FAILURE(errorCode)) { 1472 /* we do not care about general failures in this test; the input may just not be mappable */ 1473 return; 1474 } 1475 1476 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) { 1477 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */ 1478 /* There are some encodings that are partially ASCII based, 1479 like the ISO-7 and GSM series of codepages, which we ignore. */ 1480 return; 1481 } 1482 1483 isAmbiguous=ucnv_isAmbiguous(cnv); 1484 1485 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */ 1486 if((outUnicode[2]!=0x5c)!=isAmbiguous) { 1487 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n", 1488 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous); 1489 return; 1490 } 1491 1492 if(outUnicode[2]!=0x5c) { 1493 /* needs fixup, fix it */ 1494 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode)); 1495 if(outUnicode[2]!=0x5c) { 1496 /* the fix failed */ 1497 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode)); 1498 return; 1499 } 1500 } 1501 } 1502 1503 static void TestAmbiguous() 1504 { 1505 UErrorCode status = U_ZERO_ERROR; 1506 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv; 1507 static const char target[] = { 1508 /* "\\usr\\local\\share\\data\\icutest.txt" */ 1509 0x5c, 0x75, 0x73, 0x72, 1510 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c, 1511 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65, 1512 0x5c, 0x64, 0x61, 0x74, 0x61, 1513 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74, 1514 0 1515 }; 1516 UChar asciiResult[200], sjisResult[200]; 1517 int32_t /*asciiLength = 0,*/ sjisLength = 0, i; 1518 const char *name; 1519 1520 /* enumerate all converters */ 1521 status=U_ZERO_ERROR; 1522 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) { 1523 cnv=ucnv_open(name, &status); 1524 if(U_SUCCESS(status)) { 1525 /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */ 1526 const char* cnvName = ucnv_getName(cnv, &status); 1527 if (strlen(cnvName) < 8 || 1528 strncmp(cnvName, "ISO_2022_CN", 8) != 0) { 1529 TestAmbiguousConverter(cnv); 1530 } 1531 /* END android-changed */ 1532 ucnv_close(cnv); 1533 } else { 1534 log_err("error: unable to open available converter \"%s\"\n", name); 1535 status=U_ZERO_ERROR; 1536 } 1537 } 1538 1539 #if !UCONFIG_NO_LEGACY_CONVERSION 1540 sjis_cnv = ucnv_open("ibm-943", &status); 1541 if (U_FAILURE(status)) 1542 { 1543 log_data_err("Failed to create a SJIS converter\n"); 1544 return; 1545 } 1546 ascii_cnv = ucnv_open("LATIN-1", &status); 1547 if (U_FAILURE(status)) 1548 { 1549 log_data_err("Failed to create a LATIN-1 converter\n"); 1550 ucnv_close(sjis_cnv); 1551 return; 1552 } 1553 /* convert target from SJIS to Unicode */ 1554 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1555 if (U_FAILURE(status)) 1556 { 1557 log_err("Failed to convert the SJIS string.\n"); 1558 ucnv_close(sjis_cnv); 1559 ucnv_close(ascii_cnv); 1560 return; 1561 } 1562 /* convert target from Latin-1 to Unicode */ 1563 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status); 1564 if (U_FAILURE(status)) 1565 { 1566 log_err("Failed to convert the Latin-1 string.\n"); 1567 ucnv_close(sjis_cnv); 1568 ucnv_close(ascii_cnv); 1569 return; 1570 } 1571 if (!ucnv_isAmbiguous(sjis_cnv)) 1572 { 1573 log_err("SJIS converter should contain ambiguous character mappings.\n"); 1574 ucnv_close(sjis_cnv); 1575 ucnv_close(ascii_cnv); 1576 return; 1577 } 1578 if (u_strcmp(sjisResult, asciiResult) == 0) 1579 { 1580 log_err("File separators for SJIS don't need to be fixed.\n"); 1581 } 1582 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength); 1583 if (u_strcmp(sjisResult, asciiResult) != 0) 1584 { 1585 log_err("Fixing file separator for SJIS failed.\n"); 1586 } 1587 ucnv_close(sjis_cnv); 1588 ucnv_close(ascii_cnv); 1589 #endif 1590 } 1591 1592 static void 1593 TestSignatureDetection(){ 1594 /* with null terminated strings */ 1595 { 1596 static const char* data[] = { 1597 "\xFE\xFF\x00\x00", /* UTF-16BE */ 1598 "\xFF\xFE\x00\x00", /* UTF-16LE */ 1599 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1600 "\x0E\xFE\xFF\x00", /* SCSU */ 1601 1602 "\xFE\xFF", /* UTF-16BE */ 1603 "\xFF\xFE", /* UTF-16LE */ 1604 "\xEF\xBB\xBF", /* UTF-8 */ 1605 "\x0E\xFE\xFF", /* SCSU */ 1606 1607 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1608 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1609 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1610 "\x0E\xFE\xFF\x41", /* SCSU */ 1611 1612 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */ 1613 "\x2B\x2F\x76\x38\x41", /* UTF-7 */ 1614 "\x2B\x2F\x76\x39\x41", /* UTF-7 */ 1615 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */ 1616 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */ 1617 1618 "\xDD\x73\x66\x73" /* UTF-EBCDIC */ 1619 }; 1620 static const char* expected[] = { 1621 "UTF-16BE", 1622 "UTF-16LE", 1623 "UTF-8", 1624 "SCSU", 1625 1626 "UTF-16BE", 1627 "UTF-16LE", 1628 "UTF-8", 1629 "SCSU", 1630 1631 "UTF-16BE", 1632 "UTF-16LE", 1633 "UTF-8", 1634 "SCSU", 1635 1636 "UTF-7", 1637 "UTF-7", 1638 "UTF-7", 1639 "UTF-7", 1640 "UTF-7", 1641 "UTF-EBCDIC" 1642 }; 1643 static const int32_t expectedLength[] ={ 1644 2, 1645 2, 1646 3, 1647 3, 1648 1649 2, 1650 2, 1651 3, 1652 3, 1653 1654 2, 1655 2, 1656 3, 1657 3, 1658 1659 5, 1660 4, 1661 4, 1662 4, 1663 4, 1664 4 1665 }; 1666 int i=0; 1667 UErrorCode err; 1668 int32_t signatureLength = -1; 1669 const char* source = NULL; 1670 const char* enc = NULL; 1671 for( ; i<sizeof(data)/sizeof(char*); i++){ 1672 err = U_ZERO_ERROR; 1673 source = data[i]; 1674 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err); 1675 if(U_FAILURE(err)){ 1676 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1677 continue; 1678 } 1679 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1680 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1681 continue; 1682 } 1683 if(signatureLength != expectedLength[i]){ 1684 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1685 } 1686 } 1687 } 1688 { 1689 static const char* data[] = { 1690 "\xFE\xFF\x00", /* UTF-16BE */ 1691 "\xFF\xFE\x00", /* UTF-16LE */ 1692 "\xEF\xBB\xBF\x00", /* UTF-8 */ 1693 "\x0E\xFE\xFF\x00", /* SCSU */ 1694 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1695 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1696 "\xFE\xFF", /* UTF-16BE */ 1697 "\xFF\xFE", /* UTF-16LE */ 1698 "\xEF\xBB\xBF", /* UTF-8 */ 1699 "\x0E\xFE\xFF", /* SCSU */ 1700 "\x00\x00\xFE\xFF", /* UTF-32BE */ 1701 "\xFF\xFE\x00\x00", /* UTF-32LE */ 1702 "\xFE\xFF\x41\x42", /* UTF-16BE */ 1703 "\xFF\xFE\x41\x41", /* UTF-16LE */ 1704 "\xEF\xBB\xBF\x41", /* UTF-8 */ 1705 "\x0E\xFE\xFF\x41", /* SCSU */ 1706 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */ 1707 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */ 1708 "\xFB\xEE\x28", /* BOCU-1 */ 1709 "\xFF\x41\x42" /* NULL */ 1710 }; 1711 static const int len[] = { 1712 3, 1713 3, 1714 4, 1715 4, 1716 4, 1717 4, 1718 2, 1719 2, 1720 3, 1721 3, 1722 4, 1723 4, 1724 4, 1725 4, 1726 4, 1727 4, 1728 5, 1729 5, 1730 3, 1731 3 1732 }; 1733 1734 static const char* expected[] = { 1735 "UTF-16BE", 1736 "UTF-16LE", 1737 "UTF-8", 1738 "SCSU", 1739 "UTF-32BE", 1740 "UTF-32LE", 1741 "UTF-16BE", 1742 "UTF-16LE", 1743 "UTF-8", 1744 "SCSU", 1745 "UTF-32BE", 1746 "UTF-32LE", 1747 "UTF-16BE", 1748 "UTF-16LE", 1749 "UTF-8", 1750 "SCSU", 1751 "UTF-32BE", 1752 "UTF-32LE", 1753 "BOCU-1", 1754 NULL 1755 }; 1756 static const int32_t expectedLength[] ={ 1757 2, 1758 2, 1759 3, 1760 3, 1761 4, 1762 4, 1763 2, 1764 2, 1765 3, 1766 3, 1767 4, 1768 4, 1769 2, 1770 2, 1771 3, 1772 3, 1773 4, 1774 4, 1775 3, 1776 0 1777 }; 1778 int i=0; 1779 UErrorCode err; 1780 int32_t signatureLength = -1; 1781 int32_t sourceLength=-1; 1782 const char* source = NULL; 1783 const char* enc = NULL; 1784 for( ; i<sizeof(data)/sizeof(char*); i++){ 1785 err = U_ZERO_ERROR; 1786 source = data[i]; 1787 sourceLength = len[i]; 1788 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err); 1789 if(U_FAILURE(err)){ 1790 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err)); 1791 continue; 1792 } 1793 if(enc == NULL || strcmp(enc,expected[i]) !=0){ 1794 if(expected[i] !=NULL){ 1795 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc); 1796 continue; 1797 } 1798 } 1799 if(signatureLength != expectedLength[i]){ 1800 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]); 1801 } 1802 } 1803 } 1804 } 1805 1806 static void TestUTF7() { 1807 /* test input */ 1808 static const uint8_t in[]={ 1809 /* H - +Jjo- - ! +- +2AHcAQ */ 1810 0x48, 1811 0x2d, 1812 0x2b, 0x4a, 0x6a, 0x6f, 1813 0x2d, 0x2d, 1814 0x21, 1815 0x2b, 0x2d, 1816 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51 1817 }; 1818 1819 /* expected test results */ 1820 static const int32_t results[]={ 1821 /* number of bytes read, code point */ 1822 1, 0x48, 1823 1, 0x2d, 1824 4, 0x263a, /* <WHITE SMILING FACE> */ 1825 2, 0x2d, 1826 1, 0x21, 1827 2, 0x2b, 1828 7, 0x10401 1829 }; 1830 1831 const char *cnvName; 1832 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1833 UErrorCode errorCode=U_ZERO_ERROR; 1834 UConverter *cnv=ucnv_open("UTF-7", &errorCode); 1835 if(U_FAILURE(errorCode)) { 1836 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); 1837 return; 1838 } 1839 TestNextUChar(cnv, source, limit, results, "UTF-7"); 1840 /* Test the condition when source >= sourceLimit */ 1841 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1842 cnvName = ucnv_getName(cnv, &errorCode); 1843 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) { 1844 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1845 } 1846 ucnv_close(cnv); 1847 } 1848 1849 static void TestIMAP() { 1850 /* test input */ 1851 static const uint8_t in[]={ 1852 /* H - &Jjo- - ! &- &2AHcAQ- \ */ 1853 0x48, 1854 0x2d, 1855 0x26, 0x4a, 0x6a, 0x6f, 1856 0x2d, 0x2d, 1857 0x21, 1858 0x26, 0x2d, 1859 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d 1860 }; 1861 1862 /* expected test results */ 1863 static const int32_t results[]={ 1864 /* number of bytes read, code point */ 1865 1, 0x48, 1866 1, 0x2d, 1867 4, 0x263a, /* <WHITE SMILING FACE> */ 1868 2, 0x2d, 1869 1, 0x21, 1870 2, 0x26, 1871 7, 0x10401 1872 }; 1873 1874 const char *cnvName; 1875 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 1876 UErrorCode errorCode=U_ZERO_ERROR; 1877 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode); 1878 if(U_FAILURE(errorCode)) { 1879 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); 1880 return; 1881 } 1882 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name"); 1883 /* Test the condition when source >= sourceLimit */ 1884 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1885 cnvName = ucnv_getName(cnv, &errorCode); 1886 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) { 1887 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode)); 1888 } 1889 ucnv_close(cnv); 1890 } 1891 1892 static void TestUTF8() { 1893 /* test input */ 1894 static const uint8_t in[]={ 1895 0x61, 1896 0xc2, 0x80, 1897 0xe0, 0xa0, 0x80, 1898 0xf0, 0x90, 0x80, 0x80, 1899 0xf4, 0x84, 0x8c, 0xa1, 1900 0xf0, 0x90, 0x90, 0x81 1901 }; 1902 1903 /* expected test results */ 1904 static const int32_t results[]={ 1905 /* number of bytes read, code point */ 1906 1, 0x61, 1907 2, 0x80, 1908 3, 0x800, 1909 4, 0x10000, 1910 4, 0x104321, 1911 4, 0x10401 1912 }; 1913 1914 /* error test input */ 1915 static const uint8_t in2[]={ 1916 0x61, 1917 0xc0, 0x80, /* illegal non-shortest form */ 1918 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1919 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1920 0xc0, 0xc0, /* illegal trail byte */ 1921 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1922 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1923 0xfe, /* illegal byte altogether */ 1924 0x62 1925 }; 1926 1927 /* expected error test results */ 1928 static const int32_t results2[]={ 1929 /* number of bytes read, code point */ 1930 1, 0x61, 1931 22, 0x62 1932 }; 1933 1934 UConverterToUCallback cb; 1935 const void *p; 1936 1937 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 1938 UErrorCode errorCode=U_ZERO_ERROR; 1939 UConverter *cnv=ucnv_open("UTF-8", &errorCode); 1940 if(U_FAILURE(errorCode)) { 1941 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode)); 1942 return; 1943 } 1944 TestNextUChar(cnv, source, limit, results, "UTF-8"); 1945 /* Test the condition when source >= sourceLimit */ 1946 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 1947 1948 /* test error behavior with a skip callback */ 1949 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 1950 source=(const char *)in2; 1951 limit=(const char *)(in2+sizeof(in2)); 1952 TestNextUChar(cnv, source, limit, results2, "UTF-8"); 1953 1954 ucnv_close(cnv); 1955 } 1956 1957 static void TestCESU8() { 1958 /* test input */ 1959 static const uint8_t in[]={ 1960 0x61, 1961 0xc2, 0x80, 1962 0xe0, 0xa0, 0x80, 1963 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 1964 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82, 1965 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf, 1966 0xef, 0xbf, 0xbc 1967 }; 1968 1969 /* expected test results */ 1970 static const int32_t results[]={ 1971 /* number of bytes read, code point */ 1972 1, 0x61, 1973 2, 0x80, 1974 3, 0x800, 1975 6, 0x10000, 1976 3, 0xdc01, 1977 -1,0xd802, /* may read 3 or 6 bytes */ 1978 -1,0x10ffff,/* may read 0 or 3 bytes */ 1979 3, 0xfffc 1980 }; 1981 1982 /* error test input */ 1983 static const uint8_t in2[]={ 1984 0x61, 1985 0xc0, 0x80, /* illegal non-shortest form */ 1986 0xe0, 0x80, 0x80, /* illegal non-shortest form */ 1987 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */ 1988 0xc0, 0xc0, /* illegal trail byte */ 1989 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */ 1990 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */ 1991 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */ 1992 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */ 1993 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */ 1994 0xfe, /* illegal byte altogether */ 1995 0x62 1996 }; 1997 1998 /* expected error test results */ 1999 static const int32_t results2[]={ 2000 /* number of bytes read, code point */ 2001 1, 0x61, 2002 34, 0x62 2003 }; 2004 2005 UConverterToUCallback cb; 2006 const void *p; 2007 2008 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); 2009 UErrorCode errorCode=U_ZERO_ERROR; 2010 UConverter *cnv=ucnv_open("CESU-8", &errorCode); 2011 if(U_FAILURE(errorCode)) { 2012 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode)); 2013 return; 2014 } 2015 TestNextUChar(cnv, source, limit, results, "CESU-8"); 2016 /* Test the condition when source >= sourceLimit */ 2017 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2018 2019 /* test error behavior with a skip callback */ 2020 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2021 source=(const char *)in2; 2022 limit=(const char *)(in2+sizeof(in2)); 2023 TestNextUChar(cnv, source, limit, results2, "CESU-8"); 2024 2025 ucnv_close(cnv); 2026 } 2027 2028 static void TestUTF16() { 2029 /* test input */ 2030 static const uint8_t in1[]={ 2031 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff 2032 }; 2033 static const uint8_t in2[]={ 2034 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff 2035 }; 2036 static const uint8_t in3[]={ 2037 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01 2038 }; 2039 2040 /* expected test results */ 2041 static const int32_t results1[]={ 2042 /* number of bytes read, code point */ 2043 4, 0x4e00, 2044 2, 0xfeff 2045 }; 2046 static const int32_t results2[]={ 2047 /* number of bytes read, code point */ 2048 4, 0x004e, 2049 2, 0xfffe 2050 }; 2051 static const int32_t results3[]={ 2052 /* number of bytes read, code point */ 2053 2, 0xfefe, 2054 2, 0x4e00, 2055 2, 0xfeff, 2056 4, 0x20001 2057 }; 2058 2059 const char *source, *limit; 2060 2061 UErrorCode errorCode=U_ZERO_ERROR; 2062 UConverter *cnv=ucnv_open("UTF-16", &errorCode); 2063 if(U_FAILURE(errorCode)) { 2064 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode)); 2065 return; 2066 } 2067 2068 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2069 TestNextUChar(cnv, source, limit, results1, "UTF-16"); 2070 2071 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2072 ucnv_resetToUnicode(cnv); 2073 TestNextUChar(cnv, source, limit, results2, "UTF-16"); 2074 2075 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2076 ucnv_resetToUnicode(cnv); 2077 TestNextUChar(cnv, source, limit, results3, "UTF-16"); 2078 2079 /* Test the condition when source >= sourceLimit */ 2080 ucnv_resetToUnicode(cnv); 2081 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2082 2083 ucnv_close(cnv); 2084 } 2085 2086 static void TestUTF16BE() { 2087 /* test input */ 2088 static const uint8_t in[]={ 2089 0x00, 0x61, 2090 0x00, 0xc0, 2091 0x00, 0x31, 2092 0x00, 0xf4, 2093 0xce, 0xfe, 2094 0xd8, 0x01, 0xdc, 0x01 2095 }; 2096 2097 /* expected test results */ 2098 static const int32_t results[]={ 2099 /* number of bytes read, code point */ 2100 2, 0x61, 2101 2, 0xc0, 2102 2, 0x31, 2103 2, 0xf4, 2104 2, 0xcefe, 2105 4, 0x10401 2106 }; 2107 2108 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2109 UErrorCode errorCode=U_ZERO_ERROR; 2110 UConverter *cnv=ucnv_open("utf-16be", &errorCode); 2111 if(U_FAILURE(errorCode)) { 2112 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode)); 2113 return; 2114 } 2115 TestNextUChar(cnv, source, limit, results, "UTF-16BE"); 2116 /* Test the condition when source >= sourceLimit */ 2117 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2118 /*Test for the condition where there is an invalid character*/ 2119 { 2120 static const uint8_t source2[]={0x61}; 2121 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2122 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2123 } 2124 #if 0 2125 /* 2126 * Test disabled because currently the UTF-16BE/LE converters are supposed 2127 * to not set errors for unpaired surrogates. 2128 * This may change with 2129 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2130 */ 2131 2132 /*Test for the condition where there is a surrogate pair*/ 2133 { 2134 const uint8_t source2[]={0xd8, 0x01}; 2135 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2136 } 2137 #endif 2138 ucnv_close(cnv); 2139 } 2140 2141 static void 2142 TestUTF16LE() { 2143 /* test input */ 2144 static const uint8_t in[]={ 2145 0x61, 0x00, 2146 0x31, 0x00, 2147 0x4e, 0x2e, 2148 0x4e, 0x00, 2149 0x01, 0xd8, 0x01, 0xdc 2150 }; 2151 2152 /* expected test results */ 2153 static const int32_t results[]={ 2154 /* number of bytes read, code point */ 2155 2, 0x61, 2156 2, 0x31, 2157 2, 0x2e4e, 2158 2, 0x4e, 2159 4, 0x10401 2160 }; 2161 2162 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2163 UErrorCode errorCode=U_ZERO_ERROR; 2164 UConverter *cnv=ucnv_open("utf-16le", &errorCode); 2165 if(U_FAILURE(errorCode)) { 2166 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode)); 2167 return; 2168 } 2169 TestNextUChar(cnv, source, limit, results, "UTF-16LE"); 2170 /* Test the condition when source >= sourceLimit */ 2171 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2172 /*Test for the condition where there is an invalid character*/ 2173 { 2174 static const uint8_t source2[]={0x61}; 2175 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2176 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character"); 2177 } 2178 #if 0 2179 /* 2180 * Test disabled because currently the UTF-16BE/LE converters are supposed 2181 * to not set errors for unpaired surrogates. 2182 * This may change with 2183 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32 2184 */ 2185 2186 /*Test for the condition where there is a surrogate character*/ 2187 { 2188 static const uint8_t source2[]={0x01, 0xd8}; 2189 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character"); 2190 } 2191 #endif 2192 2193 ucnv_close(cnv); 2194 } 2195 2196 static void TestUTF32() { 2197 /* test input */ 2198 static const uint8_t in1[]={ 2199 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff 2200 }; 2201 static const uint8_t in2[]={ 2202 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 2203 }; 2204 static const uint8_t in3[]={ 2205 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 2206 }; 2207 2208 /* expected test results */ 2209 static const int32_t results1[]={ 2210 /* number of bytes read, code point */ 2211 8, 0x100f00, 2212 4, 0xfeff 2213 }; 2214 static const int32_t results2[]={ 2215 /* number of bytes read, code point */ 2216 8, 0x0f1000, 2217 4, 0xfffe 2218 }; 2219 static const int32_t results3[]={ 2220 /* number of bytes read, code point */ 2221 4, 0xfefe, 2222 4, 0x100f00, 2223 4, 0xfffd, /* unmatched surrogate */ 2224 4, 0xfffd /* unmatched surrogate */ 2225 }; 2226 2227 const char *source, *limit; 2228 2229 UErrorCode errorCode=U_ZERO_ERROR; 2230 UConverter *cnv=ucnv_open("UTF-32", &errorCode); 2231 if(U_FAILURE(errorCode)) { 2232 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode)); 2233 return; 2234 } 2235 2236 source=(const char *)in1, limit=(const char *)in1+sizeof(in1); 2237 TestNextUChar(cnv, source, limit, results1, "UTF-32"); 2238 2239 source=(const char *)in2, limit=(const char *)in2+sizeof(in2); 2240 ucnv_resetToUnicode(cnv); 2241 TestNextUChar(cnv, source, limit, results2, "UTF-32"); 2242 2243 source=(const char *)in3, limit=(const char *)in3+sizeof(in3); 2244 ucnv_resetToUnicode(cnv); 2245 TestNextUChar(cnv, source, limit, results3, "UTF-32"); 2246 2247 /* Test the condition when source >= sourceLimit */ 2248 ucnv_resetToUnicode(cnv); 2249 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2250 2251 ucnv_close(cnv); 2252 } 2253 2254 static void 2255 TestUTF32BE() { 2256 /* test input */ 2257 static const uint8_t in[]={ 2258 0x00, 0x00, 0x00, 0x61, 2259 0x00, 0x00, 0x30, 0x61, 2260 0x00, 0x00, 0xdc, 0x00, 2261 0x00, 0x00, 0xd8, 0x00, 2262 0x00, 0x00, 0xdf, 0xff, 2263 0x00, 0x00, 0xff, 0xfe, 2264 0x00, 0x10, 0xab, 0xcd, 2265 0x00, 0x10, 0xff, 0xff 2266 }; 2267 2268 /* expected test results */ 2269 static const int32_t results[]={ 2270 /* number of bytes read, code point */ 2271 4, 0x61, 2272 4, 0x3061, 2273 4, 0xfffd, 2274 4, 0xfffd, 2275 4, 0xfffd, 2276 4, 0xfffe, 2277 4, 0x10abcd, 2278 4, 0x10ffff 2279 }; 2280 2281 /* error test input */ 2282 static const uint8_t in2[]={ 2283 0x00, 0x00, 0x00, 0x61, 2284 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */ 2285 0x00, 0x00, 0x00, 0x62, 2286 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2287 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */ 2288 0x00, 0x00, 0x01, 0x62, 2289 0x00, 0x00, 0x02, 0x62 2290 }; 2291 2292 /* expected error test results */ 2293 static const int32_t results2[]={ 2294 /* number of bytes read, code point */ 2295 4, 0x61, 2296 8, 0x62, 2297 12, 0x162, 2298 4, 0x262 2299 }; 2300 2301 UConverterToUCallback cb; 2302 const void *p; 2303 2304 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2305 UErrorCode errorCode=U_ZERO_ERROR; 2306 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode); 2307 if(U_FAILURE(errorCode)) { 2308 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode)); 2309 return; 2310 } 2311 TestNextUChar(cnv, source, limit, results, "UTF-32BE"); 2312 2313 /* Test the condition when source >= sourceLimit */ 2314 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2315 2316 /* test error behavior with a skip callback */ 2317 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2318 source=(const char *)in2; 2319 limit=(const char *)(in2+sizeof(in2)); 2320 TestNextUChar(cnv, source, limit, results2, "UTF-32BE"); 2321 2322 ucnv_close(cnv); 2323 } 2324 2325 static void 2326 TestUTF32LE() { 2327 /* test input */ 2328 static const uint8_t in[]={ 2329 0x61, 0x00, 0x00, 0x00, 2330 0x61, 0x30, 0x00, 0x00, 2331 0x00, 0xdc, 0x00, 0x00, 2332 0x00, 0xd8, 0x00, 0x00, 2333 0xff, 0xdf, 0x00, 0x00, 2334 0xfe, 0xff, 0x00, 0x00, 2335 0xcd, 0xab, 0x10, 0x00, 2336 0xff, 0xff, 0x10, 0x00 2337 }; 2338 2339 /* expected test results */ 2340 static const int32_t results[]={ 2341 /* number of bytes read, code point */ 2342 4, 0x61, 2343 4, 0x3061, 2344 4, 0xfffd, 2345 4, 0xfffd, 2346 4, 0xfffd, 2347 4, 0xfffe, 2348 4, 0x10abcd, 2349 4, 0x10ffff 2350 }; 2351 2352 /* error test input */ 2353 static const uint8_t in2[]={ 2354 0x61, 0x00, 0x00, 0x00, 2355 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */ 2356 0x62, 0x00, 0x00, 0x00, 2357 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */ 2358 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */ 2359 0x62, 0x01, 0x00, 0x00, 2360 0x62, 0x02, 0x00, 0x00, 2361 }; 2362 2363 /* expected error test results */ 2364 static const int32_t results2[]={ 2365 /* number of bytes read, code point */ 2366 4, 0x61, 2367 8, 0x62, 2368 12, 0x162, 2369 4, 0x262, 2370 }; 2371 2372 UConverterToUCallback cb; 2373 const void *p; 2374 2375 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2376 UErrorCode errorCode=U_ZERO_ERROR; 2377 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode); 2378 if(U_FAILURE(errorCode)) { 2379 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode)); 2380 return; 2381 } 2382 TestNextUChar(cnv, source, limit, results, "UTF-32LE"); 2383 2384 /* Test the condition when source >= sourceLimit */ 2385 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2386 2387 /* test error behavior with a skip callback */ 2388 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode); 2389 source=(const char *)in2; 2390 limit=(const char *)(in2+sizeof(in2)); 2391 TestNextUChar(cnv, source, limit, results2, "UTF-32LE"); 2392 2393 ucnv_close(cnv); 2394 } 2395 2396 static void 2397 TestLATIN1() { 2398 /* test input */ 2399 static const uint8_t in[]={ 2400 0x61, 2401 0x31, 2402 0x32, 2403 0xc0, 2404 0xf0, 2405 0xf4, 2406 }; 2407 2408 /* expected test results */ 2409 static const int32_t results[]={ 2410 /* number of bytes read, code point */ 2411 1, 0x61, 2412 1, 0x31, 2413 1, 0x32, 2414 1, 0xc0, 2415 1, 0xf0, 2416 1, 0xf4, 2417 }; 2418 static const uint16_t in1[] = { 2419 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2420 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2421 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2422 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2423 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2424 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2425 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2426 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2427 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2428 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2429 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2430 0xcb, 0x82 2431 }; 2432 static const uint8_t out1[] = { 2433 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 2434 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 2435 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 2436 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 2437 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 2438 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 2439 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 2440 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 2441 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 2442 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 2443 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 2444 0xcb, 0x82 2445 }; 2446 static const uint16_t in2[]={ 2447 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2448 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2449 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2450 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2451 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2452 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2453 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2454 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2455 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2456 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2457 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2458 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2459 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2460 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2461 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2462 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2463 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2464 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2465 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2466 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2467 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2468 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2469 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2470 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2471 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2472 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2473 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2474 0x37, 0x20, 0x2A, 0x2F, 2475 }; 2476 static const unsigned char out2[]={ 2477 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 2478 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 2479 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 2480 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 2481 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 2482 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 2483 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70, 2484 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 2485 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 2486 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 2487 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 2488 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 2489 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 2490 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 2491 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2492 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2493 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 2494 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 2495 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 2496 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 2497 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 2498 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 2499 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 2500 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 2501 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 2502 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 2503 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 2504 0x37, 0x20, 0x2A, 0x2F, 2505 }; 2506 const char *source=(const char *)in; 2507 const char *limit=(const char *)in+sizeof(in); 2508 2509 UErrorCode errorCode=U_ZERO_ERROR; 2510 UConverter *cnv=ucnv_open("LATIN_1", &errorCode); 2511 if(U_FAILURE(errorCode)) { 2512 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode)); 2513 return; 2514 } 2515 TestNextUChar(cnv, source, limit, results, "LATIN_1"); 2516 /* Test the condition when source >= sourceLimit */ 2517 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2518 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1)); 2519 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2)); 2520 2521 ucnv_close(cnv); 2522 } 2523 2524 static void 2525 TestSBCS() { 2526 /* test input */ 2527 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4}; 2528 /* expected test results */ 2529 static const int32_t results[]={ 2530 /* number of bytes read, code point */ 2531 1, 0x61, 2532 1, 0xbf, 2533 1, 0xc4, 2534 1, 0x2021, 2535 1, 0xf8ff, 2536 1, 0x00d9 2537 }; 2538 2539 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2540 UErrorCode errorCode=U_ZERO_ERROR; 2541 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode); 2542 if(U_FAILURE(errorCode)) { 2543 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode)); 2544 return; 2545 } 2546 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)"); 2547 /* Test the condition when source >= sourceLimit */ 2548 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2549 /*Test for Illegal character */ /* 2550 { 2551 static const uint8_t input1[]={ 0xA1 }; 2552 const char* illegalsource=(const char*)input1; 2553 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte"); 2554 } 2555 */ 2556 ucnv_close(cnv); 2557 } 2558 2559 static void 2560 TestDBCS() { 2561 /* test input */ 2562 static const uint8_t in[]={ 2563 0x44, 0x6a, 2564 0xc4, 0x9c, 2565 0x7a, 0x74, 2566 0x46, 0xab, 2567 0x42, 0x5b, 2568 2569 }; 2570 2571 /* expected test results */ 2572 static const int32_t results[]={ 2573 /* number of bytes read, code point */ 2574 2, 0x00a7, 2575 2, 0xe1d2, 2576 2, 0x6962, 2577 2, 0xf842, 2578 2, 0xffe5, 2579 }; 2580 2581 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2582 UErrorCode errorCode=U_ZERO_ERROR; 2583 2584 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode); 2585 if(U_FAILURE(errorCode)) { 2586 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode)); 2587 return; 2588 } 2589 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)"); 2590 /* Test the condition when source >= sourceLimit */ 2591 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2592 /*Test for the condition where there is an invalid character*/ 2593 { 2594 static const uint8_t source2[]={0x1a, 0x1b}; 2595 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2596 } 2597 /*Test for the condition where we have a truncated char*/ 2598 { 2599 static const uint8_t source1[]={0xc4}; 2600 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2601 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2602 } 2603 ucnv_close(cnv); 2604 } 2605 2606 static void 2607 TestMBCS() { 2608 /* test input */ 2609 static const uint8_t in[]={ 2610 0x01, 2611 0xa6, 0xa3, 2612 0x00, 2613 0xa6, 0xa1, 2614 0x08, 2615 0xc2, 0x76, 2616 0xc2, 0x78, 2617 2618 }; 2619 2620 /* expected test results */ 2621 static const int32_t results[]={ 2622 /* number of bytes read, code point */ 2623 1, 0x0001, 2624 2, 0x250c, 2625 1, 0x0000, 2626 2, 0x2500, 2627 1, 0x0008, 2628 2, 0xd60c, 2629 2, 0xd60e, 2630 }; 2631 2632 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2633 UErrorCode errorCode=U_ZERO_ERROR; 2634 2635 UConverter *cnv=ucnv_open("ibm-1363", &errorCode); 2636 if(U_FAILURE(errorCode)) { 2637 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode)); 2638 return; 2639 } 2640 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)"); 2641 /* Test the condition when source >= sourceLimit */ 2642 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2643 /*Test for the condition where there is an invalid character*/ 2644 { 2645 static const uint8_t source2[]={0xa1, 0x80}; 2646 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); 2647 } 2648 /*Test for the condition where we have a truncated char*/ 2649 { 2650 static const uint8_t source1[]={0xc4}; 2651 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2652 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2653 } 2654 ucnv_close(cnv); 2655 2656 } 2657 2658 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO 2659 static void 2660 TestICCRunout() { 2661 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */ 2662 2663 const char *cnvName = "ibm-1363"; 2664 UErrorCode status = U_ZERO_ERROR; 2665 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 }; 2666 /* UChar expectUData[] = { 0x00a1, 0x001a }; */ 2667 const char *source = sourceData; 2668 const char *sourceLim = sourceData+sizeof(sourceData); 2669 UChar c1, c2, c3; 2670 UConverter *cnv=ucnv_open(cnvName, &status); 2671 if(U_FAILURE(status)) { 2672 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status)); 2673 return; 2674 } 2675 2676 #if 0 2677 { 2678 UChar targetBuf[256]; 2679 UChar *target = targetBuf; 2680 UChar *targetLim = target+256; 2681 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status); 2682 2683 log_info("After convert: target@%d, source@%d, status%s\n", 2684 target-targetBuf, source-sourceData, u_errorName(status)); 2685 2686 if(U_FAILURE(status)) { 2687 log_err("Failed to convert: %s\n", u_errorName(status)); 2688 } else { 2689 2690 } 2691 } 2692 #endif 2693 2694 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2695 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status)); 2696 2697 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2698 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status)); 2699 2700 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status); 2701 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status)); 2702 2703 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) { 2704 log_verbose("OK\n"); 2705 } else { 2706 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n"); 2707 } 2708 2709 ucnv_close(cnv); 2710 2711 } 2712 #endif 2713 2714 #ifdef U_ENABLE_GENERIC_ISO_2022 2715 2716 static void 2717 TestISO_2022() { 2718 /* test input */ 2719 static const uint8_t in[]={ 2720 0x1b, 0x25, 0x42, 2721 0x31, 2722 0x32, 2723 0x61, 2724 0xc2, 0x80, 2725 0xe0, 0xa0, 0x80, 2726 0xf0, 0x90, 0x80, 0x80 2727 }; 2728 2729 2730 2731 /* expected test results */ 2732 static const int32_t results[]={ 2733 /* number of bytes read, code point */ 2734 4, 0x0031, /* 4 bytes including the escape sequence */ 2735 1, 0x0032, 2736 1, 0x61, 2737 2, 0x80, 2738 3, 0x800, 2739 4, 0x10000 2740 }; 2741 2742 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 2743 UErrorCode errorCode=U_ZERO_ERROR; 2744 UConverter *cnv; 2745 2746 cnv=ucnv_open("ISO_2022", &errorCode); 2747 if(U_FAILURE(errorCode)) { 2748 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 2749 return; 2750 } 2751 TestNextUChar(cnv, source, limit, results, "ISO_2022"); 2752 2753 /* Test the condition when source >= sourceLimit */ 2754 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source"); 2755 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 2756 /*Test for the condition where we have a truncated char*/ 2757 { 2758 static const uint8_t source1[]={0xc4}; 2759 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2760 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated"); 2761 } 2762 /*Test for the condition where there is an invalid character*/ 2763 { 2764 static const uint8_t source2[]={0xa1, 0x01}; 2765 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character"); 2766 } 2767 ucnv_close(cnv); 2768 } 2769 2770 #endif 2771 2772 static void 2773 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2774 const UChar* uSource; 2775 const UChar* uSourceLimit; 2776 const char* cSource; 2777 const char* cSourceLimit; 2778 UChar *uTargetLimit =NULL; 2779 UChar *uTarget; 2780 char *cTarget; 2781 const char *cTargetLimit; 2782 char *cBuf; 2783 UChar *uBuf; /*,*test;*/ 2784 int32_t uBufSize = 120; 2785 int len=0; 2786 int i=2; 2787 UErrorCode errorCode=U_ZERO_ERROR; 2788 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2789 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2790 ucnv_reset(cnv); 2791 for(;--i>0; ){ 2792 uSource = (UChar*) source; 2793 uSourceLimit=(const UChar*)sourceLimit; 2794 cTarget = cBuf; 2795 uTarget = uBuf; 2796 cSource = cBuf; 2797 cTargetLimit = cBuf; 2798 uTargetLimit = uBuf; 2799 2800 do{ 2801 2802 cTargetLimit = cTargetLimit+ i; 2803 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2804 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2805 errorCode=U_ZERO_ERROR; 2806 continue; 2807 } 2808 2809 if(U_FAILURE(errorCode)){ 2810 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2811 return; 2812 } 2813 2814 }while (uSource<uSourceLimit); 2815 2816 cSourceLimit =cTarget; 2817 do{ 2818 uTargetLimit=uTargetLimit+i; 2819 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2820 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2821 errorCode=U_ZERO_ERROR; 2822 continue; 2823 } 2824 if(U_FAILURE(errorCode)){ 2825 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2826 return; 2827 } 2828 }while(cSource<cSourceLimit); 2829 2830 uSource = source; 2831 /*test =uBuf;*/ 2832 for(len=0;len<(int)(source - sourceLimit);len++){ 2833 if(uBuf[len]!=uSource[len]){ 2834 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2835 } 2836 } 2837 } 2838 free(uBuf); 2839 free(cBuf); 2840 } 2841 /* Test for Jitterbug 778 */ 2842 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2843 const UChar* uSource; 2844 const UChar* uSourceLimit; 2845 const char* cSource; 2846 UChar *uTargetLimit =NULL; 2847 UChar *uTarget; 2848 char *cTarget; 2849 const char *cTargetLimit; 2850 char *cBuf; 2851 UChar *uBuf,*test; 2852 int32_t uBufSize = 120; 2853 int numCharsInTarget=0; 2854 UErrorCode errorCode=U_ZERO_ERROR; 2855 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2856 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 2857 uSource = source; 2858 uSourceLimit=sourceLimit; 2859 cTarget = cBuf; 2860 cTargetLimit = cBuf +uBufSize*5; 2861 uTarget = uBuf; 2862 uTargetLimit = uBuf+ uBufSize*5; 2863 ucnv_reset(cnv); 2864 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode); 2865 if(U_FAILURE(errorCode)){ 2866 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2867 return; 2868 } 2869 cSource = cBuf; 2870 test =uBuf; 2871 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode); 2872 if(U_FAILURE(errorCode)){ 2873 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode)); 2874 return; 2875 } 2876 uSource = source; 2877 while(uSource<uSourceLimit){ 2878 if(*test!=*uSource){ 2879 2880 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 2881 } 2882 uSource++; 2883 test++; 2884 } 2885 free(uBuf); 2886 free(cBuf); 2887 } 2888 2889 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){ 2890 const UChar* uSource; 2891 const UChar* uSourceLimit; 2892 const char* cSource; 2893 const char* cSourceLimit; 2894 UChar *uTargetLimit =NULL; 2895 UChar *uTarget; 2896 char *cTarget; 2897 const char *cTargetLimit; 2898 char *cBuf; 2899 UChar *uBuf; /*,*test;*/ 2900 int32_t uBufSize = 120; 2901 int len=0; 2902 int i=2; 2903 const UChar *temp = sourceLimit; 2904 UErrorCode errorCode=U_ZERO_ERROR; 2905 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 2906 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 2907 2908 ucnv_reset(cnv); 2909 for(;--i>0;){ 2910 uSource = (UChar*) source; 2911 cTarget = cBuf; 2912 uTarget = uBuf; 2913 cSource = cBuf; 2914 cTargetLimit = cBuf; 2915 uTargetLimit = uBuf+uBufSize*5; 2916 cTargetLimit = cTargetLimit+uBufSize*10; 2917 uSourceLimit=uSource; 2918 do{ 2919 2920 if (uSourceLimit < sourceLimit) { 2921 uSourceLimit = uSourceLimit+1; 2922 } 2923 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode); 2924 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2925 errorCode=U_ZERO_ERROR; 2926 continue; 2927 } 2928 2929 if(U_FAILURE(errorCode)){ 2930 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2931 return; 2932 } 2933 2934 }while (uSource<temp); 2935 2936 cSourceLimit =cBuf; 2937 do{ 2938 if (cSourceLimit < cBuf + (cTarget - cBuf)) { 2939 cSourceLimit = cSourceLimit+1; 2940 } 2941 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode); 2942 if(errorCode==U_BUFFER_OVERFLOW_ERROR){ 2943 errorCode=U_ZERO_ERROR; 2944 continue; 2945 } 2946 if(U_FAILURE(errorCode)){ 2947 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 2948 return; 2949 } 2950 }while(cSource<cTarget); 2951 2952 uSource = source; 2953 /*test =uBuf;*/ 2954 for(;len<(int)(source - sourceLimit);len++){ 2955 if(uBuf[len]!=uSource[len]){ 2956 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ; 2957 } 2958 } 2959 } 2960 free(uBuf); 2961 free(cBuf); 2962 } 2963 static void 2964 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit, 2965 const uint16_t results[], const char* message){ 2966 /* const char* s0; */ 2967 const char* s=(char*)source; 2968 const uint16_t *r=results; 2969 UErrorCode errorCode=U_ZERO_ERROR; 2970 uint32_t c,exC; 2971 ucnv_reset(cnv); 2972 while(s<limit) { 2973 /* s0=s; */ 2974 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode); 2975 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2976 break; /* no more significant input */ 2977 } else if(U_FAILURE(errorCode)) { 2978 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode)); 2979 break; 2980 } else { 2981 if(U16_IS_LEAD(*r)){ 2982 int i =0, len = 2; 2983 U16_NEXT(r, i, len, exC); 2984 r++; 2985 }else{ 2986 exC = *r; 2987 } 2988 if(c!=(uint32_t)(exC)) 2989 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c); 2990 } 2991 r++; 2992 } 2993 } 2994 2995 static int TestJitterbug930(const char* enc){ 2996 UErrorCode err = U_ZERO_ERROR; 2997 UConverter*converter; 2998 char out[80]; 2999 char*target = out; 3000 UChar in[4]; 3001 const UChar*source = in; 3002 int32_t off[80]; 3003 int32_t* offsets = off; 3004 int numOffWritten=0; 3005 UBool flush = 0; 3006 converter = my_ucnv_open(enc, &err); 3007 3008 in[0] = 0x41; /* 0x4E00;*/ 3009 in[1] = 0x4E01; 3010 in[2] = 0x4E02; 3011 in[3] = 0x4E03; 3012 3013 memset(off, '*', sizeof(off)); 3014 3015 ucnv_fromUnicode (converter, 3016 &target, 3017 target+2, 3018 &source, 3019 source+3, 3020 offsets, 3021 flush, 3022 &err); 3023 3024 /* writes three bytes into the output buffer: 41 1B 24 3025 * but offsets contains 0 1 1 3026 */ 3027 while(*offsets< off[10]){ 3028 numOffWritten++; 3029 offsets++; 3030 } 3031 log_verbose("Testing Jitterbug 930 for encoding %s",enc); 3032 if(numOffWritten!= (int)(target-out)){ 3033 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten); 3034 } 3035 3036 err = U_ZERO_ERROR; 3037 3038 memset(off,'*' , sizeof(off)); 3039 3040 flush = 1; 3041 offsets=off; 3042 ucnv_fromUnicode (converter, 3043 &target, 3044 target+4, 3045 &source, 3046 source, 3047 offsets, 3048 flush, 3049 &err); 3050 numOffWritten=0; 3051 while(*offsets< off[10]){ 3052 numOffWritten++; 3053 if(*offsets!= -1){ 3054 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ; 3055 } 3056 offsets++; 3057 } 3058 3059 /* writes 42 43 7A into output buffer, 3060 * offsets contains -1 -1 -1 3061 */ 3062 ucnv_close(converter); 3063 return 0; 3064 } 3065 3066 static void 3067 TestHZ() { 3068 /* test input */ 3069 static const uint16_t in[]={ 3070 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014, 3071 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0, 3072 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94, 3073 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355, 3074 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8, 3075 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496, 3076 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477, 3077 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480, 3078 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E, 3079 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 3080 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 3081 0x005A, 0x005B, 0x005C, 0x000A 3082 }; 3083 const UChar* uSource; 3084 const UChar* uSourceLimit; 3085 const char* cSource; 3086 const char* cSourceLimit; 3087 UChar *uTargetLimit =NULL; 3088 UChar *uTarget; 3089 char *cTarget; 3090 const char *cTargetLimit; 3091 char *cBuf; 3092 UChar *uBuf,*test; 3093 int32_t uBufSize = 120; 3094 UErrorCode errorCode=U_ZERO_ERROR; 3095 UConverter *cnv; 3096 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3097 int32_t* myOff= offsets; 3098 cnv=ucnv_open("HZ", &errorCode); 3099 if(U_FAILURE(errorCode)) { 3100 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode)); 3101 return; 3102 } 3103 3104 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3105 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3106 uSource = (const UChar*)in; 3107 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3108 cTarget = cBuf; 3109 cTargetLimit = cBuf +uBufSize*5; 3110 uTarget = uBuf; 3111 uTargetLimit = uBuf+ uBufSize*5; 3112 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3113 if(U_FAILURE(errorCode)){ 3114 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3115 return; 3116 } 3117 cSource = cBuf; 3118 cSourceLimit =cTarget; 3119 test =uBuf; 3120 myOff=offsets; 3121 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3122 if(U_FAILURE(errorCode)){ 3123 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3124 return; 3125 } 3126 uSource = (const UChar*)in; 3127 while(uSource<uSourceLimit){ 3128 if(*test!=*uSource){ 3129 3130 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3131 } 3132 uSource++; 3133 test++; 3134 } 3135 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding"); 3136 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3137 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3138 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3139 TestJitterbug930("csISO2022JP"); 3140 ucnv_close(cnv); 3141 free(offsets); 3142 free(uBuf); 3143 free(cBuf); 3144 } 3145 3146 static void 3147 TestISCII(){ 3148 /* test input */ 3149 static const uint16_t in[]={ 3150 /* test full range of Devanagari */ 3151 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A, 3152 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911, 3153 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D, 3154 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926, 3155 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F, 3156 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937, 3157 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943, 3158 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D, 3159 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C, 3160 0x096D,0x096E,0x096F, 3161 /* test Soft halant*/ 3162 0x0915,0x094d, 0x200D, 3163 /* test explicit halant */ 3164 0x0915,0x094d, 0x200c, 3165 /* test double danda */ 3166 0x965, 3167 /* test ASCII */ 3168 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3169 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3170 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3171 /* tests from Lotus */ 3172 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043, 3173 0x0930,0x094D,0x200D, 3174 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043, 3175 0x0915,0x0921,0x002B,0x095F, 3176 /* tamil range */ 3177 0x0B86, 0xB87, 0xB88, 3178 /* telugu range */ 3179 0x0C05, 0x0C02, 0x0C03,0x0c31, 3180 /* kannada range */ 3181 0x0C85, 0xC82, 0x0C83, 3182 /* test Abbr sign and Anudatta */ 3183 0x0970, 0x952, 3184 /* 0x0958, 3185 0x0959, 3186 0x095A, 3187 0x095B, 3188 0x095C, 3189 0x095D, 3190 0x095E, 3191 0x095F,*/ 3192 0x0960 /* Vocallic RRI 0xAB, 0xE9*/, 3193 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */, 3194 0x090C , 3195 0x0962, 3196 0x0961 /* Vocallic LL 0xa6, 0xE9 */, 3197 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */, 3198 0x0950 /* OM Symbol 0xa1, 0xE9,*/, 3199 0x093D /* Avagraha 0xEA, 0xE9*/, 3200 0x0958, 3201 0x0959, 3202 0x095A, 3203 0x095B, 3204 0x095C, 3205 0x095D, 3206 0x095E, 3207 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0 3208 }; 3209 static const unsigned char byteArr[]={ 3210 3211 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9, 3212 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2, 3213 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb, 3214 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4, 3215 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd, 3216 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6, 3217 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf, 3218 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8, 3219 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7, 3220 0xf8,0xf9,0xfa, 3221 /* test soft halant */ 3222 0xb3, 0xE8, 0xE9, 3223 /* test explicit halant */ 3224 0xb3, 0xE8, 0xE8, 3225 /* test double danda */ 3226 0xea, 0xea, 3227 /* test ASCII */ 3228 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 3229 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 3230 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 3231 /* test ATR code */ 3232 3233 /* tests from Lotus */ 3234 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43, 3235 0xEF,0x42,0xCF,0xE8,0xD9, 3236 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43, 3237 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE, 3238 /* tamil range */ 3239 0xEF, 0x44, 0xa5, 0xa6, 0xa7, 3240 /* telugu range */ 3241 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0, 3242 /* kannada range */ 3243 0xEF, 0x48,0xa4, 0xa2, 0xa3, 3244 /* anudatta and abbreviation sign */ 3245 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8, 3246 3247 3248 0xAA, 0xE9,/* RI + NUKTA 0x0960*/ 3249 3250 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/ 3251 3252 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/ 3253 3254 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/ 3255 3256 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/ 3257 3258 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/ 3259 3260 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/ 3261 3262 0xEA, 0xE9, /* Danda + Nukta 0x093D*/ 3263 3264 0xB3, 0xE9, /* Ka + NUKTA */ 3265 3266 0xB4, 0xE9, /* Kha + NUKTA */ 3267 3268 0xB5, 0xE9, /* Ga + NUKTA */ 3269 3270 0xBA, 0xE9, 3271 3272 0xBF, 0xE9, 3273 3274 0xC0, 0xE9, 3275 3276 0xC9, 0xE9, 3277 /* INV halant RA */ 3278 0xD9, 0xE8, 0xCF, 3279 0x00, 0x00A0, 3280 /* just consume unhandled codepoints */ 3281 0xEF, 0x30, 3282 3283 }; 3284 testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE); 3285 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr)); 3286 3287 } 3288 3289 static void 3290 TestISO_2022_JP() { 3291 /* test input */ 3292 static const uint16_t in[]={ 3293 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A, 3294 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3295 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3296 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3297 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3298 0x201D, 0x3014, 0x000D, 0x000A, 3299 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3300 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3301 }; 3302 const UChar* uSource; 3303 const UChar* uSourceLimit; 3304 const char* cSource; 3305 const char* cSourceLimit; 3306 UChar *uTargetLimit =NULL; 3307 UChar *uTarget; 3308 char *cTarget; 3309 const char *cTargetLimit; 3310 char *cBuf; 3311 UChar *uBuf,*test; 3312 int32_t uBufSize = 120; 3313 UErrorCode errorCode=U_ZERO_ERROR; 3314 UConverter *cnv; 3315 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3316 int32_t* myOff= offsets; 3317 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3318 if(U_FAILURE(errorCode)) { 3319 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode)); 3320 return; 3321 } 3322 3323 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3324 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3325 uSource = (const UChar*)in; 3326 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3327 cTarget = cBuf; 3328 cTargetLimit = cBuf +uBufSize*5; 3329 uTarget = uBuf; 3330 uTargetLimit = uBuf+ uBufSize*5; 3331 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3332 if(U_FAILURE(errorCode)){ 3333 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3334 return; 3335 } 3336 cSource = cBuf; 3337 cSourceLimit =cTarget; 3338 test =uBuf; 3339 myOff=offsets; 3340 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3341 if(U_FAILURE(errorCode)){ 3342 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3343 return; 3344 } 3345 3346 uSource = (const UChar*)in; 3347 while(uSource<uSourceLimit){ 3348 if(*test!=*uSource){ 3349 3350 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3351 } 3352 uSource++; 3353 test++; 3354 } 3355 3356 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3357 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3358 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding"); 3359 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3360 TestJitterbug930("csISO2022JP"); 3361 ucnv_close(cnv); 3362 free(uBuf); 3363 free(cBuf); 3364 free(offsets); 3365 } 3366 3367 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){ 3368 const UChar* uSource; 3369 const UChar* uSourceLimit; 3370 const char* cSource; 3371 const char* cSourceLimit; 3372 UChar *uTargetLimit =NULL; 3373 UChar *uTarget; 3374 char *cTarget; 3375 const char *cTargetLimit; 3376 char *cBuf; 3377 UChar *uBuf,*test; 3378 int32_t uBufSize = 120*10; 3379 UErrorCode errorCode=U_ZERO_ERROR; 3380 UConverter *cnv; 3381 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) ); 3382 int32_t* myOff= offsets; 3383 cnv=my_ucnv_open(conv, &errorCode); 3384 if(U_FAILURE(errorCode)) { 3385 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode)); 3386 return; 3387 } 3388 3389 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)); 3390 cBuf =(char*)malloc(uBufSize * sizeof(char)); 3391 uSource = (const UChar*)in; 3392 uSourceLimit=uSource+len; 3393 cTarget = cBuf; 3394 cTargetLimit = cBuf +uBufSize; 3395 uTarget = uBuf; 3396 uTargetLimit = uBuf+ uBufSize; 3397 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3398 if(U_FAILURE(errorCode)){ 3399 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3400 return; 3401 } 3402 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/ 3403 cSource = cBuf; 3404 cSourceLimit =cTarget; 3405 test =uBuf; 3406 myOff=offsets; 3407 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3408 if(U_FAILURE(errorCode)){ 3409 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode)); 3410 return; 3411 } 3412 3413 uSource = (const UChar*)in; 3414 while(uSource<uSourceLimit){ 3415 if(*test!=*uSource){ 3416 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ; 3417 } 3418 uSource++; 3419 test++; 3420 } 3421 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv); 3422 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv); 3423 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv); 3424 if(byteArr && byteArrLen!=0){ 3425 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang); 3426 TestToAndFromUChars(in,(const UChar*)&in[len],cnv); 3427 { 3428 cSource = byteArr; 3429 cSourceLimit = cSource+byteArrLen; 3430 test=uBuf; 3431 myOff = offsets; 3432 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3433 if(U_FAILURE(errorCode)){ 3434 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3435 return; 3436 } 3437 3438 uSource = (const UChar*)in; 3439 while(uSource<uSourceLimit){ 3440 if(*test!=*uSource){ 3441 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3442 } 3443 uSource++; 3444 test++; 3445 } 3446 } 3447 } 3448 3449 ucnv_close(cnv); 3450 free(uBuf); 3451 free(cBuf); 3452 free(offsets); 3453 } 3454 static UChar U_CALLCONV 3455 _charAt(int32_t offset, void *context) { 3456 return ((char*)context)[offset]; 3457 } 3458 3459 static int32_t 3460 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){ 3461 int32_t srcIndex=0; 3462 int32_t dstIndex=0; 3463 if(U_FAILURE(*status)){ 3464 return 0; 3465 } 3466 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){ 3467 *status = U_ILLEGAL_ARGUMENT_ERROR; 3468 return 0; 3469 } 3470 if(srcLen==-1){ 3471 srcLen = (int32_t)uprv_strlen(src); 3472 } 3473 3474 for (; srcIndex<srcLen; ) { 3475 UChar32 c = src[srcIndex++]; 3476 if (c == 0x005C /*'\\'*/) { 3477 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/ 3478 if (c == (UChar32)0xFFFFFFFF) { 3479 *status=U_INVALID_CHAR_FOUND; /* return empty string */ 3480 break; /* invalid escape sequence */ 3481 } 3482 } 3483 if(dstIndex < dstLen){ 3484 if(c>0xFFFF){ 3485 dst[dstIndex++] = U16_LEAD(c); 3486 if(dstIndex<dstLen){ 3487 dst[dstIndex]=U16_TRAIL(c); 3488 }else{ 3489 *status=U_BUFFER_OVERFLOW_ERROR; 3490 } 3491 }else{ 3492 dst[dstIndex]=(UChar)c; 3493 } 3494 3495 }else{ 3496 *status = U_BUFFER_OVERFLOW_ERROR; 3497 } 3498 dstIndex++; /* for preflighting */ 3499 } 3500 return dstIndex; 3501 } 3502 3503 static void 3504 TestFullRoundtrip(const char* cp){ 3505 UChar usource[10] ={0}; 3506 UChar nsrc[10] = {0}; 3507 uint32_t i=1; 3508 int len=0, ulen; 3509 nsrc[0]=0x0061; 3510 /* Test codepoint 0 */ 3511 TestConv(usource,1,cp,"",NULL,0); 3512 TestConv(usource,2,cp,"",NULL,0); 3513 nsrc[2]=0x5555; 3514 TestConv(nsrc,3,cp,"",NULL,0); 3515 3516 for(;i<=0x10FFFF;i++){ 3517 if(i==0xD800){ 3518 i=0xDFFF; 3519 continue; 3520 } 3521 if(i<=0xFFFF){ 3522 usource[0] =(UChar) i; 3523 len=1; 3524 }else{ 3525 usource[0]=U16_LEAD(i); 3526 usource[1]=U16_TRAIL(i); 3527 len=2; 3528 } 3529 ulen=len; 3530 if(i==0x80) { 3531 usource[2]=0; 3532 } 3533 /* Test only single code points */ 3534 TestConv(usource,ulen,cp,"",NULL,0); 3535 /* Test codepoint repeated twice */ 3536 usource[ulen]=usource[0]; 3537 usource[ulen+1]=usource[1]; 3538 ulen+=len; 3539 TestConv(usource,ulen,cp,"",NULL,0); 3540 /* Test codepoint repeated 3 times */ 3541 usource[ulen]=usource[0]; 3542 usource[ulen+1]=usource[1]; 3543 ulen+=len; 3544 TestConv(usource,ulen,cp,"",NULL,0); 3545 /* Test codepoint in between 2 codepoints */ 3546 nsrc[1]=usource[0]; 3547 nsrc[2]=usource[1]; 3548 nsrc[len+1]=0x5555; 3549 TestConv(nsrc,len+2,cp,"",NULL,0); 3550 uprv_memset(usource,0,sizeof(UChar)*10); 3551 } 3552 } 3553 3554 static void 3555 TestRoundTrippingAllUTF(void){ 3556 if(!getTestOption(QUICK_OPTION)){ 3557 log_verbose("Running exhaustive round trip test for BOCU-1\n"); 3558 TestFullRoundtrip("BOCU-1"); 3559 log_verbose("Running exhaustive round trip test for SCSU\n"); 3560 TestFullRoundtrip("SCSU"); 3561 log_verbose("Running exhaustive round trip test for UTF-8\n"); 3562 TestFullRoundtrip("UTF-8"); 3563 log_verbose("Running exhaustive round trip test for CESU-8\n"); 3564 TestFullRoundtrip("CESU-8"); 3565 log_verbose("Running exhaustive round trip test for UTF-16BE\n"); 3566 TestFullRoundtrip("UTF-16BE"); 3567 log_verbose("Running exhaustive round trip test for UTF-16LE\n"); 3568 TestFullRoundtrip("UTF-16LE"); 3569 log_verbose("Running exhaustive round trip test for UTF-16\n"); 3570 TestFullRoundtrip("UTF-16"); 3571 log_verbose("Running exhaustive round trip test for UTF-32BE\n"); 3572 TestFullRoundtrip("UTF-32BE"); 3573 log_verbose("Running exhaustive round trip test for UTF-32LE\n"); 3574 TestFullRoundtrip("UTF-32LE"); 3575 log_verbose("Running exhaustive round trip test for UTF-32\n"); 3576 TestFullRoundtrip("UTF-32"); 3577 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3578 TestFullRoundtrip("UTF-7"); 3579 log_verbose("Running exhaustive round trip test for UTF-7\n"); 3580 TestFullRoundtrip("UTF-7,version=1"); 3581 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n"); 3582 TestFullRoundtrip("IMAP-mailbox-name"); 3583 /* 3584 * 3585 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of 3586 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA). 3587 * The old mappings remain as fallbacks. 3588 * This test may be reintroduced at a later time. 3589 * 3590 * 110118 - mow 3591 */ 3592 /* 3593 log_verbose("Running exhaustive round trip test for GB18030\n"); 3594 TestFullRoundtrip("GB18030"); 3595 */ 3596 } 3597 } 3598 3599 static void 3600 TestSCSU() { 3601 3602 static const uint16_t germanUTF16[]={ 3603 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074 3604 }; 3605 3606 static const uint8_t germanSCSU[]={ 3607 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74 3608 }; 3609 3610 static const uint16_t russianUTF16[]={ 3611 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430 3612 }; 3613 3614 static const uint8_t russianSCSU[]={ 3615 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0 3616 }; 3617 3618 static const uint16_t japaneseUTF16[]={ 3619 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b, 3620 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3, 3621 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b, 3622 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4, 3623 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a, 3624 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044, 3625 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3, 3626 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd, 3627 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de, 3628 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09, 3629 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b, 3630 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068, 3631 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1, 3632 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9, 3633 0x307e, 0x3067, 0x3042, 0x308b, 0x3002 3634 }; 3635 3636 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice: 3637 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */ 3638 static const uint8_t japaneseSCSU[]={ 3639 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 3640 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f, 3641 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c, 3642 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d, 3643 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e, 3644 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e, 3645 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d, 3646 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa, 3647 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08, 3648 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d, 3649 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06, 3650 0xcb, 0x82 3651 }; 3652 3653 static const uint16_t allFeaturesUTF16[]={ 3654 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff, 3655 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 3656 0x01df, 0xf000, 0xdbff, 0xdfff 3657 }; 3658 3659 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter 3660 * result here (34B vs. 35B) 3661 */ 3662 static const uint8_t allFeaturesSCSU[]={ 3663 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03, 3664 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a, 3665 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13, 3666 0xdf, 0x14, 0x80, 0x15, 0xff 3667 }; 3668 static const uint16_t monkeyIn[]={ 3669 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3670 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3671 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3672 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3673 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3674 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3675 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3676 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3677 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3678 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3679 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3680 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3681 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3682 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3683 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3684 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3685 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3686 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3687 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A, 3688 /* test non-BMP code points */ 3689 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 3690 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 3691 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 3692 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 3693 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 3694 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 3695 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 3696 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 3697 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 3698 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF, 3699 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF, 3700 3701 3702 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 3703 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 3704 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 3705 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 3706 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 3707 }; 3708 static const char *fTestCases [] = { 3709 "\\ud800\\udc00", /* smallest surrogate*/ 3710 "\\ud8ff\\udcff", 3711 "\\udBff\\udFff", /* largest surrogate pair*/ 3712 "\\ud834\\udc00", 3713 "\\U0010FFFF", 3714 "Hello \\u9292 \\u9192 World!", 3715 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!", 3716 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3717 3718 "\\u0648\\u06c8", /* catch missing reset*/ 3719 "\\u0648\\u06c8", 3720 3721 "\\u4444\\uE001", /* lowest quotable*/ 3722 "\\u4444\\uf2FF", /* highest quotable*/ 3723 "\\u4444\\uf188\\u4444", 3724 "\\u4444\\uf188\\uf288", 3725 "\\u4444\\uf188abc\\u0429\\uf288", 3726 "\\u9292\\u2222", 3727 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!", 3728 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!", 3729 "Hello World!123456", 3730 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/ 3731 3732 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/ 3733 "abc\\u4411d", /* uses SQU*/ 3734 "abc\\u4411\\u4412d",/* uses SCU*/ 3735 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/ 3736 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/ 3737 "\\u9292\\u2222", 3738 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", 3739 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c", 3740 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002", 3741 3742 "", /* empty input*/ 3743 "\\u0000", /* smallest BMP character*/ 3744 "\\uFFFF", /* largest BMP character*/ 3745 3746 /* regression tests*/ 3747 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa", 3748 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff", 3749 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c", 3750 "\\u0041\\u00df\\u0401\\u015f", 3751 "\\u9066\\u2123abc", 3752 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5", 3753 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489", 3754 }; 3755 int i=0; 3756 for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){ 3757 const char* cSrc = fTestCases[i]; 3758 UErrorCode status = U_ZERO_ERROR; 3759 int32_t cSrcLen,srcLen; 3760 UChar* src; 3761 /* UConverter* cnv = ucnv_open("SCSU",&status); */ 3762 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]); 3763 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar)); 3764 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status); 3765 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i); 3766 TestConv(src,srcLen,"SCSU","Coverage",NULL,0); 3767 free(src); 3768 } 3769 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3770 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU)); 3771 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3772 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU)); 3773 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU)); 3774 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU)); 3775 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0); 3776 } 3777 3778 #if !UCONFIG_NO_LEGACY_CONVERSION 3779 static void TestJitterbug2346(){ 3780 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a, 3781 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a}; 3782 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A}; 3783 3784 UChar uTarget[500]={'\0'}; 3785 UChar* utarget=uTarget; 3786 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 3787 3788 char cTarget[500]={'\0'}; 3789 char* ctarget=cTarget; 3790 char* ctargetLimit=cTarget+sizeof(cTarget); 3791 const char* csource=source; 3792 UChar* temp = expected; 3793 UErrorCode err=U_ZERO_ERROR; 3794 3795 UConverter* conv =ucnv_open("ISO_2022_JP",&err); 3796 if(U_FAILURE(err)) { 3797 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 3798 return; 3799 } 3800 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err); 3801 if(U_FAILURE(err)) { 3802 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err)); 3803 return; 3804 } 3805 utargetLimit=utarget; 3806 utarget = uTarget; 3807 while(utarget<utargetLimit){ 3808 if(*temp!=*utarget){ 3809 3810 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ; 3811 } 3812 utarget++; 3813 temp++; 3814 } 3815 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 3816 if(U_FAILURE(err)) { 3817 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err)); 3818 return; 3819 } 3820 ctargetLimit=ctarget; 3821 ctarget =cTarget; 3822 ucnv_close(conv); 3823 3824 3825 } 3826 3827 static void 3828 TestISO_2022_JP_1() { 3829 /* test input */ 3830 static const uint16_t in[]={ 3831 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A, 3832 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3833 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A, 3834 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3835 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A, 3836 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A, 3837 0x201D, 0x000D, 0x000A, 3838 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3839 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A, 3840 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3841 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A, 3842 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A, 3843 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A 3844 }; 3845 const UChar* uSource; 3846 const UChar* uSourceLimit; 3847 const char* cSource; 3848 const char* cSourceLimit; 3849 UChar *uTargetLimit =NULL; 3850 UChar *uTarget; 3851 char *cTarget; 3852 const char *cTargetLimit; 3853 char *cBuf; 3854 UChar *uBuf,*test; 3855 int32_t uBufSize = 120; 3856 UErrorCode errorCode=U_ZERO_ERROR; 3857 UConverter *cnv; 3858 3859 cnv=ucnv_open("ISO_2022_JP_1", &errorCode); 3860 if(U_FAILURE(errorCode)) { 3861 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3862 return; 3863 } 3864 3865 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3866 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3867 uSource = (const UChar*)in; 3868 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3869 cTarget = cBuf; 3870 cTargetLimit = cBuf +uBufSize*5; 3871 uTarget = uBuf; 3872 uTargetLimit = uBuf+ uBufSize*5; 3873 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode); 3874 if(U_FAILURE(errorCode)){ 3875 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3876 return; 3877 } 3878 cSource = cBuf; 3879 cSourceLimit =cTarget; 3880 test =uBuf; 3881 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode); 3882 if(U_FAILURE(errorCode)){ 3883 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3884 return; 3885 } 3886 uSource = (const UChar*)in; 3887 while(uSource<uSourceLimit){ 3888 if(*test!=*uSource){ 3889 3890 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3891 } 3892 uSource++; 3893 test++; 3894 } 3895 /*ucnv_close(cnv); 3896 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/ 3897 /*Test for the condition where there is an invalid character*/ 3898 ucnv_reset(cnv); 3899 { 3900 static const uint8_t source2[]={0x0e,0x24,0x053}; 3901 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]"); 3902 } 3903 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3904 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3905 ucnv_close(cnv); 3906 free(uBuf); 3907 free(cBuf); 3908 } 3909 3910 static void 3911 TestISO_2022_JP_2() { 3912 /* test input */ 3913 static const uint16_t in[]={ 3914 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A, 3915 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A, 3916 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A, 3917 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A, 3918 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A, 3919 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A, 3920 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A, 3921 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A, 3922 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A, 3923 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A, 3924 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A, 3925 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 3926 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 3927 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 3928 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A, 3929 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 3930 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A, 3931 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A, 3932 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A 3933 }; 3934 const UChar* uSource; 3935 const UChar* uSourceLimit; 3936 const char* cSource; 3937 const char* cSourceLimit; 3938 UChar *uTargetLimit =NULL; 3939 UChar *uTarget; 3940 char *cTarget; 3941 const char *cTargetLimit; 3942 char *cBuf; 3943 UChar *uBuf,*test; 3944 int32_t uBufSize = 120; 3945 UErrorCode errorCode=U_ZERO_ERROR; 3946 UConverter *cnv; 3947 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 3948 int32_t* myOff= offsets; 3949 cnv=ucnv_open("ISO_2022_JP_2", &errorCode); 3950 if(U_FAILURE(errorCode)) { 3951 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 3952 return; 3953 } 3954 3955 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 3956 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 3957 uSource = (const UChar*)in; 3958 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 3959 cTarget = cBuf; 3960 cTargetLimit = cBuf +uBufSize*5; 3961 uTarget = uBuf; 3962 uTargetLimit = uBuf+ uBufSize*5; 3963 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 3964 if(U_FAILURE(errorCode)){ 3965 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3966 return; 3967 } 3968 cSource = cBuf; 3969 cSourceLimit =cTarget; 3970 test =uBuf; 3971 myOff=offsets; 3972 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 3973 if(U_FAILURE(errorCode)){ 3974 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 3975 return; 3976 } 3977 uSource = (const UChar*)in; 3978 while(uSource<uSourceLimit){ 3979 if(*test!=*uSource){ 3980 3981 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 3982 } 3983 uSource++; 3984 test++; 3985 } 3986 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3987 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3988 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 3989 /*Test for the condition where there is an invalid character*/ 3990 ucnv_reset(cnv); 3991 { 3992 static const uint8_t source2[]={0x0e,0x24,0x053}; 3993 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]"); 3994 } 3995 ucnv_close(cnv); 3996 free(uBuf); 3997 free(cBuf); 3998 free(offsets); 3999 } 4000 4001 static void 4002 TestISO_2022_KR() { 4003 /* test input */ 4004 static const uint16_t in[]={ 4005 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4006 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4007 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4008 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4009 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4010 ,0x53E3,0x53E4,0x000A,0x000D}; 4011 const UChar* uSource; 4012 const UChar* uSourceLimit; 4013 const char* cSource; 4014 const char* cSourceLimit; 4015 UChar *uTargetLimit =NULL; 4016 UChar *uTarget; 4017 char *cTarget; 4018 const char *cTargetLimit; 4019 char *cBuf; 4020 UChar *uBuf,*test; 4021 int32_t uBufSize = 120; 4022 UErrorCode errorCode=U_ZERO_ERROR; 4023 UConverter *cnv; 4024 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4025 int32_t* myOff= offsets; 4026 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode); 4027 if(U_FAILURE(errorCode)) { 4028 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4029 return; 4030 } 4031 4032 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4033 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4034 uSource = (const UChar*)in; 4035 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4036 cTarget = cBuf; 4037 cTargetLimit = cBuf +uBufSize*5; 4038 uTarget = uBuf; 4039 uTargetLimit = uBuf+ uBufSize*5; 4040 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4041 if(U_FAILURE(errorCode)){ 4042 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4043 return; 4044 } 4045 cSource = cBuf; 4046 cSourceLimit =cTarget; 4047 test =uBuf; 4048 myOff=offsets; 4049 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4050 if(U_FAILURE(errorCode)){ 4051 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4052 return; 4053 } 4054 uSource = (const UChar*)in; 4055 while(uSource<uSourceLimit){ 4056 if(*test!=*uSource){ 4057 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4058 } 4059 uSource++; 4060 test++; 4061 } 4062 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4063 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4064 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4065 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4066 TestJitterbug930("csISO2022KR"); 4067 /*Test for the condition where there is an invalid character*/ 4068 ucnv_reset(cnv); 4069 { 4070 static const uint8_t source2[]={0x1b,0x24,0x053}; 4071 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4072 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4073 } 4074 ucnv_close(cnv); 4075 free(uBuf); 4076 free(cBuf); 4077 free(offsets); 4078 } 4079 4080 static void 4081 TestISO_2022_KR_1() { 4082 /* test input */ 4083 static const uint16_t in[]={ 4084 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D 4085 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 4086 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 4087 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB 4088 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 4089 ,0x53E3,0x53E4,0x000A,0x000D}; 4090 const UChar* uSource; 4091 const UChar* uSourceLimit; 4092 const char* cSource; 4093 const char* cSourceLimit; 4094 UChar *uTargetLimit =NULL; 4095 UChar *uTarget; 4096 char *cTarget; 4097 const char *cTargetLimit; 4098 char *cBuf; 4099 UChar *uBuf,*test; 4100 int32_t uBufSize = 120; 4101 UErrorCode errorCode=U_ZERO_ERROR; 4102 UConverter *cnv; 4103 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4104 int32_t* myOff= offsets; 4105 cnv=ucnv_open("ibm-25546", &errorCode); 4106 if(U_FAILURE(errorCode)) { 4107 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4108 return; 4109 } 4110 4111 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4112 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5); 4113 uSource = (const UChar*)in; 4114 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4115 cTarget = cBuf; 4116 cTargetLimit = cBuf +uBufSize*5; 4117 uTarget = uBuf; 4118 uTargetLimit = uBuf+ uBufSize*5; 4119 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4120 if(U_FAILURE(errorCode)){ 4121 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4122 return; 4123 } 4124 cSource = cBuf; 4125 cSourceLimit =cTarget; 4126 test =uBuf; 4127 myOff=offsets; 4128 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4129 if(U_FAILURE(errorCode)){ 4130 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4131 return; 4132 } 4133 uSource = (const UChar*)in; 4134 while(uSource<uSourceLimit){ 4135 if(*test!=*uSource){ 4136 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ; 4137 } 4138 uSource++; 4139 test++; 4140 } 4141 ucnv_reset(cnv); 4142 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding"); 4143 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4144 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4145 ucnv_reset(cnv); 4146 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4147 /*Test for the condition where there is an invalid character*/ 4148 ucnv_reset(cnv); 4149 { 4150 static const uint8_t source2[]={0x1b,0x24,0x053}; 4151 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 4152 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]"); 4153 } 4154 ucnv_close(cnv); 4155 free(uBuf); 4156 free(cBuf); 4157 free(offsets); 4158 } 4159 4160 static void TestJitterbug2411(){ 4161 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A" 4162 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43"; 4163 UConverter* kr=NULL, *kr1=NULL; 4164 UErrorCode errorCode = U_ZERO_ERROR; 4165 UChar tgt[100]={'\0'}; 4166 UChar* target = tgt; 4167 UChar* targetLimit = target+100; 4168 kr=ucnv_open("iso-2022-kr", &errorCode); 4169 if(U_FAILURE(errorCode)) { 4170 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode)); 4171 return; 4172 } 4173 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4174 if(U_FAILURE(errorCode)) { 4175 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4176 return; 4177 } 4178 kr1 = ucnv_open("ibm-25546", &errorCode); 4179 if(U_FAILURE(errorCode)) { 4180 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode)); 4181 return; 4182 } 4183 target = tgt; 4184 targetLimit = target+100; 4185 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode); 4186 4187 if(U_FAILURE(errorCode)) { 4188 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode)); 4189 return; 4190 } 4191 4192 ucnv_close(kr); 4193 ucnv_close(kr1); 4194 4195 } 4196 4197 static void 4198 TestJIS(){ 4199 /* From Unicode moved to testdata/conversion.txt */ 4200 /*To Unicode*/ 4201 { 4202 static const uint8_t sampleTextJIS[] = { 4203 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/ 4204 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4205 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4206 }; 4207 static const uint16_t expectedISO2022JIS[] = { 4208 0x0041, 0x0042, 4209 0xFF81, 0xFF82, 4210 0x3000 4211 }; 4212 static const int32_t toISO2022JISOffs[]={ 4213 3,4, 4214 8,9, 4215 16 4216 }; 4217 4218 static const uint8_t sampleTextJIS7[] = { 4219 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/ 4220 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/ 4221 0x1b,0x24,0x42,0x21,0x21, 4222 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */ 4223 0x21,0x22, 4224 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/ 4225 }; 4226 static const uint16_t expectedISO2022JIS7[] = { 4227 0x0041, 0x0042, 4228 0xFF81, 0xFF82, 4229 0x3000, 4230 0xFF81, 0xFF82, 4231 0x3001, 4232 0x3000 4233 }; 4234 static const int32_t toISO2022JIS7Offs[]={ 4235 3,4, 4236 8,9, 4237 13,16, 4238 17, 4239 19,27 4240 }; 4241 static const uint8_t sampleTextJIS8[] = { 4242 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/ 4243 0xa1,0xc8,0xd9,/*Katakana Set*/ 4244 0x1b,0x28,0x42, 4245 0x41,0x42, 4246 0xb1,0xc3, /*Katakana Set*/ 4247 0x1b,0x24,0x42,0x21,0x21 4248 }; 4249 static const uint16_t expectedISO2022JIS8[] = { 4250 0x0041, 0x0042, 4251 0xff61, 0xff88, 0xff99, 4252 0x0041, 0x0042, 4253 0xff71, 0xff83, 4254 0x3000 4255 }; 4256 static const int32_t toISO2022JIS8Offs[]={ 4257 3, 4, 5, 6, 4258 7, 11, 12, 13, 4259 14, 18, 4260 }; 4261 4262 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS, 4263 sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE); 4264 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7, 4265 sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE); 4266 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8, 4267 sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE); 4268 } 4269 4270 } 4271 4272 4273 #if 0 4274 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7 4275 4276 static void TestJitterbug915(){ 4277 /* tests for roundtripping of the below sequence 4278 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * / 4279 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * / 4280 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * / 4281 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * / 4282 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * / 4283 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * / 4284 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * / 4285 */ 4286 static const char cSource[]={ 4287 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23, 4288 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23, 4289 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A, 4290 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F, 4291 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21, 4292 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E, 4293 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70, 4294 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A, 4295 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F, 4296 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47, 4297 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 4298 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 4299 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21, 4300 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B, 4301 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4302 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4303 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50, 4304 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F, 4305 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 4306 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 4307 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C, 4308 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F, 4309 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 4310 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B, 4311 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B, 4312 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23, 4313 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 4314 0x37, 0x20, 0x2A, 0x2F 4315 }; 4316 UChar uTarget[500]={'\0'}; 4317 UChar* utarget=uTarget; 4318 UChar* utargetLimit=uTarget+sizeof(uTarget)/2; 4319 4320 char cTarget[500]={'\0'}; 4321 char* ctarget=cTarget; 4322 char* ctargetLimit=cTarget+sizeof(cTarget); 4323 const char* csource=cSource; 4324 const char* tempSrc = cSource; 4325 UErrorCode err=U_ZERO_ERROR; 4326 4327 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err); 4328 if(U_FAILURE(err)) { 4329 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 4330 return; 4331 } 4332 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err); 4333 if(U_FAILURE(err)) { 4334 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err)); 4335 return; 4336 } 4337 utargetLimit=utarget; 4338 utarget = uTarget; 4339 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err); 4340 if(U_FAILURE(err)) { 4341 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err)); 4342 return; 4343 } 4344 ctargetLimit=ctarget; 4345 ctarget =cTarget; 4346 while(ctarget<ctargetLimit){ 4347 if(*ctarget != *tempSrc){ 4348 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ; 4349 } 4350 ++ctarget; 4351 ++tempSrc; 4352 } 4353 4354 ucnv_close(conv); 4355 } 4356 4357 static void 4358 TestISO_2022_CN_EXT() { 4359 /* test input */ 4360 static const uint16_t in[]={ 4361 /* test Non-BMP code points */ 4362 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F, 4363 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8, 4364 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF, 4365 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6, 4366 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB, 4367 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0, 4368 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8, 4369 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF, 4370 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4, 4371 0xD869, 0xDED5, 4372 4373 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A, 4374 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A, 4375 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A, 4376 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4377 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4378 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4379 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4380 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4381 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4382 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4383 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4384 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4385 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4386 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A, 4387 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 4388 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A, 4389 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A, 4390 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A, 4391 4392 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A 4393 4394 }; 4395 4396 const UChar* uSource; 4397 const UChar* uSourceLimit; 4398 const char* cSource; 4399 const char* cSourceLimit; 4400 UChar *uTargetLimit =NULL; 4401 UChar *uTarget; 4402 char *cTarget; 4403 const char *cTargetLimit; 4404 char *cBuf; 4405 UChar *uBuf,*test; 4406 int32_t uBufSize = 180; 4407 UErrorCode errorCode=U_ZERO_ERROR; 4408 UConverter *cnv; 4409 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4410 int32_t* myOff= offsets; 4411 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode); 4412 if(U_FAILURE(errorCode)) { 4413 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4414 return; 4415 } 4416 4417 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4418 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4419 uSource = (const UChar*)in; 4420 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4421 cTarget = cBuf; 4422 cTargetLimit = cBuf +uBufSize*5; 4423 uTarget = uBuf; 4424 uTargetLimit = uBuf+ uBufSize*5; 4425 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4426 if(U_FAILURE(errorCode)){ 4427 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4428 return; 4429 } 4430 cSource = cBuf; 4431 cSourceLimit =cTarget; 4432 test =uBuf; 4433 myOff=offsets; 4434 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4435 if(U_FAILURE(errorCode)){ 4436 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4437 return; 4438 } 4439 uSource = (const UChar*)in; 4440 while(uSource<uSourceLimit){ 4441 if(*test!=*uSource){ 4442 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4443 } 4444 else{ 4445 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4446 } 4447 uSource++; 4448 test++; 4449 } 4450 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4451 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4452 /*Test for the condition where there is an invalid character*/ 4453 ucnv_reset(cnv); 4454 { 4455 static const uint8_t source2[]={0x0e,0x24,0x053}; 4456 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]"); 4457 } 4458 ucnv_close(cnv); 4459 free(uBuf); 4460 free(cBuf); 4461 free(offsets); 4462 } 4463 #endif 4464 4465 static void 4466 TestISO_2022_CN() { 4467 /* test input */ 4468 static const uint16_t in[]={ 4469 /* jitterbug 951 */ 4470 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52, 4471 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17, 4472 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52, 4473 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45, 4474 0x0020, 0x0045, 0x004e, 0x0044, 4475 /**/ 4476 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A, 4477 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A, 4478 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A, 4479 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A, 4480 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A, 4481 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A, 4482 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A, 4483 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A, 4484 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A, 4485 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A, 4486 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A, 4487 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A, 4488 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A, 4489 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A, 4490 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A, 4491 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486, 4492 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A, 4493 4494 }; 4495 const UChar* uSource; 4496 const UChar* uSourceLimit; 4497 const char* cSource; 4498 const char* cSourceLimit; 4499 UChar *uTargetLimit =NULL; 4500 UChar *uTarget; 4501 char *cTarget; 4502 const char *cTargetLimit; 4503 char *cBuf; 4504 UChar *uBuf,*test; 4505 int32_t uBufSize = 180; 4506 UErrorCode errorCode=U_ZERO_ERROR; 4507 UConverter *cnv; 4508 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5); 4509 int32_t* myOff= offsets; 4510 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode); 4511 if(U_FAILURE(errorCode)) { 4512 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode)); 4513 return; 4514 } 4515 4516 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5); 4517 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10); 4518 uSource = (const UChar*)in; 4519 uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0])); 4520 cTarget = cBuf; 4521 cTargetLimit = cBuf +uBufSize*5; 4522 uTarget = uBuf; 4523 uTargetLimit = uBuf+ uBufSize*5; 4524 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode); 4525 if(U_FAILURE(errorCode)){ 4526 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4527 return; 4528 } 4529 cSource = cBuf; 4530 cSourceLimit =cTarget; 4531 test =uBuf; 4532 myOff=offsets; 4533 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode); 4534 if(U_FAILURE(errorCode)){ 4535 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode)); 4536 return; 4537 } 4538 uSource = (const UChar*)in; 4539 while(uSource<uSourceLimit){ 4540 if(*test!=*uSource){ 4541 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ; 4542 } 4543 else{ 4544 log_verbose(" Got: \\u%04X\n",(int)*test) ; 4545 } 4546 uSource++; 4547 test++; 4548 } 4549 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding"); 4550 TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4551 TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4552 TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv); 4553 TestJitterbug930("csISO2022CN"); 4554 /*Test for the condition where there is an invalid character*/ 4555 ucnv_reset(cnv); 4556 { 4557 static const uint8_t source2[]={0x0e,0x24,0x053}; 4558 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]"); 4559 } 4560 4561 ucnv_close(cnv); 4562 free(uBuf); 4563 free(cBuf); 4564 free(offsets); 4565 } 4566 4567 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */ 4568 typedef struct { 4569 const char * converterName; 4570 const char * inputText; 4571 int inputTextLength; 4572 } EmptySegmentTest; 4573 4574 /* Callback for TestJitterbug6175, should only get called for empty segment errors */ 4575 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits, 4576 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) { 4577 if (reason > UCNV_IRREGULAR) { 4578 return; 4579 } 4580 if (reason != UCNV_IRREGULAR) { 4581 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n"); 4582 } 4583 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */ 4584 *err = U_ZERO_ERROR; 4585 ucnv_cbToUWriteSub(toArgs,0,err); 4586 } 4587 4588 enum { kEmptySegmentToUCharsMax = 64 }; 4589 static void TestJitterbug6175(void) { 4590 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A }; 4591 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A }; 4592 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A }; 4593 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A }; 4594 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 }; 4595 static const EmptySegmentTest emptySegmentTests[] = { 4596 /* converterName inputText inputTextLength */ 4597 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) }, 4598 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) }, 4599 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) }, 4600 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) }, 4601 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) }, 4602 /* terminator: */ 4603 { NULL, NULL, 0, } 4604 }; 4605 const EmptySegmentTest * testPtr; 4606 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) { 4607 UErrorCode err = U_ZERO_ERROR; 4608 UConverter * cnv = ucnv_open(testPtr->converterName, &err); 4609 if (U_FAILURE(err)) { 4610 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4611 return; 4612 } 4613 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err); 4614 if (U_FAILURE(err)) { 4615 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err)); 4616 ucnv_close(cnv); 4617 return; 4618 } 4619 { 4620 UChar toUChars[kEmptySegmentToUCharsMax]; 4621 UChar * toUCharsPtr = toUChars; 4622 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax; 4623 const char * inCharsPtr = testPtr->inputText; 4624 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength; 4625 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err); 4626 } 4627 ucnv_close(cnv); 4628 } 4629 } 4630 4631 static void 4632 TestEBCDIC_STATEFUL() { 4633 /* test input */ 4634 static const uint8_t in[]={ 4635 0x61, 4636 0x1a, 4637 0x0f, 0x4b, 4638 0x42, 4639 0x40, 4640 0x36, 4641 }; 4642 4643 /* expected test results */ 4644 static const int32_t results[]={ 4645 /* number of bytes read, code point */ 4646 1, 0x002f, 4647 1, 0x0092, 4648 2, 0x002e, 4649 1, 0xff62, 4650 1, 0x0020, 4651 1, 0x0096, 4652 4653 }; 4654 static const uint8_t in2[]={ 4655 0x0f, 4656 0xa1, 4657 0x01 4658 }; 4659 4660 /* expected test results */ 4661 static const int32_t results2[]={ 4662 /* number of bytes read, code point */ 4663 2, 0x203E, 4664 1, 0x0001, 4665 }; 4666 4667 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 4668 UErrorCode errorCode=U_ZERO_ERROR; 4669 UConverter *cnv=ucnv_open("ibm-930", &errorCode); 4670 if(U_FAILURE(errorCode)) { 4671 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode)); 4672 return; 4673 } 4674 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)"); 4675 ucnv_reset(cnv); 4676 /* Test the condition when source >= sourceLimit */ 4677 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); 4678 ucnv_reset(cnv); 4679 /*Test for the condition where source > sourcelimit after consuming the shift chracter */ 4680 { 4681 static const uint8_t source1[]={0x0f}; 4682 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated"); 4683 } 4684 /*Test for the condition where there is an invalid character*/ 4685 ucnv_reset(cnv); 4686 { 4687 static const uint8_t source2[]={0x0e, 0x7F, 0xFF}; 4688 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]"); 4689 } 4690 ucnv_reset(cnv); 4691 source=(const char*)in2; 4692 limit=(const char*)in2+sizeof(in2); 4693 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2"); 4694 ucnv_close(cnv); 4695 4696 } 4697 4698 static void 4699 TestGB18030() { 4700 /* test input */ 4701 static const uint8_t in[]={ 4702 0x24, 4703 0x7f, 4704 0x81, 0x30, 0x81, 0x30, 4705 0xa8, 0xbf, 4706 0xa2, 0xe3, 4707 0xd2, 0xbb, 4708 0x82, 0x35, 0x8f, 0x33, 4709 0x84, 0x31, 0xa4, 0x39, 4710 0x90, 0x30, 0x81, 0x30, 4711 0xe3, 0x32, 0x9a, 0x35 4712 #if 0 4713 /* 4714 * Feature removed markus 2000-oct-26 4715 * Only some codepages must match surrogate pairs into supplementary code points - 4716 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c . 4717 * GB 18030 provides direct encodings for supplementary code points, therefore 4718 * it must not combine two single-encoded surrogates into one code point. 4719 */ 4720 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */ 4721 #endif 4722 }; 4723 4724 /* expected test results */ 4725 static const int32_t results[]={ 4726 /* number of bytes read, code point */ 4727 1, 0x24, 4728 1, 0x7f, 4729 4, 0x80, 4730 2, 0x1f9, 4731 2, 0x20ac, 4732 2, 0x4e00, 4733 4, 0x9fa6, 4734 4, 0xffff, 4735 4, 0x10000, 4736 4, 0x10ffff 4737 #if 0 4738 /* Feature removed. See comment above. */ 4739 8, 0x10000 4740 #endif 4741 }; 4742 4743 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */ 4744 UErrorCode errorCode=U_ZERO_ERROR; 4745 UConverter *cnv=ucnv_open("gb18030", &errorCode); 4746 if(U_FAILURE(errorCode)) { 4747 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode)); 4748 return; 4749 } 4750 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030"); 4751 ucnv_close(cnv); 4752 } 4753 4754 static void 4755 TestLMBCS() { 4756 /* LMBCS-1 string */ 4757 static const uint8_t pszLMBCS[]={ 4758 0x61, 4759 0x01, 0x29, 4760 0x81, 4761 0xA0, 4762 0x0F, 0x27, 4763 0x0F, 0x91, 4764 0x14, 0x0a, 0x74, 4765 0x14, 0xF6, 0x02, 4766 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */ 4767 0x10, 0x88, 0xA0, 4768 }; 4769 4770 /* Unicode UChar32 equivalents */ 4771 static const UChar32 pszUnicode32[]={ 4772 /* code point */ 4773 0x00000061, 4774 0x00002013, 4775 0x000000FC, 4776 0x000000E1, 4777 0x00000007, 4778 0x00000091, 4779 0x00000a74, 4780 0x00000200, 4781 0x00023456, /* code point for surrogate pair */ 4782 0x00005516 4783 }; 4784 4785 /* Unicode UChar equivalents */ 4786 static const UChar pszUnicode[]={ 4787 /* code point */ 4788 0x0061, 4789 0x2013, 4790 0x00FC, 4791 0x00E1, 4792 0x0007, 4793 0x0091, 4794 0x0a74, 4795 0x0200, 4796 0xD84D, /* low surrogate */ 4797 0xDC56, /* high surrogate */ 4798 0x5516 4799 }; 4800 4801 /* expected test results */ 4802 static const int offsets32[]={ 4803 /* number of bytes read, code point */ 4804 0, 4805 1, 4806 3, 4807 4, 4808 5, 4809 7, 4810 9, 4811 12, 4812 15, 4813 21, 4814 24 4815 }; 4816 4817 /* expected test results */ 4818 static const int offsets[]={ 4819 /* number of bytes read, code point */ 4820 0, 4821 1, 4822 3, 4823 4, 4824 5, 4825 7, 4826 9, 4827 12, 4828 15, 4829 18, 4830 21, 4831 24 4832 }; 4833 4834 4835 UConverter *cnv; 4836 4837 #define NAME_LMBCS_1 "LMBCS-1" 4838 #define NAME_LMBCS_2 "LMBCS-2" 4839 4840 4841 /* Some basic open/close/property tests on some LMBCS converters */ 4842 { 4843 4844 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */ 4845 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/ 4846 char get_subchars [1]; 4847 const char * get_name; 4848 UConverter *cnv1; 4849 UConverter *cnv2; 4850 4851 int8_t len = sizeof(get_subchars); 4852 4853 UErrorCode errorCode=U_ZERO_ERROR; 4854 4855 /* Open */ 4856 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode); 4857 if(U_FAILURE(errorCode)) { 4858 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4859 return; 4860 } 4861 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode); 4862 if(U_FAILURE(errorCode)) { 4863 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode)); 4864 return; 4865 } 4866 4867 /* Name */ 4868 get_name = ucnv_getName (cnv1, &errorCode); 4869 if (strcmp(NAME_LMBCS_1,get_name)){ 4870 log_err("Unexpected converter name: %s\n", get_name); 4871 } 4872 get_name = ucnv_getName (cnv2, &errorCode); 4873 if (strcmp(NAME_LMBCS_2,get_name)){ 4874 log_err("Unexpected converter name: %s\n", get_name); 4875 } 4876 4877 /* substitution chars */ 4878 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode); 4879 if(U_FAILURE(errorCode)) { 4880 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4881 } 4882 if (len!=1){ 4883 log_err("Unexpected length of sub chars\n"); 4884 } 4885 if (get_subchars[0] != expected_subchars[0]){ 4886 log_err("Unexpected value of sub chars\n"); 4887 } 4888 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode); 4889 if(U_FAILURE(errorCode)) { 4890 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode)); 4891 } 4892 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode); 4893 if(U_FAILURE(errorCode)) { 4894 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode)); 4895 } 4896 if (len!=1){ 4897 log_err("Unexpected length of sub chars\n"); 4898 } 4899 if (get_subchars[0] != new_subchars[0]){ 4900 log_err("Unexpected value of sub chars\n"); 4901 } 4902 ucnv_close(cnv1); 4903 ucnv_close(cnv2); 4904 4905 } 4906 4907 /* LMBCS to Unicode - offsets */ 4908 { 4909 UErrorCode errorCode=U_ZERO_ERROR; 4910 4911 const char * pSource = (const char *)pszLMBCS; 4912 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 4913 4914 UChar Out [sizeof(pszUnicode) + 1]; 4915 UChar * pOut = Out; 4916 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 4917 4918 int32_t off [sizeof(offsets)]; 4919 4920 /* last 'offset' in expected results is just the final size. 4921 (Makes other tests easier). Compensate here: */ 4922 4923 off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS); 4924 4925 4926 4927 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */ 4928 if(U_FAILURE(errorCode)) { 4929 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode)); 4930 return; 4931 } 4932 4933 4934 4935 ucnv_toUnicode (cnv, 4936 &pOut, 4937 OutLimit, 4938 &pSource, 4939 sourceLimit, 4940 off, 4941 TRUE, 4942 &errorCode); 4943 4944 4945 if (memcmp(off,offsets,sizeof(offsets))) 4946 { 4947 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n"); 4948 } 4949 if (memcmp(Out,pszUnicode,sizeof(pszUnicode))) 4950 { 4951 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n"); 4952 } 4953 ucnv_close(cnv); 4954 } 4955 { 4956 /* LMBCS to Unicode - getNextUChar */ 4957 const char * sourceStart; 4958 const char *source=(const char *)pszLMBCS; 4959 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS); 4960 const UChar32 *results= pszUnicode32; 4961 const int *off = offsets32; 4962 4963 UErrorCode errorCode=U_ZERO_ERROR; 4964 UChar32 uniChar; 4965 4966 cnv=ucnv_open("LMBCS-1", &errorCode); 4967 if(U_FAILURE(errorCode)) { 4968 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 4969 return; 4970 } 4971 else 4972 { 4973 4974 while(source<limit) { 4975 sourceStart=source; 4976 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode); 4977 if(U_FAILURE(errorCode)) { 4978 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode)); 4979 break; 4980 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) { 4981 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n", 4982 uniChar, (source-sourceStart), *results, *off); 4983 break; 4984 } 4985 results++; 4986 off++; 4987 } 4988 } 4989 ucnv_close(cnv); 4990 } 4991 { /* test locale & optimization group operations: Unicode to LMBCS */ 4992 4993 UErrorCode errorCode=U_ZERO_ERROR; 4994 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode); 4995 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode); 4996 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode); 4997 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */ 4998 const UChar * pUniOut = uniString; 4999 UChar * pUniIn = uniString; 5000 uint8_t lmbcsString [4]; 5001 const char * pLMBCSOut = (const char *)lmbcsString; 5002 char * pLMBCSIn = (char *)lmbcsString; 5003 5004 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */ 5005 ucnv_fromUnicode (cnv16he, 5006 &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5007 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5008 NULL, 1, &errorCode); 5009 5010 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83) 5011 { 5012 log_err("LMBCS-16,locale=he gives unexpected translation\n"); 5013 } 5014 5015 pLMBCSIn= (char *)lmbcsString; 5016 pUniOut = uniString; 5017 ucnv_fromUnicode (cnv01us, 5018 &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])), 5019 &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]), 5020 NULL, 1, &errorCode); 5021 5022 if (lmbcsString[0] != 0x9F) 5023 { 5024 log_err("LMBCS-1,locale=US gives unexpected translation\n"); 5025 } 5026 5027 /* single byte char from mbcs char set */ 5028 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */ 5029 pLMBCSOut = (const char *)lmbcsString; 5030 pUniIn = uniString; 5031 ucnv_toUnicode (cnv16jp, 5032 &pUniIn, pUniIn + 1, 5033 &pLMBCSOut, (pLMBCSOut + 1), 5034 NULL, 1, &errorCode); 5035 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5036 { 5037 log_err("Unexpected results from LMBCS-16 single byte char\n"); 5038 } 5039 /* convert to group 1: should be 3 bytes */ 5040 pLMBCSIn = (char *)lmbcsString; 5041 pUniOut = uniString; 5042 ucnv_fromUnicode (cnv01us, 5043 &pLMBCSIn, (const char *)(pLMBCSIn + 3), 5044 &pUniOut, pUniOut + 1, 5045 NULL, 1, &errorCode); 5046 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1 5047 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE) 5048 { 5049 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n"); 5050 } 5051 pLMBCSOut = (const char *)lmbcsString; 5052 pUniIn = uniString; 5053 ucnv_toUnicode (cnv01us, 5054 &pUniIn, pUniIn + 1, 5055 &pLMBCSOut, (const char *)(pLMBCSOut + 3), 5056 NULL, 1, &errorCode); 5057 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E) 5058 { 5059 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n"); 5060 } 5061 pLMBCSIn = (char *)lmbcsString; 5062 pUniOut = uniString; 5063 ucnv_fromUnicode (cnv16jp, 5064 &pLMBCSIn, (const char *)(pLMBCSIn + 1), 5065 &pUniOut, pUniOut + 1, 5066 NULL, 1, &errorCode); 5067 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE) 5068 { 5069 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n"); 5070 } 5071 ucnv_close(cnv16he); 5072 ucnv_close(cnv16jp); 5073 ucnv_close(cnv01us); 5074 } 5075 { 5076 /* Small source buffer testing, LMBCS -> Unicode */ 5077 5078 UErrorCode errorCode=U_ZERO_ERROR; 5079 5080 const char * pSource = (const char *)pszLMBCS; 5081 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS); 5082 int codepointCount = 0; 5083 5084 UChar Out [sizeof(pszUnicode) + 1]; 5085 UChar * pOut = Out; 5086 UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar); 5087 5088 5089 cnv = ucnv_open(NAME_LMBCS_1, &errorCode); 5090 if(U_FAILURE(errorCode)) { 5091 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode)); 5092 return; 5093 } 5094 5095 5096 while ((pSource < sourceLimit) && U_SUCCESS (errorCode)) 5097 { 5098 ucnv_toUnicode (cnv, 5099 &pOut, 5100 OutLimit, 5101 &pSource, 5102 (pSource+1), /* claim that this is a 1- byte buffer */ 5103 NULL, 5104 FALSE, /* FALSE means there might be more chars in the next buffer */ 5105 &errorCode); 5106 5107 if (U_SUCCESS (errorCode)) 5108 { 5109 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1]) 5110 { 5111 /* we are on to the next code point: check value */ 5112 5113 if (Out[0] != pszUnicode[codepointCount]){ 5114 log_err("LMBCS->Uni result %lx should have been %lx \n", 5115 Out[0], pszUnicode[codepointCount]); 5116 } 5117 5118 pOut = Out; /* reset for accumulating next code point */ 5119 codepointCount++; 5120 } 5121 } 5122 else 5123 { 5124 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode)); 5125 } 5126 } 5127 { 5128 /* limits & surrogate error testing */ 5129 char LIn [sizeof(pszLMBCS)]; 5130 const char * pLIn = LIn; 5131 5132 char LOut [sizeof(pszLMBCS)]; 5133 char * pLOut = LOut; 5134 5135 UChar UOut [sizeof(pszUnicode)]; 5136 UChar * pUOut = UOut; 5137 5138 UChar UIn [sizeof(pszUnicode)]; 5139 const UChar * pUIn = UIn; 5140 5141 int32_t off [sizeof(offsets)]; 5142 UChar32 uniChar; 5143 5144 errorCode=U_ZERO_ERROR; 5145 5146 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */ 5147 pUIn++; 5148 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode); 5149 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5150 { 5151 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode)); 5152 } 5153 pUIn--; 5154 5155 errorCode=U_ZERO_ERROR; 5156 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode); 5157 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5158 { 5159 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode)); 5160 } 5161 errorCode=U_ZERO_ERROR; 5162 5163 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode); 5164 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) 5165 { 5166 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode)); 5167 } 5168 errorCode=U_ZERO_ERROR; 5169 5170 /* 0 byte source request - no error, no pointer movement */ 5171 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode); 5172 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode); 5173 if(U_FAILURE(errorCode)) { 5174 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode)); 5175 } 5176 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn)) 5177 { 5178 log_err("Unexpected pointer move in 0 byte source request \n"); 5179 } 5180 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */ 5181 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode); 5182 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR) 5183 { 5184 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode)); 5185 } 5186 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */ 5187 { 5188 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n"); 5189 } 5190 errorCode = U_ZERO_ERROR; 5191 5192 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */ 5193 5194 pUIn = pszUnicode; 5195 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode); 5196 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 ) 5197 { 5198 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n"); 5199 } 5200 5201 errorCode = U_ZERO_ERROR; 5202 5203 pLIn = (const char *)pszLMBCS; 5204 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode); 5205 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4]) 5206 { 5207 log_err("Unexpected results on out of target room to ucnv_toUnicode\n"); 5208 } 5209 5210 /* unpaired or chopped LMBCS surrogates */ 5211 5212 /* OK high surrogate, Low surrogate is chopped */ 5213 LIn [0] = (char)0x14; 5214 LIn [1] = (char)0xD8; 5215 LIn [2] = (char)0x01; 5216 LIn [3] = (char)0x14; 5217 LIn [4] = (char)0xDC; 5218 pLIn = LIn; 5219 errorCode = U_ZERO_ERROR; 5220 pUOut = UOut; 5221 5222 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 5223 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5224 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5225 { 5226 log_err("Unexpected results on chopped low surrogate\n"); 5227 } 5228 5229 /* chopped at surrogate boundary */ 5230 LIn [0] = (char)0x14; 5231 LIn [1] = (char)0xD8; 5232 LIn [2] = (char)0x01; 5233 pLIn = LIn; 5234 errorCode = U_ZERO_ERROR; 5235 pUOut = UOut; 5236 5237 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode); 5238 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3) 5239 { 5240 log_err("Unexpected results on chopped at surrogate boundary \n"); 5241 } 5242 5243 /* unpaired surrogate plus valid Unichar */ 5244 LIn [0] = (char)0x14; 5245 LIn [1] = (char)0xD8; 5246 LIn [2] = (char)0x01; 5247 LIn [3] = (char)0x14; 5248 LIn [4] = (char)0xC9; 5249 LIn [5] = (char)0xD0; 5250 pLIn = LIn; 5251 errorCode = U_ZERO_ERROR; 5252 pUOut = UOut; 5253 5254 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode); 5255 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6) 5256 { 5257 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n"); 5258 } 5259 5260 /* unpaired surrogate plus chopped Unichar */ 5261 LIn [0] = (char)0x14; 5262 LIn [1] = (char)0xD8; 5263 LIn [2] = (char)0x01; 5264 LIn [3] = (char)0x14; 5265 LIn [4] = (char)0xC9; 5266 5267 pLIn = LIn; 5268 errorCode = U_ZERO_ERROR; 5269 pUOut = UOut; 5270 5271 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5272 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5) 5273 { 5274 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n"); 5275 } 5276 5277 /* unpaired surrogate plus valid non-Unichar */ 5278 LIn [0] = (char)0x14; 5279 LIn [1] = (char)0xD8; 5280 LIn [2] = (char)0x01; 5281 LIn [3] = (char)0x0F; 5282 LIn [4] = (char)0x3B; 5283 5284 pLIn = LIn; 5285 errorCode = U_ZERO_ERROR; 5286 pUOut = UOut; 5287 5288 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode); 5289 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5) 5290 { 5291 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n"); 5292 } 5293 5294 /* unpaired surrogate plus chopped non-Unichar */ 5295 LIn [0] = (char)0x14; 5296 LIn [1] = (char)0xD8; 5297 LIn [2] = (char)0x01; 5298 LIn [3] = (char)0x0F; 5299 5300 pLIn = LIn; 5301 errorCode = U_ZERO_ERROR; 5302 pUOut = UOut; 5303 5304 ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode); 5305 5306 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4) 5307 { 5308 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n"); 5309 } 5310 } 5311 } 5312 ucnv_close(cnv); /* final cleanup */ 5313 } 5314 5315 5316 static void TestJitterbug255() 5317 { 5318 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 }; 5319 const char *testBuffer = (const char *)testBytes; 5320 const char *testEnd = (const char *)testBytes + sizeof(testBytes); 5321 UErrorCode status = U_ZERO_ERROR; 5322 /*UChar32 result;*/ 5323 UConverter *cnv = 0; 5324 5325 cnv = ucnv_open("shift-jis", &status); 5326 if (U_FAILURE(status) || cnv == 0) { 5327 log_data_err("Failed to open the converter for SJIS.\n"); 5328 return; 5329 } 5330 while (testBuffer != testEnd) 5331 { 5332 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status); 5333 if (U_FAILURE(status)) 5334 { 5335 log_err("Failed to convert the next UChar for SJIS.\n"); 5336 break; 5337 } 5338 } 5339 ucnv_close(cnv); 5340 } 5341 5342 static void TestEBCDICUS4XML() 5343 { 5344 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000}; 5345 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000}; 5346 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00}; 5347 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00}; 5348 char target_x[] = {0x00, 0x00, 0x00, 0x00}; 5349 UChar *unicodes = unicodes_x; 5350 const UChar *toUnicodeMaps = toUnicodeMaps_x; 5351 char *target = target_x; 5352 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x; 5353 UErrorCode status = U_ZERO_ERROR; 5354 UConverter *cnv = 0; 5355 5356 cnv = ucnv_open("ebcdic-xml-us", &status); 5357 if (U_FAILURE(status) || cnv == 0) { 5358 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n"); 5359 return; 5360 } 5361 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status); 5362 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) { 5363 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n", 5364 u_errorName(status)); 5365 printUSeqErr(unicodes_x, 3); 5366 printUSeqErr(toUnicodeMaps, 3); 5367 } 5368 status = U_ZERO_ERROR; 5369 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status); 5370 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) { 5371 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n", 5372 u_errorName(status)); 5373 printSeqErr((const unsigned char*)target_x, 3); 5374 printSeqErr((const unsigned char*)fromUnicodeMaps, 3); 5375 } 5376 ucnv_close(cnv); 5377 } 5378 #endif /* #if !UCONFIG_NO_LEGACY_COLLATION */ 5379 5380 #if !UCONFIG_NO_COLLATION 5381 5382 static void TestJitterbug981(){ 5383 const UChar* rules; 5384 int32_t rules_length, target_cap, bytes_needed, buff_size; 5385 UErrorCode status = U_ZERO_ERROR; 5386 UConverter *utf8cnv; 5387 UCollator* myCollator; 5388 char *buff; 5389 int numNeeded=0; 5390 utf8cnv = ucnv_open ("utf8", &status); 5391 if(U_FAILURE(status)){ 5392 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status)); 5393 return; 5394 } 5395 myCollator = ucol_open("zh", &status); 5396 if(U_FAILURE(status)){ 5397 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status)); 5398 ucnv_close(utf8cnv); 5399 return; 5400 } 5401 5402 rules = ucol_getRules(myCollator, &rules_length); 5403 if(rules_length == 0) { 5404 log_data_err("missing zh tailoring rule string\n"); 5405 ucol_close(myCollator); 5406 ucnv_close(utf8cnv); 5407 return; 5408 } 5409 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv); 5410 buff = malloc(buff_size); 5411 5412 target_cap = 0; 5413 do { 5414 ucnv_reset(utf8cnv); 5415 status = U_ZERO_ERROR; 5416 if(target_cap >= buff_size) { 5417 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size); 5418 break; 5419 } 5420 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap, 5421 rules, rules_length, &status); 5422 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5423 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5424 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5425 break; 5426 } 5427 numNeeded = bytes_needed; 5428 } while (status == U_BUFFER_OVERFLOW_ERROR); 5429 ucol_close(myCollator); 5430 ucnv_close(utf8cnv); 5431 free(buff); 5432 } 5433 5434 #endif 5435 5436 #if !UCONFIG_NO_LEGACY_CONVERSION 5437 static void TestJitterbug1293(){ 5438 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000}; 5439 char target[256]; 5440 UErrorCode status = U_ZERO_ERROR; 5441 UConverter* conv=NULL; 5442 int32_t target_cap, bytes_needed, numNeeded = 0; 5443 conv = ucnv_open("shift-jis",&status); 5444 if(U_FAILURE(status)){ 5445 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status)); 5446 return; 5447 } 5448 5449 do{ 5450 target_cap =0; 5451 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status); 5452 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1; 5453 if(numNeeded!=0 && numNeeded!= bytes_needed){ 5454 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes"); 5455 } 5456 numNeeded = bytes_needed; 5457 } while (status == U_BUFFER_OVERFLOW_ERROR); 5458 if(U_FAILURE(status)){ 5459 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status)); 5460 return; 5461 } 5462 ucnv_close(conv); 5463 } 5464 #endif 5465 5466 static void TestJB5275_1(){ 5467 5468 static const char* data = "\x3B\xB3\x0A" /* Easy characters */ 5469 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5470 /* Switch script: */ 5471 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */ 5472 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/ 5473 "\xEF\x40\x3B\xB3\x0A"; 5474 static const UChar expected[] ={ 5475 0x003b, 0x0a15, 0x000a, /* Easy characters */ 5476 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */ 5477 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/ 5478 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/ 5479 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/ 5480 }; 5481 5482 UErrorCode status = U_ZERO_ERROR; 5483 UConverter* conv = ucnv_open("iscii-gur", &status); 5484 UChar dest[100] = {'\0'}; 5485 UChar* target = dest; 5486 UChar* targetLimit = dest+100; 5487 const char* source = data; 5488 const char* sourceLimit = data+strlen(data); 5489 const UChar* exp = expected; 5490 5491 if (U_FAILURE(status)) { 5492 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status)); 5493 return; 5494 } 5495 5496 log_verbose("Testing switching back to default script when new line is encountered.\n"); 5497 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5498 if(U_FAILURE(status)){ 5499 log_err("conversion failed: %s \n", u_errorName(status)); 5500 } 5501 targetLimit = target; 5502 target = dest; 5503 printUSeq(target, targetLimit-target); 5504 while(target<targetLimit){ 5505 if(*exp!=*target){ 5506 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5507 } 5508 target++; 5509 exp++; 5510 } 5511 ucnv_close(conv); 5512 } 5513 5514 static void TestJB5275(){ 5515 static const char* data = 5516 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */ 5517 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */ 5518 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */ 5519 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */ 5520 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */ 5521 "\xEF\x48\x38\xB3\x0A" /* Kannada test */ 5522 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */ 5523 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */ 5524 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */ 5525 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */; 5526 static const UChar expected[] ={ 5527 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */ 5528 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */ 5529 0x0038, 0x0C95, 0x000A, /* Kannada test */ 5530 0x0039, 0x0D15, 0x000A, /* Malayalam test */ 5531 0x003A, 0x0A95, 0x000A, /* Gujarati test */ 5532 0x003B, 0x0A15, 0x000A, /* Punjabi test */ 5533 }; 5534 5535 UErrorCode status = U_ZERO_ERROR; 5536 UConverter* conv = ucnv_open("iscii", &status); 5537 UChar dest[100] = {'\0'}; 5538 UChar* target = dest; 5539 UChar* targetLimit = dest+100; 5540 const char* source = data; 5541 const char* sourceLimit = data+strlen(data); 5542 const UChar* exp = expected; 5543 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status); 5544 if(U_FAILURE(status)){ 5545 log_data_err("conversion failed: %s \n", u_errorName(status)); 5546 } 5547 targetLimit = target; 5548 target = dest; 5549 5550 printUSeq(target, targetLimit-target); 5551 5552 while(target<targetLimit){ 5553 if(*exp!=*target){ 5554 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target); 5555 } 5556 target++; 5557 exp++; 5558 } 5559 ucnv_close(conv); 5560 } 5561 5562 static void 5563 TestIsFixedWidth() { 5564 UErrorCode status = U_ZERO_ERROR; 5565 UConverter *cnv = NULL; 5566 int32_t i; 5567 5568 const char *fixedWidth[] = { 5569 "US-ASCII", 5570 "UTF32", 5571 "ibm-5478_P100-1995" 5572 }; 5573 5574 const char *notFixedWidth[] = { 5575 "GB18030", 5576 "UTF8", 5577 "windows-949-2000", 5578 "UTF16" 5579 }; 5580 5581 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) { 5582 cnv = ucnv_open(fixedWidth[i], &status); 5583 if (cnv == NULL || U_FAILURE(status)) { 5584 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status)); 5585 continue; 5586 } 5587 5588 if (!ucnv_isFixedWidth(cnv, &status)) { 5589 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]); 5590 } 5591 ucnv_close(cnv); 5592 } 5593 5594 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) { 5595 cnv = ucnv_open(notFixedWidth[i], &status); 5596 if (cnv == NULL || U_FAILURE(status)) { 5597 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status)); 5598 continue; 5599 } 5600 5601 if (ucnv_isFixedWidth(cnv, &status)) { 5602 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]); 5603 } 5604 ucnv_close(cnv); 5605 } 5606 } 5607