1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2007, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ***************************************************************************/ 6 /***************************************************************************** 7 * 8 * File NCNVCBTS 9 * 10 * Modification History: 11 * Name Date Description 12 * Madhu Katragadda 06/23/2000 Tests for Conveter FallBack API and Functionality 13 ****************************************************************************** 14 */ 15 #include <stdio.h> 16 #include "unicode/uloc.h" 17 #include "unicode/ucnv.h" 18 #include "unicode/ucnv_err.h" 19 #include "cintltst.h" 20 #include "unicode/utypes.h" 21 #include "unicode/ustring.h" 22 #include "ncnvfbts.h" 23 #include "cmemory.h" 24 #include "cstring.h" 25 26 #if !UCONFIG_NO_LEGACY_CONVERSION 27 #define NEW_MAX_BUFFER 999 28 29 30 #define nct_min(x,y) ((x<y) ? x : y) 31 32 static int32_t gInBufferSize = 0; 33 static int32_t gOutBufferSize = 0; 34 static char gNuConvTestName[1024]; 35 36 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err) 37 { 38 if(cnv && cnv[0] == '@') { 39 return ucnv_openPackage("testdata", cnv+1, err); 40 } else { 41 return ucnv_open(cnv, err); 42 } 43 } 44 45 46 static void printSeq(const unsigned char* a, int len) 47 { 48 int i=0; 49 log_verbose("{"); 50 while (i<len) 51 log_verbose("0x%02x ", a[i++]); 52 log_verbose("}\n"); 53 } 54 55 static void printUSeq(const UChar* a, int len) 56 { 57 int i=0; 58 log_verbose("{U+"); 59 while (i<len) 60 log_verbose("0x%04x ", a[i++]); 61 log_verbose("}\n"); 62 } 63 64 static void printSeqErr(const unsigned char* a, int len) 65 { 66 int i=0; 67 fprintf(stderr, "{"); 68 while (i<len) 69 fprintf(stderr, "0x%02x ", a[i++]); 70 fprintf(stderr, "}\n"); 71 } 72 73 static void printUSeqErr(const UChar* a, int len) 74 { 75 int i=0; 76 fprintf(stderr, "{U+"); 77 while (i<len) 78 fprintf(stderr, "0x%04x ", a[i++]); 79 fprintf(stderr,"}\n"); 80 } 81 82 static void TestConverterFallBack(void) 83 { 84 TestConvertFallBackWithBufferSizes(10,10); 85 TestConvertFallBackWithBufferSizes(2,3); 86 TestConvertFallBackWithBufferSizes(3,2); 87 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,1); 88 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,2); 89 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,3); 90 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,4); 91 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,5); 92 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,6); 93 TestConvertFallBackWithBufferSizes(1,NEW_MAX_BUFFER); 94 TestConvertFallBackWithBufferSizes(2,NEW_MAX_BUFFER); 95 TestConvertFallBackWithBufferSizes(3,NEW_MAX_BUFFER); 96 TestConvertFallBackWithBufferSizes(4,NEW_MAX_BUFFER); 97 TestConvertFallBackWithBufferSizes(5,NEW_MAX_BUFFER); 98 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER); 99 100 } 101 102 103 void addTestConverterFallBack(TestNode** root); 104 105 void addTestConverterFallBack(TestNode** root) 106 { 107 addTest(root, &TestConverterFallBack, "tsconv/ncnvfbts/TestConverterFallBack"); 108 109 } 110 111 112 /* Note that this test already makes use of statics, so it's not really 113 multithread safe. 114 This convenience function lets us make the error messages actually useful. 115 */ 116 117 static void setNuConvTestName(const char *codepage, const char *direction) 118 { 119 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]", 120 codepage, 121 direction, 122 (int)gInBufferSize, 123 (int)gOutBufferSize); 124 } 125 126 127 static UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen, 128 const char *codepage, UBool fallback, const int32_t *expectOffsets) 129 { 130 131 132 UErrorCode status = U_ZERO_ERROR; 133 UConverter *conv = 0; 134 char junkout[NEW_MAX_BUFFER]; /* FIX */ 135 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 136 const UChar *src; 137 char *end; 138 char *targ; 139 int32_t *offs; 140 int i; 141 int32_t realBufferSize; 142 char *realBufferEnd; 143 const UChar *realSourceEnd; 144 const UChar *sourceLimit; 145 UBool checkOffsets = TRUE; 146 UBool doFlush; 147 UBool action=FALSE; 148 char *p; 149 150 151 for(i=0;i<NEW_MAX_BUFFER;i++) 152 junkout[i] = (char)0xF0; 153 for(i=0;i<NEW_MAX_BUFFER;i++) 154 junokout[i] = 0xFF; 155 setNuConvTestName(codepage, "FROM"); 156 157 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize, 158 gOutBufferSize); 159 160 conv = my_ucnv_open(codepage, &status); 161 if(U_FAILURE(status)) 162 { 163 log_data_err("Couldn't open converter %s\n",codepage); 164 return TRUE; 165 } 166 167 log_verbose("Converter opened..\n"); 168 /*----setting the callback routine----*/ 169 ucnv_setFallback (conv, fallback); 170 action = ucnv_usesFallback(conv); 171 if(action != fallback){ 172 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status)); 173 } 174 /*------------------------*/ 175 src = source; 176 targ = junkout; 177 offs = junokout; 178 179 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 180 realBufferEnd = junkout + realBufferSize; 181 realSourceEnd = source + sourceLen; 182 183 if ( gOutBufferSize != realBufferSize ) 184 checkOffsets = FALSE; 185 186 if( gInBufferSize != NEW_MAX_BUFFER ) 187 checkOffsets = FALSE; 188 189 do 190 { 191 end = nct_min(targ + gOutBufferSize, realBufferEnd); 192 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd); 193 194 doFlush = (UBool)(sourceLimit == realSourceEnd); 195 196 if(targ == realBufferEnd) 197 { 198 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName); 199 return FALSE; 200 } 201 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE"); 202 203 204 status = U_ZERO_ERROR; 205 206 ucnv_fromUnicode (conv, 207 (char **)&targ, 208 (const char *)end, 209 &src, 210 sourceLimit, 211 checkOffsets ? offs : NULL, 212 doFlush, /* flush if we're at the end of the input data */ 213 &status); 214 215 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (sourceLimit < realSourceEnd) ); 216 217 if(U_FAILURE(status)) 218 { 219 log_err("Problem doing toUnicode, errcode %d %s\n", myErrorName(status), gNuConvTestName); 220 return FALSE; 221 } 222 223 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :", 224 sourceLen, targ-junkout); 225 if(VERBOSITY) 226 { 227 char junk[9999]; 228 char offset_str[9999]; 229 230 junk[0] = 0; 231 offset_str[0] = 0; 232 for(p = junkout;p<targ;p++) 233 { 234 sprintf(junk + uprv_strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p); 235 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]); 236 } 237 238 log_verbose(junk); 239 printSeq((const unsigned char*)expect, expectLen); 240 if ( checkOffsets ) 241 { 242 log_verbose("\nOffsets:"); 243 log_verbose(offset_str); 244 } 245 log_verbose("\n"); 246 } 247 ucnv_close(conv); 248 249 250 if(expectLen != targ-junkout) 251 { 252 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 253 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName); 254 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 255 printSeqErr((const unsigned char*)expect, expectLen); 256 return FALSE; 257 } 258 259 if (checkOffsets && (expectOffsets != 0) ) 260 { 261 log_verbose("\ncomparing %d offsets..\n", targ-junkout); 262 if(uprv_memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){ 263 log_err("\ndid not get the expected offsets while %s \n", gNuConvTestName); 264 log_err("Got : "); 265 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout)); 266 for(p=junkout;p<targ;p++) 267 log_err("%d, ", junokout[p-junkout]); 268 log_err("\nExpected: "); 269 for(i=0; i<(targ-junkout); i++) 270 log_err("%d,", expectOffsets[i]); 271 } 272 } 273 274 log_verbose("\n\ncomparing..\n"); 275 if(!memcmp(junkout, expect, expectLen)) 276 { 277 log_verbose("Matches!\n"); 278 return TRUE; 279 } 280 else 281 { 282 log_err("String does not match. %s\n", gNuConvTestName); 283 log_verbose("String does not match. %s\n", gNuConvTestName); 284 printSeqErr((const unsigned char*)junkout, expectLen); 285 printSeqErr((const unsigned char*)expect, expectLen); 286 return FALSE; 287 } 288 } 289 290 static UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen, 291 const char *codepage, UBool fallback, const int32_t *expectOffsets) 292 { 293 UErrorCode status = U_ZERO_ERROR; 294 UConverter *conv = 0; 295 UChar junkout[NEW_MAX_BUFFER]; /* FIX */ 296 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */ 297 const char *src; 298 const char *realSourceEnd; 299 const char *srcLimit; 300 UChar *targ; 301 UChar *end; 302 int32_t *offs; 303 int i; 304 UBool checkOffsets = TRUE; 305 char junk[9999]; 306 char offset_str[9999]; 307 UChar *p; 308 UBool action; 309 310 int32_t realBufferSize; 311 UChar *realBufferEnd; 312 313 314 for(i=0;i<NEW_MAX_BUFFER;i++) 315 junkout[i] = 0xFFFE; 316 317 for(i=0;i<NEW_MAX_BUFFER;i++) 318 junokout[i] = -1; 319 320 setNuConvTestName(codepage, "TO"); 321 322 log_verbose("\n========= %s\n", gNuConvTestName); 323 324 conv = my_ucnv_open(codepage, &status); 325 if(U_FAILURE(status)) 326 { 327 log_data_err("Couldn't open converter %s\n",gNuConvTestName); 328 return TRUE; /* because it has been logged */ 329 } 330 331 log_verbose("Converter opened..\n"); 332 333 src = (const char *)source; 334 targ = junkout; 335 offs = junokout; 336 337 realBufferSize = (sizeof(junkout)/sizeof(junkout[0])); 338 realBufferEnd = junkout + realBufferSize; 339 realSourceEnd = src + sourcelen; 340 /*----setting the fallback routine----*/ 341 ucnv_setFallback (conv, fallback); 342 action = ucnv_usesFallback(conv); 343 if(action != fallback){ 344 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status)); 345 } 346 /*-------------------------------------*/ 347 if ( gOutBufferSize != realBufferSize ) 348 checkOffsets = FALSE; 349 350 if( gInBufferSize != NEW_MAX_BUFFER ) 351 checkOffsets = FALSE; 352 353 do 354 { 355 end = nct_min( targ + gOutBufferSize, realBufferEnd); 356 srcLimit = nct_min(realSourceEnd, src + gInBufferSize); 357 358 if(targ == realBufferEnd) 359 { 360 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName); 361 return FALSE; 362 } 363 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end); 364 365 366 367 status = U_ZERO_ERROR; 368 369 ucnv_toUnicode (conv, 370 &targ, 371 end, 372 (const char **)&src, 373 (const char *)srcLimit, 374 checkOffsets ? offs : NULL, 375 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */ 376 &status); 377 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (srcLimit < realSourceEnd) ); /* while we just need another buffer */ 378 379 380 if(U_FAILURE(status)) 381 { 382 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName); 383 return FALSE; 384 } 385 386 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :", 387 sourcelen, targ-junkout); 388 if(VERBOSITY) 389 { 390 391 junk[0] = 0; 392 offset_str[0] = 0; 393 394 for(p = junkout;p<targ;p++) 395 { 396 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p); 397 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]); 398 } 399 400 log_verbose(junk); 401 printUSeq(expect, expectlen); 402 if ( checkOffsets ) 403 { 404 log_verbose("\nOffsets:"); 405 log_verbose(offset_str); 406 } 407 log_verbose("\n"); 408 } 409 ucnv_close(conv); 410 411 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2); 412 413 if (checkOffsets && (expectOffsets != 0)) 414 { 415 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))) 416 { 417 log_err("\n\ndid not get the expected offsets while %s \n", gNuConvTestName); 418 log_err("\nGot : "); 419 for(p=junkout;p<targ;p++) 420 log_err("%d, ", junokout[p-junkout]); 421 log_err("\nExpected: "); 422 for(i=0; i<(targ-junkout); i++) 423 log_err("%d,", expectOffsets[i]); 424 log_err(""); 425 for(i=0; i<(targ-junkout); i++) 426 log_err("0x%04X,", junkout[i]); 427 log_err(""); 428 for(i=0; i<(src-(const char *)source); i++) 429 log_err("0x%04X,", (unsigned char)source[i]); 430 } 431 } 432 433 if(!memcmp(junkout, expect, expectlen*2)) 434 { 435 log_verbose("Matches!\n"); 436 return TRUE; 437 } 438 else 439 { 440 log_err("String does not match. %s\n", gNuConvTestName); 441 log_verbose("String does not match. %s\n", gNuConvTestName); 442 printUSeqErr(junkout, expectlen); 443 printf("\n"); 444 printUSeqErr(expect, expectlen); 445 return FALSE; 446 } 447 } 448 449 450 451 static void TestConvertFallBackWithBufferSizes(int32_t outsize, int32_t insize ) 452 { 453 454 static const UChar SBCSText[] = 455 { 0x0021, 0xFF01, 0x0022, 0xFF02, 0x0023, 0xFF03, 0x003A, 0xFF1A, 0x003B, 0xFF1B, 0x003C, 0xFF1C }; 456 /* 21, ?, 22, ?, 23, ?, 3a, ?, 3b, ?, 3c, ? SBCS*/ 457 static const uint8_t expectedNative[] = 458 { 0x21, 0x21, 0x22, 0x22, 0x23, 0x23, 0x3a, 0x3a, 0x3b, 0x3b, 0x3c, 0x3c}; 459 static const UChar retrievedSBCSText[]= 460 { 0x0021, 0x0021, 0x0022, 0x0022, 0x0023, 0x0023, 0x003A, 0x003A, 0x003B, 0x003B, 0x003C, 0x003C }; 461 static const int32_t toNativeOffs [] = 462 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b}; 463 static const int32_t fromNativeoffs [] = 464 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; 465 466 467 /* 1363 isn't DBCS, but it has the DBCS section */ 468 static const UChar DBCSText[] = 469 { 0x00a1, 0x00ad, 0x2010, 0x00b7, 0x30fb}; 470 static const uint8_t expectedIBM1363_DBCS[] = 471 { 0xa2, 0xae, 0xa1 ,0xa9, 0xa1, 0xa9,0xa1 ,0xa4, 0xa1, 0xa4}; 472 static const UChar retrievedDBCSText[]= 473 { 0x00a1, 0x2010, 0x2010, 0x30fb, 0x30fb }; 474 static const int32_t toIBM1363Offs_DBCS[] = 475 { 0x00, 0x00, 0x01,0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04}; 476 static const int32_t fromIBM1363offs_DBCS[] = 477 { 0, 2, 4, 6, 8}; 478 479 480 static const UChar MBCSText[] = 481 { 0x0001, 0x263a, 0x2013, 0x2014, 0x263b, 0x0002}; 482 static const uint8_t expectedIBM950[] = 483 { 0x01, 0x01, 0xa1, 0x56, 0xa1, 0x56, 0x02, 0x02}; 484 static const UChar retrievedMBCSText[]= 485 { 0x0001, 0x0001, 0x2014, 0x2014, 0x0002, 0x0002}; 486 static const int32_t toIBM950Offs [] = 487 { 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05}; 488 static const int32_t fromIBM950offs [] = 489 { 0, 1, 2, 4, 6, 7}; 490 491 static const UChar MBCSText1363[] = 492 { 0x0005, 493 0xffe8, 494 0x0007, 495 0x2022, 496 0x005c, 497 0x00b7, 498 0x3016, 499 0x30fb, 500 0x9a36}; 501 static const uint8_t expectedIBM1363[] = 502 { 0x05, 503 0x05, 504 0x07, 505 0x07, 506 0x7f, 507 0xa1, 0xa4, 508 0xa1, 0xe0, 509 0xa1, 0xa4, 510 0xf5, 0xe2}; 511 static const UChar retrievedMBCSText1363[]= 512 { 0x0005, 0x0005, 0x0007, 0x0007, 0x001a, 0x30fb, 0x25a1, 0x30fb, 0x9a36}; 513 static const int32_t toIBM1363Offs [] = 514 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08}; 515 static const int32_t fromIBM1363offs [] = 516 { 0, 1, 2, 3, 4, 5, 7, 9, 11}; 517 518 519 520 static const char* nativeCodePage[]={ 521 /*NLCS Mapping*/ 522 "ibm-437", 523 "ibm-850", 524 "ibm-878", 525 "ibm-923", 526 "ibm-1051", 527 "ibm-1089", 528 "ibm-1250", 529 "ibm-1251", 530 "ibm-1253", 531 "ibm-1254", 532 "ibm-1255", 533 "ibm-1256", 534 "ibm-1257", 535 "ibm-1258", 536 "ibm-1276" 537 }; 538 539 int32_t i=0; 540 gInBufferSize = insize; 541 gOutBufferSize = outsize; 542 543 for(i=0; i<sizeof(nativeCodePage)/sizeof(nativeCodePage[0]); i++){ 544 log_verbose("Testing %s\n", nativeCodePage[i]); 545 if(!testConvertFromUnicode(SBCSText, sizeof(SBCSText)/sizeof(SBCSText[0]), 546 expectedNative, sizeof(expectedNative), nativeCodePage[i], TRUE, toNativeOffs )) 547 log_err("u-> %s(SBCS) with FallBack did not match.\n", nativeCodePage[i]); 548 549 if(!testConvertToUnicode(expectedNative, sizeof(expectedNative), 550 retrievedSBCSText, sizeof(retrievedSBCSText)/sizeof(retrievedSBCSText[0]), nativeCodePage[i], TRUE, fromNativeoffs )) 551 log_err("%s->u(SBCS) with Fallback did not match.\n", nativeCodePage[i]); 552 } 553 554 /*DBCS*/ 555 if(!testConvertFromUnicode(DBCSText, sizeof(DBCSText)/sizeof(DBCSText[0]), 556 expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS), "ibm-1363", TRUE, toIBM1363Offs_DBCS )) 557 log_err("u-> ibm-1363(DBCS portion) with FallBack did not match.\n"); 558 559 if(!testConvertToUnicode(expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS), 560 retrievedDBCSText, sizeof(retrievedDBCSText)/sizeof(retrievedDBCSText[0]),"ibm-1363", TRUE, fromIBM1363offs_DBCS )) 561 log_err("ibm-1363->u(DBCS portion) with Fallback did not match.\n"); 562 563 564 /*MBCS*/ 565 if(!testConvertFromUnicode(MBCSText, sizeof(MBCSText)/sizeof(MBCSText[0]), 566 expectedIBM950, sizeof(expectedIBM950), "ibm-950", TRUE, toIBM950Offs )) 567 log_err("u-> ibm-950(MBCS) with FallBack did not match.\n"); 568 569 if(!testConvertToUnicode(expectedIBM950, sizeof(expectedIBM950), 570 retrievedMBCSText, sizeof(retrievedMBCSText)/sizeof(retrievedMBCSText[0]),"ibm-950", TRUE, fromIBM950offs )) 571 log_err("ibm-950->u(MBCS) with Fallback did not match.\n"); 572 573 /*commented untill data table is available*/ 574 log_verbose("toUnicode fallback with fallback data for MBCS\n"); 575 { 576 const uint8_t IBM950input[] = { 577 0xf4, 0x87, 0xa4, 0x4a, 0xf4, 0x88, 0xa4, 0x4b, 578 0xf9, 0x92, 0xdc, 0xb0, }; 579 UChar expectedUnicodeText[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9}; 580 int32_t fromIBM950inputOffs [] = { 0, 2, 4, 6, 8, 10}; 581 /* for testing reverse fallback behavior */ 582 UChar expectedFallbackFalse[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9}; 583 584 if(!testConvertToUnicode(IBM950input, sizeof(IBM950input), 585 expectedUnicodeText, sizeof(expectedUnicodeText)/sizeof(expectedUnicodeText[0]),"ibm-950", TRUE, fromIBM950inputOffs )) 586 log_err("ibm-950->u(MBCS) with Fallback did not match.\n"); 587 if(!testConvertToUnicode(IBM950input, sizeof(IBM950input), 588 expectedFallbackFalse, sizeof(expectedFallbackFalse)/sizeof(expectedFallbackFalse[0]),"ibm-950", FALSE, fromIBM950inputOffs )) 589 log_err("ibm-950->u(MBCS) with Fallback did not match.\n"); 590 591 } 592 log_verbose("toUnicode fallback with fallback data for euc-tw\n"); 593 { 594 const uint8_t euc_tw_input[] = { 595 0xA7, 0xCC, 0x8E, 0xA2, 0xA1, 0xAB, 596 0xA8, 0xC7, 0xC8, 0xDE, 597 0xA8, 0xCD, 0x8E, 0xA2, 0xA2, 0xEA,}; 598 UChar expectedUnicodeText[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278}; 599 int32_t from_euc_tw_offs [] = { 0, 2, 6, 8, 10, 12}; 600 /* for testing reverse fallback behavior */ 601 UChar expectedFallbackFalse[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278}; 602 603 if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input), 604 expectedUnicodeText, sizeof(expectedUnicodeText)/sizeof(expectedUnicodeText[0]),"euc-tw", TRUE, from_euc_tw_offs )) 605 log_err("from euc-tw->u with Fallback did not match.\n"); 606 607 if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input), 608 expectedFallbackFalse, sizeof(expectedFallbackFalse)/sizeof(expectedFallbackFalse[0]),"euc-tw", FALSE, from_euc_tw_offs )) 609 log_err("from euc-tw->u with Fallback false did not match.\n"); 610 611 612 } 613 log_verbose("fromUnicode to euc-tw with fallback data euc-tw\n"); 614 { 615 UChar inputText[]= { 0x0001, 0x008e, 0x203e, 0x2223, 0xff5c, 0x5296, 616 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278, 0xEDEC}; 617 const uint8_t expected_euc_tw[] = { 618 0x01, 0x1a, 0xa2, 0xa3, 619 0xa2, 0xde, 0xa2, 0xde, 620 0x8e, 0xa2, 0xe5, 0xb9, 621 0x8e, 0xa2, 0xa1, 0xab, 0x8e, 0xa2, 0xa1, 0xab, 622 0xc8, 0xde, 0xc8, 0xde, 623 0x8e, 0xa2, 0xa2, 0xea, 0x8e, 0xa2, 0xa2, 0xea, 624 0x8e, 0xac, 0xc6, 0xf7}; 625 int32_t to_euc_tw_offs [] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 626 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12}; 627 628 if(!testConvertFromUnicode(inputText, sizeof(inputText)/sizeof(inputText[0]), 629 expected_euc_tw, sizeof(expected_euc_tw), "euc-tw", TRUE, to_euc_tw_offs )) 630 log_err("u-> euc-tw with FallBack did not match.\n"); 631 632 } 633 634 /*MBCS 1363*/ 635 if(!testConvertFromUnicode(MBCSText1363, sizeof(MBCSText1363)/sizeof(MBCSText1363[0]), 636 expectedIBM1363, sizeof(expectedIBM1363), "ibm-1363", TRUE, toIBM1363Offs )) 637 log_err("u-> ibm-1363(MBCS) with FallBack did not match.\n"); 638 639 if(!testConvertToUnicode(expectedIBM1363, sizeof(expectedIBM1363), 640 retrievedMBCSText1363, sizeof(retrievedMBCSText1363)/sizeof(retrievedMBCSText1363[0]),"ibm-1363", TRUE, fromIBM1363offs )) 641 log_err("ibm-1363->u(MBCS) with Fallback did not match.\n"); 642 643 644 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm 645 which is test file for MBCS conversion with single-byte codepage data.*/ 646 { 647 648 /* MBCS with single byte codepage data test1.ucm*/ 649 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x0003}; 650 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0x08, 0xff,}; 651 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, 7}; 652 653 const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09}; 654 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd, 0xfffe}; 655 int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4,5}; 656 657 /*from Unicode*/ 658 if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 659 expectedtest1, sizeof(expectedtest1), "@test1", TRUE, totest1Offs )) 660 log_err("u-> test1(MBCS conversion with single-byte) did not match.\n"); 661 662 /*to Unicode*/ 663 if(!testConvertToUnicode(test1input, sizeof(test1input), 664 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test1", TRUE, fromtest1Offs )) 665 log_err("test1(MBCS conversion with single-byte) -> u did not match.\n"); 666 667 } 668 669 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm 670 which is test file for MBCS conversion with three-byte codepage data.*/ 671 { 672 673 /* MBCS with three byte codepage data test3.ucm*/ 674 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x000b, 0xd84d, 0xdc56, 0x000e, 0x0003, }; 675 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x07, 0xff, 0x01, 0x02, 0x0b, 0x01, 0x02, 0x0a, 0xff, 0xff,}; 676 int32_t totest3Offs[] = { 0, 1, 2, 3, 5, 7, 7, 7, 8, 8, 8, 10, 11}; 677 678 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 679 0x01, 0x02, 0x0e, 0x01, 0x02, 0x0d, 0x03, 0x01, 0x02, 0x0f,}; 680 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 681 0x000e, 0xd891, 0xdd67, 0xfffd, 0xfffd }; 682 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10, 13, 13, 16, 17}; 683 684 /*from Unicode*/ 685 if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 686 expectedtest3, sizeof(expectedtest3), "@test3", TRUE, totest3Offs )) 687 log_err("u-> test3(MBCS conversion with three-byte) did not match.\n"); 688 689 /*to Unicode*/ 690 if(!testConvertToUnicode(test3input, sizeof(test3input), 691 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", TRUE, fromtest3Offs )) 692 log_err("test3(MBCS conversion with three-byte) -> u did not match.\n"); 693 694 } 695 696 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm 697 which is test file for MBCS conversion with four-byte codepage data.*/ 698 { 699 700 /* MBCS with three byte codepage data test4.ucm*/ 701 const UChar unicodeInput[] = 702 { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 703 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x000f}; 704 const uint8_t expectedtest4[] = 705 { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0xff, 706 0x01, 0x02, 0x03, 0x0a, 0xff, 0xff, 0xff}; 707 int32_t totest4Offs[] = 708 { 0, 1, 2, 3, 3, 3, 3, 4, 6, 8, 8, 8, 8, 10, 11, 13}; 709 710 const uint8_t test4input[] = 711 { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x08, 712 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0e, 0x01, 0x02, 0x03, 0x0d, 0x03, 0x01, 0x02, 0x03, 0x0c,}; 713 const UChar expectedUnicode[] = 714 { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 715 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x1a, 0xfffd}; 716 int32_t fromtest4Offs[] = 717 { 0, 1, 2, 3, 7, 7, 8, 8, 9, 9, 13, 17, 17, 21, 22,}; 718 719 /*from Unicode*/ 720 if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 721 expectedtest4, sizeof(expectedtest4), "@test4", TRUE, totest4Offs )) 722 log_err("u-> test4(MBCS conversion with four-byte) did not match.\n"); 723 724 /*to Unicode*/ 725 if(!testConvertToUnicode(test4input, sizeof(test4input), 726 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", TRUE, fromtest4Offs )) 727 log_err("test4(MBCS conversion with four-byte) -> u did not match.\n"); 728 729 } 730 /* Test for jitterbug 509 EBCDIC_STATEFUL Converters*/ 731 { 732 const UChar unicodeInput[] = {0x00AF, 0x2013, 0x2223, 0x004C, 0x5F5D, 0xFF5E }; 733 const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1,0x0f }; 734 int32_t totest1Offs[] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5 }; 735 const uint8_t test1input[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1 }; 736 const UChar expectedUnicode[] = {0x203e, 0x2014, 0xff5c, 0x004c, 0x5f5e, 0x223c }; 737 int32_t fromtest1Offs[] = {1, 3, 5, 8, 10, 12 }; 738 /*from Unicode*/ 739 if(!testConvertFromUnicode(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]), 740 expectedtest1, sizeof(expectedtest1), "ibm-1371", TRUE, totest1Offs )) 741 log_err("u-> ibm-1371(MBCS conversion with single-byte) did not match.,\n"); 742 /*to Unicode*/ 743 if(!testConvertToUnicode(test1input, sizeof(test1input), 744 expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "ibm-1371", TRUE, fromtest1Offs )) 745 log_err("ibm-1371(MBCS conversion with single-byte) -> u did not match.,\n"); 746 } 747 748 } 749 #else 750 void addTestConverterFallBack(TestNode** root) 751 { 752 /* test nothing... */ 753 754 } 755 #endif 756