1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /***************************************************************************** 7 * 8 * File CU_CAPITST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda Ported for C API 13 ****************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <ctype.h> 19 #include "unicode/uloc.h" 20 #include "unicode/ucnv.h" 21 #include "unicode/ucnv_err.h" 22 #include "unicode/putil.h" 23 #include "unicode/uset.h" 24 #include "unicode/ustring.h" 25 #include "ucnv_bld.h" /* for sizeof(UConverter) */ 26 #include "cmemory.h" /* for UAlignedMemory */ 27 #include "cintltst.h" 28 #include "ccapitst.h" 29 30 /* for not including "cstring.h" -begin*/ 31 #ifdef U_WINDOWS 32 # define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2) 33 #elif defined(POSIX) 34 # define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2) 35 #else 36 # define ctest_stricmp(str1, str2) T_CString_stricmp(str1, str2) 37 #endif 38 39 static int U_EXPORT2 40 T_CString_stricmp(const char *str1, const char *str2) { 41 if(str1==NULL) { 42 if(str2==NULL) { 43 return 0; 44 } else { 45 return -1; 46 } 47 } else if(str2==NULL) { 48 return 1; 49 } else { 50 /* compare non-NULL strings lexically with lowercase */ 51 int rc; 52 unsigned char c1, c2; 53 for(;;) { 54 c1=(unsigned char)*str1; 55 c2=(unsigned char)*str2; 56 if(c1==0) { 57 if(c2==0) { 58 return 0; 59 } else { 60 return -1; 61 } 62 } else if(c2==0) { 63 return 1; 64 } else { 65 /* compare non-zero characters with lowercase */ 66 rc=(int)(unsigned char)tolower(c1)-(int)(unsigned char)tolower(c2); 67 if(rc!=0) { 68 return rc; 69 } 70 } 71 ++str1; 72 ++str2; 73 } 74 } 75 } 76 /* for not including "cstring.h" -end*/ 77 78 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 79 80 #define NUM_CODEPAGE 1 81 #define MAX_FILE_LEN 1024*20 82 #define UCS_FILE_NAME_SIZE 512 83 84 /*returns an action other than the one provided*/ 85 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); 86 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); 87 88 static UConverter * 89 cnv_open(const char *name, UErrorCode *pErrorCode) { 90 if(name!=NULL && name[0]=='*') { 91 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); 92 } else { 93 return ucnv_open(name, pErrorCode); 94 } 95 } 96 97 98 static void ListNames(void); 99 static void TestFlushCache(void); 100 static void TestDuplicateAlias(void); 101 static void TestCCSID(void); 102 static void TestJ932(void); 103 static void TestJ1968(void); 104 static void TestLMBCSMaxChar(void); 105 106 #if !UCONFIG_NO_LEGACY_CONVERSION 107 static void TestConvertSafeCloneCallback(void); 108 #endif 109 110 static void TestEBCDICSwapLFNL(void); 111 static void TestConvertEx(void); 112 static void TestConvertExFromUTF8(void); 113 static void TestConvertExFromUTF8_C5F0(void); 114 static void TestConvertAlgorithmic(void); 115 void TestDefaultConverterError(void); /* defined in cctest.c */ 116 static void TestToUCountPending(void); 117 static void TestFromUCountPending(void); 118 static void TestDefaultName(void); 119 static void TestCompareNames(void); 120 static void TestSubstString(void); 121 static void InvalidArguments(void); 122 static void TestGetName(void); 123 static void TestUTFBOM(void); 124 125 void addTestConvert(TestNode** root); 126 127 void addTestConvert(TestNode** root) 128 { 129 addTest(root, &ListNames, "tsconv/ccapitst/ListNames"); 130 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert"); 131 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); 132 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); 133 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); 134 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); 135 #if !UCONFIG_NO_LEGACY_CONVERSION 136 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); 137 #endif 138 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); 139 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); 140 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); 141 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 142 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); 143 #endif 144 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); 145 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); 146 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); 147 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); 148 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); 149 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); 150 #if !UCONFIG_NO_FILE_IO 151 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); 152 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); 153 #endif 154 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); 155 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); 156 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); 157 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); 158 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); 159 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); 160 } 161 162 static void ListNames(void) { 163 UErrorCode err = U_ZERO_ERROR; 164 int32_t testLong1 = 0; 165 const char* available_conv; 166 UEnumeration *allNamesEnum = NULL; 167 int32_t allNamesCount = 0; 168 uint16_t count; 169 170 log_verbose("Testing ucnv_openAllNames()..."); 171 allNamesEnum = ucnv_openAllNames(&err); 172 if(U_FAILURE(err)) { 173 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); 174 } 175 else { 176 const char *string = NULL; 177 int32_t len = 0; 178 int32_t count1 = 0; 179 int32_t count2 = 0; 180 allNamesCount = uenum_count(allNamesEnum, &err); 181 while ((string = uenum_next(allNamesEnum, &len, &err))) { 182 count1++; 183 log_verbose("read \"%s\", length %i\n", string, len); 184 } 185 if (U_FAILURE(err)) { 186 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err)); 187 err = U_ZERO_ERROR; 188 } 189 uenum_reset(allNamesEnum, &err); 190 while ((string = uenum_next(allNamesEnum, &len, &err))) { 191 count2++; 192 ucnv_close(ucnv_open(string, &err)); 193 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable"); 194 err = U_ZERO_ERROR; 195 } 196 if (count1 != count2) { 197 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n"); 198 } 199 } 200 uenum_close(allNamesEnum); 201 err = U_ZERO_ERROR; 202 203 /*Tests ucnv_getAvailableName(), getAvialableCount()*/ 204 205 log_verbose("Testing ucnv_countAvailable()..."); 206 207 testLong1=ucnv_countAvailable(); 208 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount); 209 210 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ 211 212 available_conv = ucnv_getAvailableName(testLong1); 213 /*test ucnv_getAvailableName with err condition*/ 214 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 "); 215 available_conv = ucnv_getAvailableName(-1); 216 if(available_conv != NULL){ 217 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n"); 218 } 219 220 /* Test ucnv_countAliases() etc. */ 221 count = ucnv_countAliases("utf-8", &err); 222 if(U_FAILURE(err)) { 223 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); 224 } else if(count <= 0) { 225 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); 226 } else { 227 /* try to get the aliases individually */ 228 const char *alias; 229 alias = ucnv_getAlias("utf-8", 0, &err); 230 if(U_FAILURE(err)) { 231 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err)); 232 } else if(strcmp("UTF-8", alias) != 0) { 233 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias); 234 } else { 235 uint16_t aliasNum; 236 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 237 alias = ucnv_getAlias("utf-8", aliasNum, &err); 238 if(U_FAILURE(err)) { 239 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 240 } else if(strlen(alias) > 20) { 241 /* sanity check */ 242 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias); 243 } else { 244 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias); 245 } 246 } 247 if(U_SUCCESS(err)) { 248 /* try to fill an array with all aliases */ 249 const char **aliases; 250 aliases=(const char **)malloc(count * sizeof(const char *)); 251 if(aliases != 0) { 252 ucnv_getAliases("utf-8", aliases, &err); 253 if(U_FAILURE(err)) { 254 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err)); 255 } else { 256 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 257 /* compare the pointers with the ones returned individually */ 258 alias = ucnv_getAlias("utf-8", aliasNum, &err); 259 if(U_FAILURE(err)) { 260 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 261 } else if(aliases[aliasNum] != alias) { 262 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum); 263 } 264 } 265 } 266 free((char **)aliases); 267 } 268 } 269 } 270 } 271 } 272 273 274 static void TestConvert() 275 { 276 #if !UCONFIG_NO_LEGACY_CONVERSION 277 char myptr[4]; 278 char save[4]; 279 int32_t testLong1 = 0; 280 uint16_t rest = 0; 281 int32_t len = 0; 282 int32_t x = 0; 283 FILE* ucs_file_in = NULL; 284 UChar BOM = 0x0000; 285 UChar myUChar = 0x0000; 286 char* mytarget; /* [MAX_FILE_LEN] */ 287 char* mytarget_1; 288 char* mytarget_use; 289 UChar* consumedUni = NULL; 290 char* consumed = NULL; 291 char* output_cp_buffer; /* [MAX_FILE_LEN] */ 292 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */ 293 UChar* ucs_file_buffer_use; 294 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ 295 UChar* my_ucs_file_buffer_1; 296 int8_t ii = 0; 297 int32_t j = 0; 298 uint16_t codepage_index = 0; 299 int32_t cp = 0; 300 UErrorCode err = U_ZERO_ERROR; 301 char ucs_file_name[UCS_FILE_NAME_SIZE]; 302 UConverterFromUCallback MIA1, MIA1_2; 303 UConverterToUCallback MIA2, MIA2_2; 304 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2; 305 UConverter* someConverters[5]; 306 UConverter* myConverter = 0; 307 UChar* displayname = 0; 308 309 const char* locale; 310 311 UChar* uchar1 = 0; 312 UChar* uchar2 = 0; 313 UChar* uchar3 = 0; 314 int32_t targetcapacity2; 315 int32_t targetcapacity; 316 int32_t targetsize; 317 int32_t disnamelen; 318 319 const UChar* tmp_ucs_buf; 320 const UChar* tmp_consumedUni=NULL; 321 const char* tmp_mytarget_use; 322 const char* tmp_consumed; 323 324 /****************************************************************** 325 Checking Unicode -> ksc 326 ******************************************************************/ 327 328 const char* CodePagesToTest[NUM_CODEPAGE] = 329 { 330 "ibm-949_P110-1999" 331 332 333 }; 334 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] = 335 { 336 949 337 }; 338 339 340 const int8_t CodePagesMinChars[NUM_CODEPAGE] = 341 { 342 1 343 344 }; 345 346 const int8_t CodePagesMaxChars[NUM_CODEPAGE] = 347 { 348 2 349 350 }; 351 352 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] = 353 { 354 0xAFFE 355 }; 356 357 const char* CodePagesTestFiles[NUM_CODEPAGE] = 358 { 359 "uni-text.bin" 360 }; 361 362 363 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] = 364 { 365 UCNV_IBM 366 367 }; 368 369 const char* CodePagesLocale[NUM_CODEPAGE] = 370 { 371 "ko_KR" 372 }; 373 374 UConverterFromUCallback oldFromUAction = NULL; 375 UConverterToUCallback oldToUAction = NULL; 376 const void* oldFromUContext = NULL; 377 const void* oldToUContext = NULL; 378 379 /* Allocate memory */ 380 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); 381 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0])); 382 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0])); 383 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0])); 384 385 ucs_file_buffer_use = ucs_file_buffer; 386 mytarget_1=mytarget; 387 mytarget_use = mytarget; 388 my_ucs_file_buffer_1=my_ucs_file_buffer; 389 390 /* flush the converter cache to get a consistent state before the flushing is tested */ 391 ucnv_flushCache(); 392 393 /*Testing ucnv_openU()*/ 394 { 395 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/ 396 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */ 397 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */ 398 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"}; 399 UChar illegalName[100]; 400 UConverter *converter=NULL; 401 err=U_ZERO_ERROR; 402 converter=ucnv_openU(converterName, &err); 403 if(U_FAILURE(err)){ 404 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err)); 405 } 406 ucnv_close(converter); 407 err=U_ZERO_ERROR; 408 converter=ucnv_openU(NULL, &err); 409 if(U_FAILURE(err)){ 410 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err)); 411 } 412 ucnv_close(converter); 413 /*testing with error value*/ 414 err=U_ILLEGAL_ARGUMENT_ERROR; 415 converter=ucnv_openU(converterName, &err); 416 if(!(converter == NULL)){ 417 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n"); 418 } 419 ucnv_close(converter); 420 err=U_ZERO_ERROR; 421 u_uastrcpy(illegalName, ""); 422 u_uastrcpy(illegalName, illegalNameChars); 423 ucnv_openU(illegalName, &err); 424 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){ 425 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n"); 426 } 427 428 err=U_ZERO_ERROR; 429 ucnv_openU(firstSortedName, &err); 430 if(err!=U_FILE_ACCESS_ERROR){ 431 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n"); 432 } 433 434 err=U_ZERO_ERROR; 435 ucnv_openU(lastSortedName, &err); 436 if(err!=U_FILE_ACCESS_ERROR){ 437 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n"); 438 } 439 440 err=U_ZERO_ERROR; 441 } 442 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n"); 443 { 444 UConverter *cnv=NULL; 445 err=U_ZERO_ERROR; 446 cnv=ucnv_open("ibm-949,Madhu", &err); 447 if(U_FAILURE(err)){ 448 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err)); 449 } 450 ucnv_close(cnv); 451 452 } 453 /*Testing ucnv_convert()*/ 454 { 455 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0; 456 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; 457 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; 458 char *target=0; 459 sourceLimit=sizeof(source)/sizeof(source[0]); 460 err=U_ZERO_ERROR; 461 targetLimit=0; 462 463 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err); 464 if(err == U_BUFFER_OVERFLOW_ERROR){ 465 err=U_ZERO_ERROR; 466 targetLimit=targetCapacity+1; 467 target=(char*)malloc(sizeof(char) * targetLimit); 468 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 469 } 470 if(U_FAILURE(err)){ 471 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err)); 472 } 473 else { 474 for(i=0; i<targetCapacity; i++){ 475 if(target[i] != expectedTarget[i]){ 476 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]); 477 } 478 } 479 480 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err); 481 if(U_FAILURE(err) || i!=7){ 482 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n", 483 u_errorName(err), i); 484 } 485 486 /*Test error conditions*/ 487 err=U_ZERO_ERROR; 488 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err); 489 if(i !=0){ 490 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n"); 491 } 492 493 err=U_ILLEGAL_ARGUMENT_ERROR; 494 sourceLimit=sizeof(source)/sizeof(source[0]); 495 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 496 if(i !=0 ){ 497 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); 498 } 499 500 err=U_ZERO_ERROR; 501 sourceLimit=sizeof(source)/sizeof(source[0]); 502 targetLimit=0; 503 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 504 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ 505 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n"); 506 } 507 err=U_ZERO_ERROR; 508 free(target); 509 } 510 } 511 512 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/ 513 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n"); 514 err=U_ILLEGAL_ARGUMENT_ERROR; 515 if(ucnv_open(NULL, &err) != NULL){ 516 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 517 } 518 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){ 519 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 520 } 521 err=U_ZERO_ERROR; 522 523 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */ 524 log_verbose("\n---Testing ucnv_open default...\n"); 525 someConverters[0] = ucnv_open(NULL,&err); 526 someConverters[1] = ucnv_open(NULL,&err); 527 someConverters[2] = ucnv_open("utf8", &err); 528 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err); 529 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */ 530 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));} 531 532 /* Testing ucnv_getName()*/ 533 /*default code page */ 534 ucnv_getName(someConverters[0], &err); 535 if(U_FAILURE(err)) { 536 log_data_err("getName[0] failed\n"); 537 } else { 538 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err)); 539 } 540 ucnv_getName(someConverters[1], &err); 541 if(U_FAILURE(err)) { 542 log_data_err("getName[1] failed\n"); 543 } else { 544 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); 545 } 546 547 ucnv_close(someConverters[0]); 548 ucnv_close(someConverters[1]); 549 ucnv_close(someConverters[2]); 550 ucnv_close(someConverters[3]); 551 552 553 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) 554 { 555 int32_t i = 0; 556 557 err = U_ZERO_ERROR; 558 #ifdef U_TOPSRCDIR 559 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); 560 #else 561 strcpy(ucs_file_name, loadTestData(&err)); 562 563 if(U_FAILURE(err)){ 564 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err)); 565 return; 566 } 567 568 { 569 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); 570 571 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ 572 *(index+1)=0; 573 } 574 } 575 576 strcat(ucs_file_name,".."U_FILE_SEP_STRING); 577 #endif 578 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); 579 580 ucs_file_in = fopen(ucs_file_name,"rb"); 581 if (!ucs_file_in) 582 { 583 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); 584 return; 585 } 586 587 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/ 588 589 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */ 590 /* ucnv_flushCache(); */ 591 myConverter =ucnv_open( "ibm-949", &err); 592 if (!myConverter || U_FAILURE(err)) 593 { 594 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); 595 596 return; 597 } 598 599 /*testing for ucnv_getName() */ 600 log_verbose("Testing ucnv_getName()...\n"); 601 ucnv_getName(myConverter, &err); 602 if(U_FAILURE(err)) 603 log_err("Error in getName\n"); 604 else 605 { 606 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); 607 } 608 if (ctest_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) 609 log_err("getName failed\n"); 610 else 611 log_verbose("getName ok\n"); 612 /*Test getName with error condition*/ 613 { 614 const char* name=0; 615 err=U_ILLEGAL_ARGUMENT_ERROR; 616 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR"); 617 name=ucnv_getName(myConverter, &err); 618 if(name != NULL){ 619 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail"); 620 } 621 err=U_ZERO_ERROR; 622 } 623 624 625 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/ 626 627 log_verbose("Testing ucnv_getMaxCharSize()...\n"); 628 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index]) 629 log_verbose("Max byte per character OK\n"); 630 else 631 log_err("Max byte per character failed\n"); 632 633 log_verbose("\n---Testing ucnv_getMinCharSize()...\n"); 634 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index]) 635 log_verbose("Min byte per character OK\n"); 636 else 637 log_err("Min byte per character failed\n"); 638 639 640 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/ 641 log_verbose("\n---Testing ucnv_getSubstChars...\n"); 642 ii=4; 643 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 644 if (ii <= 0) { 645 log_err("ucnv_getSubstChars returned a negative number %d\n", ii); 646 } 647 648 for(x=0;x<ii;x++) 649 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]); 650 if (rest==CodePagesSubstitutionChars[codepage_index]) 651 log_verbose("Substitution character ok\n"); 652 else 653 log_err("Substitution character failed.\n"); 654 655 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n"); 656 ucnv_setSubstChars(myConverter, myptr, ii, &err); 657 if (U_FAILURE(err)) 658 { 659 log_err("FAILURE! %s\n", myErrorName(err)); 660 } 661 ucnv_getSubstChars(myConverter,save, &ii, &err); 662 if (U_FAILURE(err)) 663 { 664 log_err("FAILURE! %s\n", myErrorName(err)); 665 } 666 667 if (strncmp(save, myptr, ii)) 668 log_err("Saved substitution character failed\n"); 669 else 670 log_verbose("Saved substitution character ok\n"); 671 672 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/ 673 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n"); 674 ii=1; 675 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 676 if(err != U_INDEX_OUTOFBOUNDS_ERROR){ 677 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err)); 678 } 679 err=U_ZERO_ERROR; 680 ii=4; 681 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 682 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n"); 683 ucnv_setSubstChars(myConverter, myptr, 0, &err); 684 if(err != U_ILLEGAL_ARGUMENT_ERROR){ 685 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err)); 686 } 687 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n"); 688 strcpy(myptr, "abc"); 689 ucnv_setSubstChars(myConverter, myptr, ii, &err); 690 err=U_ZERO_ERROR; 691 ucnv_getSubstChars(myConverter, save, &ii, &err); 692 if(strncmp(save, myptr, ii) == 0){ 693 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n"); 694 } 695 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n"); 696 err=U_ZERO_ERROR; 697 strcpy(myptr, "abc"); 698 ucnv_setSubstChars(myConverter, myptr, ii, &err); 699 err=U_ILLEGAL_ARGUMENT_ERROR; 700 ucnv_getSubstChars(myConverter, save, &ii, &err); 701 if(strncmp(save, myptr, ii) == 0){ 702 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n"); 703 } 704 err=U_ZERO_ERROR; 705 /*------*/ 706 707 #ifdef U_ENABLE_GENERIC_ISO_2022 708 /*resetState ucnv_reset()*/ 709 log_verbose("\n---Testing ucnv_reset()..\n"); 710 ucnv_reset(myConverter); 711 { 712 UChar32 c; 713 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80}; 714 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 715 UConverter *cnv=ucnv_open("ISO_2022", &err); 716 if(U_FAILURE(err)) { 717 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 718 } 719 c=ucnv_getNextUChar(cnv, &source, limit, &err); 720 if((U_FAILURE(err) || c != (UChar32)0x0031)) { 721 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err)); 722 } 723 ucnv_reset(cnv); 724 ucnv_close(cnv); 725 726 } 727 #endif 728 729 /*getDisplayName*/ 730 log_verbose("\n---Testing ucnv_getDisplayName()...\n"); 731 locale=CodePagesLocale[codepage_index]; 732 len=0; 733 displayname=NULL; 734 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); 735 if(err==U_BUFFER_OVERFLOW_ERROR) { 736 err=U_ZERO_ERROR; 737 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); 738 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); 739 if(U_FAILURE(err)) { 740 log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); 741 } 742 else { 743 log_verbose(" getDisplayName o.k.\n"); 744 } 745 free(displayname); 746 displayname=NULL; 747 } 748 else { 749 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); 750 } 751 /*test ucnv_getDiaplayName with error condition*/ 752 err= U_ILLEGAL_ARGUMENT_ERROR; 753 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); 754 if( len !=0 ){ 755 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); 756 } 757 /*test ucnv_getDiaplayName with error condition*/ 758 err=U_ZERO_ERROR; 759 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); 760 if( len !=0 || U_SUCCESS(err)){ 761 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); 762 } 763 err=U_ZERO_ERROR; 764 765 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ 766 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context); 767 768 log_verbose("\n---Testing ucnv_setFromUCallBack...\n"); 769 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 770 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context) 771 { 772 log_err("FAILURE! %s\n", myErrorName(err)); 773 } 774 775 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 776 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM) 777 log_err("get From UCallBack failed\n"); 778 else 779 log_verbose("get From UCallBack ok\n"); 780 781 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n"); 782 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err); 783 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM) 784 { 785 log_err("FAILURE! %s\n", myErrorName(err)); 786 } 787 788 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 789 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context) 790 log_err("get From UCallBack action failed\n"); 791 else 792 log_verbose("get From UCallBack action ok\n"); 793 794 /*testing ucnv_setToUCallBack with error conditions*/ 795 err=U_ILLEGAL_ARGUMENT_ERROR; 796 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n"); 797 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 798 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 799 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){ 800 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 801 } 802 err=U_ZERO_ERROR; 803 804 805 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/ 806 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context); 807 808 log_verbose("\n---Testing setTo UCallBack...\n"); 809 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err); 810 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context) 811 { 812 log_err("FAILURE! %s\n", myErrorName(err)); 813 } 814 815 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 816 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM) 817 log_err("To UCallBack failed\n"); 818 else 819 log_verbose("To UCallBack ok\n"); 820 821 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n"); 822 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err); 823 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM) 824 { log_err("FAILURE! %s\n", myErrorName(err)); } 825 826 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 827 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context) 828 log_err("To UCallBack failed\n"); 829 else 830 log_verbose("To UCallBack ok\n"); 831 832 /*testing ucnv_setToUCallBack with error conditions*/ 833 err=U_ILLEGAL_ARGUMENT_ERROR; 834 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n"); 835 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err); 836 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 837 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){ 838 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 839 } 840 err=U_ZERO_ERROR; 841 842 843 /*getcodepageid testing ucnv_getCCSID() */ 844 log_verbose("\n----Testing getCCSID....\n"); 845 cp = ucnv_getCCSID(myConverter,&err); 846 if (U_FAILURE(err)) 847 { 848 log_err("FAILURE!..... %s\n", myErrorName(err)); 849 } 850 if (cp != CodePageNumberToTest[codepage_index]) 851 log_err("Codepage number test failed\n"); 852 else 853 log_verbose("Codepage number test OK\n"); 854 855 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/ 856 err=U_ILLEGAL_ARGUMENT_ERROR; 857 if( ucnv_getCCSID(myConverter,&err) != -1){ 858 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n"); 859 } 860 err=U_ZERO_ERROR; 861 862 /*getCodepagePlatform testing ucnv_getPlatform()*/ 863 log_verbose("\n---Testing getCodepagePlatform ..\n"); 864 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err)) 865 log_err("Platform codepage test failed\n"); 866 else 867 log_verbose("Platform codepage test ok\n"); 868 869 if (U_FAILURE(err)) 870 { 871 log_err("FAILURE! %s\n", myErrorName(err)); 872 } 873 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/ 874 err= U_ILLEGAL_ARGUMENT_ERROR; 875 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){ 876 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n"); 877 } 878 err=U_ZERO_ERROR; 879 880 881 /*Reads the BOM*/ 882 fread(&BOM, sizeof(UChar), 1, ucs_file_in); 883 if (BOM!=0xFEFF && BOM!=0xFFFE) 884 { 885 log_err("File Missing BOM...Bailing!\n"); 886 return; 887 } 888 889 890 /*Reads in the file*/ 891 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) 892 { 893 myUChar = ucs_file_buffer[i-1]; 894 895 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/ 896 } 897 898 myUChar = ucs_file_buffer[i-1]; 899 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/ 900 901 902 /*testing ucnv_fromUChars() and ucnv_toUChars() */ 903 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/ 904 905 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1)); 906 u_uastrcpy(uchar1,""); 907 u_strncpy(uchar1,ucs_file_buffer,i); 908 uchar1[i] = 0; 909 910 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1)); 911 u_uastrcpy(uchar3,""); 912 u_strncpy(uchar3,ucs_file_buffer,i); 913 uchar3[i] = 0; 914 915 /*Calls the Conversion Routine */ 916 testLong1 = MAX_FILE_LEN; 917 log_verbose("\n---Testing ucnv_fromUChars()\n"); 918 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 919 if (U_FAILURE(err)) 920 { 921 log_err("\nFAILURE...%s\n", myErrorName(err)); 922 } 923 else 924 log_verbose(" ucnv_fromUChars() o.k.\n"); 925 926 /*test the conversion routine */ 927 log_verbose("\n---Testing ucnv_toUChars()\n"); 928 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */ 929 targetcapacity2=0; 930 targetsize = ucnv_toUChars(myConverter, 931 NULL, 932 targetcapacity2, 933 output_cp_buffer, 934 strlen(output_cp_buffer), 935 &err); 936 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ 937 938 if(err==U_BUFFER_OVERFLOW_ERROR) 939 { 940 err=U_ZERO_ERROR; 941 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar)); 942 targetsize = ucnv_toUChars(myConverter, 943 uchar2, 944 targetsize+1, 945 output_cp_buffer, 946 strlen(output_cp_buffer), 947 &err); 948 949 if(U_FAILURE(err)) 950 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err)); 951 else 952 log_verbose(" ucnv_toUChars() o.k.\n"); 953 954 if(u_strcmp(uchar1,uchar2)!=0) 955 log_err("equality test failed with conversion routine\n"); 956 } 957 else 958 { 959 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n"); 960 } 961 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/ 962 err=U_ILLEGAL_ARGUMENT_ERROR; 963 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n"); 964 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 965 if (targetcapacity !=0) { 966 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 967 } 968 err=U_ZERO_ERROR; 969 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n"); 970 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err); 971 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) { 972 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n"); 973 } 974 err=U_ZERO_ERROR; 975 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n"); 976 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err); 977 if (targetcapacity !=0) { 978 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n"); 979 } 980 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n"); 981 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err); 982 if (err != U_BUFFER_OVERFLOW_ERROR) { 983 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); 984 } 985 /*toUChars with error conditions*/ 986 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); 987 if(targetsize != 0){ 988 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 989 } 990 err=U_ZERO_ERROR; 991 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); 992 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ 993 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); 994 } 995 err=U_ZERO_ERROR; 996 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err); 997 if (targetsize !=0) { 998 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); 999 } 1000 targetcapacity2=0; 1001 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); 1002 if (err != U_STRING_NOT_TERMINATED_WARNING) { 1003 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", 1004 u_errorName(err)); 1005 } 1006 err=U_ZERO_ERROR; 1007 /*-----*/ 1008 1009 1010 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ 1011 /*Clean up re-usable vars*/ 1012 j=0; 1013 log_verbose("Testing ucnv_fromUnicode().....\n"); 1014 tmp_ucs_buf=ucs_file_buffer_use; 1015 ucnv_fromUnicode(myConverter, &mytarget_1, 1016 mytarget + MAX_FILE_LEN, 1017 &tmp_ucs_buf, 1018 ucs_file_buffer_use+i, 1019 NULL, 1020 TRUE, 1021 &err); 1022 consumedUni = (UChar*)tmp_consumedUni; 1023 1024 if (U_FAILURE(err)) 1025 { 1026 log_err("FAILURE! %s\n", myErrorName(err)); 1027 } 1028 else 1029 log_verbose("ucnv_fromUnicode() o.k.\n"); 1030 1031 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */ 1032 log_verbose("Testing ucnv_toUnicode().....\n"); 1033 tmp_mytarget_use=mytarget_use; 1034 tmp_consumed = consumed; 1035 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1, 1036 my_ucs_file_buffer + MAX_FILE_LEN, 1037 &tmp_mytarget_use, 1038 mytarget_use + (mytarget_1 - mytarget), 1039 NULL, 1040 FALSE, 1041 &err); 1042 consumed = (char*)tmp_consumed; 1043 if (U_FAILURE(err)) 1044 { 1045 log_err("FAILURE! %s\n", myErrorName(err)); 1046 } 1047 else 1048 log_verbose("ucnv_toUnicode() o.k.\n"); 1049 1050 1051 log_verbose("\n---Testing RoundTrip ...\n"); 1052 1053 1054 u_strncpy(uchar3, my_ucs_file_buffer,i); 1055 uchar3[i] = 0; 1056 1057 if(u_strcmp(uchar1,uchar3)==0) 1058 log_verbose("Equality test o.k.\n"); 1059 else 1060 log_err("Equality test failed\n"); 1061 1062 /*sanity compare */ 1063 if(uchar2 == NULL) 1064 { 1065 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__); 1066 } 1067 else 1068 { 1069 if(u_strcmp(uchar2, uchar3)==0) 1070 log_verbose("Equality test o.k.\n"); 1071 else 1072 log_err("Equality test failed\n"); 1073 } 1074 1075 fclose(ucs_file_in); 1076 ucnv_close(myConverter); 1077 if (uchar1 != 0) free(uchar1); 1078 if (uchar2 != 0) free(uchar2); 1079 if (uchar3 != 0) free(uchar3); 1080 } 1081 1082 free((void*)mytarget); 1083 free((void*)output_cp_buffer); 1084 free((void*)ucs_file_buffer); 1085 free((void*)my_ucs_file_buffer); 1086 #endif 1087 } 1088 1089 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) 1090 { 1091 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; 1092 } 1093 1094 1095 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) 1096 { 1097 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; 1098 } 1099 1100 static void TestFlushCache(void) { 1101 #if !UCONFIG_NO_LEGACY_CONVERSION 1102 UErrorCode err = U_ZERO_ERROR; 1103 UConverter* someConverters[5]; 1104 int flushCount = 0; 1105 1106 /* flush the converter cache to get a consistent state before the flushing is tested */ 1107 ucnv_flushCache(); 1108 1109 /*Testing ucnv_open()*/ 1110 /* Note: These converters have been chosen because they do NOT 1111 encode the Latin characters (U+0041, ...), and therefore are 1112 highly unlikely to be chosen as system default codepages */ 1113 1114 someConverters[0] = ucnv_open("ibm-1047", &err); 1115 if (U_FAILURE(err)) { 1116 log_data_err("FAILURE! %s\n", myErrorName(err)); 1117 } 1118 1119 someConverters[1] = ucnv_open("ibm-1047", &err); 1120 if (U_FAILURE(err)) { 1121 log_data_err("FAILURE! %s\n", myErrorName(err)); 1122 } 1123 1124 someConverters[2] = ucnv_open("ibm-1047", &err); 1125 if (U_FAILURE(err)) { 1126 log_data_err("FAILURE! %s\n", myErrorName(err)); 1127 } 1128 1129 someConverters[3] = ucnv_open("gb18030", &err); 1130 if (U_FAILURE(err)) { 1131 log_data_err("FAILURE! %s\n", myErrorName(err)); 1132 } 1133 1134 someConverters[4] = ucnv_open("ibm-954", &err); 1135 if (U_FAILURE(err)) { 1136 log_data_err("FAILURE! %s\n", myErrorName(err)); 1137 } 1138 1139 1140 /* Testing ucnv_flushCache() */ 1141 log_verbose("\n---Testing ucnv_flushCache...\n"); 1142 if ((flushCount=ucnv_flushCache())==0) 1143 log_verbose("Flush cache ok\n"); 1144 else 1145 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1146 1147 /*testing ucnv_close() and ucnv_flushCache() */ 1148 ucnv_close(someConverters[0]); 1149 ucnv_close(someConverters[1]); 1150 1151 if ((flushCount=ucnv_flushCache())==0) 1152 log_verbose("Flush cache ok\n"); 1153 else 1154 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1155 1156 ucnv_close(someConverters[2]); 1157 ucnv_close(someConverters[3]); 1158 1159 if ((flushCount=ucnv_flushCache())==2) 1160 log_verbose("Flush cache ok\n"); /*because first, second and third are same */ 1161 else 1162 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", 1163 __LINE__, 1164 flushCount); 1165 1166 ucnv_close(someConverters[4]); 1167 if ( (flushCount=ucnv_flushCache())==1) 1168 log_verbose("Flush cache ok\n"); 1169 else 1170 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); 1171 #endif 1172 } 1173 1174 /** 1175 * Test the converter alias API, specifically the fuzzy matching of 1176 * alias names and the alias table integrity. Make sure each 1177 * converter has at least one alias (itself), and that its listed 1178 * aliases map back to itself. Check some hard-coded UTF-8 and 1179 * ISO_2022 aliases to make sure they work. 1180 */ 1181 static void TestAlias() { 1182 int32_t i, ncnv; 1183 UErrorCode status = U_ZERO_ERROR; 1184 1185 /* Predetermined aliases that we expect to map back to ISO_2022 1186 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ 1187 const char* ISO_2022_NAMES[] = 1188 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", 1189 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; 1190 int32_t ISO_2022_NAMES_LENGTH = 1191 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); 1192 const char *UTF8_NAMES[] = 1193 { "UTF-8", "utf-8", "utf8", "ibm-1208", 1194 "utf_8", "ibm1208", "cp1208" }; 1195 int32_t UTF8_NAMES_LENGTH = 1196 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); 1197 1198 struct { 1199 const char *name; 1200 const char *alias; 1201 } CONVERTERS_NAMES[] = { 1202 { "UTF-32BE", "UTF32_BigEndian" }, 1203 { "UTF-32LE", "UTF32_LittleEndian" }, 1204 { "UTF-32", "ISO-10646-UCS-4" }, 1205 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, 1206 { "UTF-32", "ucs-4" } 1207 }; 1208 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); 1209 1210 /* When there are bugs in gencnval or in ucnv_io, converters can 1211 appear to have no aliases. */ 1212 ncnv = ucnv_countAvailable(); 1213 log_verbose("%d converters\n", ncnv); 1214 for (i=0; i<ncnv; ++i) { 1215 const char *name = ucnv_getAvailableName(i); 1216 const char *alias0; 1217 uint16_t na = ucnv_countAliases(name, &status); 1218 uint16_t j; 1219 UConverter *cnv; 1220 1221 if (na == 0) { 1222 log_err("FAIL: Converter \"%s\" (i=%d)" 1223 " has no aliases; expect at least one\n", 1224 name, i); 1225 continue; 1226 } 1227 cnv = ucnv_open(name, &status); 1228 if (U_FAILURE(status)) { 1229 log_data_err("FAIL: Converter \"%s\" (i=%d)" 1230 " can't be opened.\n", 1231 name, i); 1232 } 1233 else { 1234 if (strcmp(ucnv_getName(cnv, &status), name) != 0 1235 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { 1236 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " 1237 "The should be the same\n", 1238 name, ucnv_getName(cnv, &status)); 1239 } 1240 } 1241 ucnv_close(cnv); 1242 1243 status = U_ZERO_ERROR; 1244 alias0 = ucnv_getAlias(name, 0, &status); 1245 for (j=1; j<na; ++j) { 1246 const char *alias; 1247 /* Make sure each alias maps back to the the same list of 1248 aliases. Assume that if alias 0 is the same, the whole 1249 list is the same (this should always be true). */ 1250 const char *mapBack; 1251 1252 status = U_ZERO_ERROR; 1253 alias = ucnv_getAlias(name, j, &status); 1254 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1255 log_err("FAIL: Converter \"%s\"is ambiguous\n", name); 1256 } 1257 1258 if (alias == NULL) { 1259 log_err("FAIL: Converter \"%s\" -> " 1260 "alias[%d]=NULL\n", 1261 name, j); 1262 continue; 1263 } 1264 1265 mapBack = ucnv_getAlias(alias, 0, &status); 1266 1267 if (mapBack == NULL) { 1268 log_err("FAIL: Converter \"%s\" -> " 1269 "alias[%d]=\"%s\" -> " 1270 "alias[0]=NULL, exp. \"%s\"\n", 1271 name, j, alias, alias0); 1272 continue; 1273 } 1274 1275 if (0 != strcmp(alias0, mapBack)) { 1276 int32_t idx; 1277 UBool foundAlias = FALSE; 1278 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1279 /* Make sure that we only get this mismapping when there is 1280 an ambiguous alias, and the other converter has this alias too. */ 1281 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) { 1282 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) { 1283 foundAlias = TRUE; 1284 break; 1285 } 1286 } 1287 } 1288 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */ 1289 1290 if (!foundAlias) { 1291 log_err("FAIL: Converter \"%s\" -> " 1292 "alias[%d]=\"%s\" -> " 1293 "alias[0]=\"%s\", exp. \"%s\"\n", 1294 name, j, alias, mapBack, alias0); 1295 } 1296 } 1297 } 1298 } 1299 1300 1301 /* Check a list of predetermined aliases that we expect to map 1302 * back to ISO_2022 and UTF-8. */ 1303 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) { 1304 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status); 1305 if(!mapBack) { 1306 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]); 1307 continue; 1308 } 1309 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { 1310 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", 1311 ISO_2022_NAMES[i], mapBack); 1312 } 1313 } 1314 1315 1316 for (i=1; i<UTF8_NAMES_LENGTH; ++i) { 1317 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status); 1318 if(!mapBack) { 1319 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]); 1320 continue; 1321 } 1322 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) { 1323 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n", 1324 UTF8_NAMES[i], mapBack); 1325 } 1326 } 1327 1328 /* 1329 * Check a list of predetermined aliases that we expect to map 1330 * back to predermined converter names. 1331 */ 1332 1333 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { 1334 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); 1335 if(!mapBack) { 1336 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); 1337 continue; 1338 } 1339 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { 1340 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n", 1341 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name); 1342 } 1343 } 1344 1345 } 1346 1347 static void TestDuplicateAlias(void) { 1348 const char *alias; 1349 UErrorCode status = U_ZERO_ERROR; 1350 1351 status = U_ZERO_ERROR; 1352 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); 1353 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1354 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); 1355 } 1356 status = U_ZERO_ERROR; 1357 alias = ucnv_getStandardName("ibm-943", "IANA", &status); 1358 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1359 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias); 1360 } 1361 status = U_ZERO_ERROR; 1362 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status); 1363 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) { 1364 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias); 1365 } 1366 } 1367 1368 1369 /* Test safe clone callback */ 1370 1371 static uint32_t TSCC_nextSerial() 1372 { 1373 static uint32_t n = 1; 1374 1375 return (n++); 1376 } 1377 1378 typedef struct 1379 { 1380 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */ 1381 uint32_t serial; /* minted from nextSerial, above */ 1382 UBool wasClosed; /* close happened on the object */ 1383 } TSCCContext; 1384 1385 static TSCCContext *TSCC_clone(TSCCContext *ctx) 1386 { 1387 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext)); 1388 1389 newCtx->serial = TSCC_nextSerial(); 1390 newCtx->wasClosed = 0; 1391 newCtx->magic = 0xC0FFEE; 1392 1393 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial); 1394 1395 return newCtx; 1396 } 1397 1398 static void TSCC_fromU(const void *context, 1399 UConverterFromUnicodeArgs *fromUArgs, 1400 const UChar* codeUnits, 1401 int32_t length, 1402 UChar32 codePoint, 1403 UConverterCallbackReason reason, 1404 UErrorCode * err) 1405 { 1406 TSCCContext *ctx = (TSCCContext*)context; 1407 UConverterFromUCallback junkFrom; 1408 1409 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter); 1410 1411 if(ctx->magic != 0xC0FFEE) { 1412 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1413 return; 1414 } 1415 1416 if(reason == UCNV_CLONE) { 1417 UErrorCode subErr = U_ZERO_ERROR; 1418 TSCCContext *newCtx; 1419 TSCCContext *junkCtx; 1420 TSCCContext **pjunkCtx = &junkCtx; 1421 1422 /* "recreate" it */ 1423 log_verbose("TSCC_fromU: cloning..\n"); 1424 newCtx = TSCC_clone(ctx); 1425 1426 if(newCtx == NULL) { 1427 log_err("TSCC_fromU: internal clone failed on %p\n", ctx); 1428 } 1429 1430 /* now, SET it */ 1431 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1432 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1433 1434 if(U_FAILURE(subErr)) { 1435 *err = subErr; 1436 } 1437 } 1438 1439 if(reason == UCNV_CLOSE) { 1440 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial); 1441 ctx->wasClosed = TRUE; 1442 } 1443 } 1444 1445 1446 static void TSCC_toU(const void *context, 1447 UConverterToUnicodeArgs *toUArgs, 1448 const char* codeUnits, 1449 int32_t length, 1450 UConverterCallbackReason reason, 1451 UErrorCode * err) 1452 { 1453 TSCCContext *ctx = (TSCCContext*)context; 1454 UConverterToUCallback junkFrom; 1455 1456 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter); 1457 1458 if(ctx->magic != 0xC0FFEE) { 1459 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1460 return; 1461 } 1462 1463 if(reason == UCNV_CLONE) { 1464 UErrorCode subErr = U_ZERO_ERROR; 1465 TSCCContext *newCtx; 1466 TSCCContext *junkCtx; 1467 TSCCContext **pjunkCtx = &junkCtx; 1468 1469 /* "recreate" it */ 1470 log_verbose("TSCC_toU: cloning..\n"); 1471 newCtx = TSCC_clone(ctx); 1472 1473 if(newCtx == NULL) { 1474 log_err("TSCC_toU: internal clone failed on %p\n", ctx); 1475 } 1476 1477 /* now, SET it */ 1478 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1479 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1480 1481 if(U_FAILURE(subErr)) { 1482 *err = subErr; 1483 } 1484 } 1485 1486 if(reason == UCNV_CLOSE) { 1487 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial); 1488 ctx->wasClosed = TRUE; 1489 } 1490 } 1491 1492 static void TSCC_init(TSCCContext *q) 1493 { 1494 q->magic = 0xC0FFEE; 1495 q->serial = TSCC_nextSerial(); 1496 q->wasClosed = 0; 1497 } 1498 1499 static void TSCC_print_log(TSCCContext *q, const char *name) 1500 { 1501 if(q==NULL) { 1502 log_verbose("TSCContext: %s is NULL!!\n", name); 1503 } else { 1504 if(q->magic != 0xC0FFEE) { 1505 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n", 1506 q,q->serial, q->magic); 1507 } 1508 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n", 1509 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open"); 1510 } 1511 } 1512 1513 #if !UCONFIG_NO_LEGACY_CONVERSION 1514 static void TestConvertSafeCloneCallback() 1515 { 1516 UErrorCode err = U_ZERO_ERROR; 1517 TSCCContext from1, to1; 1518 TSCCContext *from2, *from3, *to2, *to3; 1519 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; 1520 char hunk[8192]; 1521 int32_t hunkSize = 8192; 1522 UConverterFromUCallback junkFrom; 1523 UConverterToUCallback junkTo; 1524 UConverter *conv1, *conv2 = NULL; 1525 1526 conv1 = ucnv_open("iso-8859-3", &err); 1527 1528 if(U_FAILURE(err)) { 1529 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); 1530 return; 1531 } 1532 1533 log_verbose("Opened conv1=%p\n", conv1); 1534 1535 TSCC_init(&from1); 1536 TSCC_init(&to1); 1537 1538 TSCC_print_log(&from1, "from1"); 1539 TSCC_print_log(&to1, "to1"); 1540 1541 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err); 1542 log_verbose("Set from1 on conv1\n"); 1543 TSCC_print_log(&from1, "from1"); 1544 1545 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err); 1546 log_verbose("Set to1 on conv1\n"); 1547 TSCC_print_log(&to1, "to1"); 1548 1549 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err); 1550 if(U_FAILURE(err)) { 1551 log_err("safeClone failed: %s\n", u_errorName(err)); 1552 return; 1553 } 1554 log_verbose("Cloned to conv2=%p.\n", conv2); 1555 1556 /********** from *********************/ 1557 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); 1558 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); 1559 1560 TSCC_print_log(from2, "from2"); 1561 TSCC_print_log(from3, "from3(==from1)"); 1562 1563 if(from2 == NULL) { 1564 log_err("FAIL! from2 is null \n"); 1565 return; 1566 } 1567 1568 if(from3 == NULL) { 1569 log_err("FAIL! from3 is null \n"); 1570 return; 1571 } 1572 1573 if(from3 != (&from1) ) { 1574 log_err("FAIL! conv1's FROM context changed!\n"); 1575 } 1576 1577 if(from2 == (&from1) ) { 1578 log_err("FAIL! conv1's FROM context is the same as conv2's!\n"); 1579 } 1580 1581 if(from1.wasClosed) { 1582 log_err("FAIL! from1 is closed \n"); 1583 } 1584 1585 if(from2->wasClosed) { 1586 log_err("FAIL! from2 was closed\n"); 1587 } 1588 1589 /********** to *********************/ 1590 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); 1591 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); 1592 1593 TSCC_print_log(to2, "to2"); 1594 TSCC_print_log(to3, "to3(==to1)"); 1595 1596 if(to2 == NULL) { 1597 log_err("FAIL! to2 is null \n"); 1598 return; 1599 } 1600 1601 if(to3 == NULL) { 1602 log_err("FAIL! to3 is null \n"); 1603 return; 1604 } 1605 1606 if(to3 != (&to1) ) { 1607 log_err("FAIL! conv1's TO context changed!\n"); 1608 } 1609 1610 if(to2 == (&to1) ) { 1611 log_err("FAIL! conv1's TO context is the same as conv2's!\n"); 1612 } 1613 1614 if(to1.wasClosed) { 1615 log_err("FAIL! to1 is closed \n"); 1616 } 1617 1618 if(to2->wasClosed) { 1619 log_err("FAIL! to2 was closed\n"); 1620 } 1621 1622 /*************************************/ 1623 1624 ucnv_close(conv1); 1625 log_verbose("ucnv_closed (conv1)\n"); 1626 TSCC_print_log(&from1, "from1"); 1627 TSCC_print_log(from2, "from2"); 1628 TSCC_print_log(&to1, "to1"); 1629 TSCC_print_log(to2, "to2"); 1630 1631 if(from1.wasClosed == FALSE) { 1632 log_err("FAIL! from1 is NOT closed \n"); 1633 } 1634 1635 if(from2->wasClosed) { 1636 log_err("FAIL! from2 was closed\n"); 1637 } 1638 1639 if(to1.wasClosed == FALSE) { 1640 log_err("FAIL! to1 is NOT closed \n"); 1641 } 1642 1643 if(to2->wasClosed) { 1644 log_err("FAIL! to2 was closed\n"); 1645 } 1646 1647 ucnv_close(conv2); 1648 log_verbose("ucnv_closed (conv2)\n"); 1649 1650 TSCC_print_log(&from1, "from1"); 1651 TSCC_print_log(from2, "from2"); 1652 1653 if(from1.wasClosed == FALSE) { 1654 log_err("FAIL! from1 is NOT closed \n"); 1655 } 1656 1657 if(from2->wasClosed == FALSE) { 1658 log_err("FAIL! from2 was NOT closed\n"); 1659 } 1660 1661 TSCC_print_log(&to1, "to1"); 1662 TSCC_print_log(to2, "to2"); 1663 1664 if(to1.wasClosed == FALSE) { 1665 log_err("FAIL! to1 is NOT closed \n"); 1666 } 1667 1668 if(to2->wasClosed == FALSE) { 1669 log_err("FAIL! to2 was NOT closed\n"); 1670 } 1671 1672 if(to2 != (&to1)) { 1673 free(to2); /* to1 is stack based */ 1674 } 1675 if(from2 != (&from1)) { 1676 free(from2); /* from1 is stack based */ 1677 } 1678 } 1679 #endif 1680 1681 static UBool 1682 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { 1683 while(length>0) { 1684 if(*p!=b) { 1685 return TRUE; 1686 } 1687 ++p; 1688 --length; 1689 } 1690 return FALSE; 1691 } 1692 1693 static void TestConvertSafeClone() 1694 { 1695 /* one 'regular' & all the 'private stateful' converters */ 1696 static const char *const names[] = { 1697 #if !UCONFIG_NO_LEGACY_CONVERSION 1698 "ibm-1047", 1699 "ISO_2022,locale=zh,version=1", 1700 #endif 1701 "SCSU", 1702 #if !UCONFIG_NO_LEGACY_CONVERSION 1703 "HZ", 1704 "lmbcs", 1705 "ISCII,version=0", 1706 "ISO_2022,locale=kr,version=1", 1707 "ISO_2022,locale=jp,version=2", 1708 #endif 1709 "BOCU-1", 1710 "UTF-7", 1711 #if !UCONFIG_NO_LEGACY_CONVERSION 1712 "IMAP-mailbox-name", 1713 "ibm-1047-s390" 1714 #else 1715 "IMAP=mailbox-name" 1716 #endif 1717 }; 1718 1719 /* store the actual sizes of each converter */ 1720 int32_t actualSizes[LENGTHOF(names)]; 1721 1722 static const int32_t bufferSizes[] = { 1723 U_CNV_SAFECLONE_BUFFERSIZE, 1724 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ 1725 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ 1726 }; 1727 1728 char charBuffer[21]; /* Leave at an odd number for alignment testing */ 1729 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; 1730 int32_t bufferSize, maxBufferSize; 1731 const char *maxName; 1732 UConverter * cnv, *cnv2; 1733 UErrorCode err; 1734 1735 char *pCharBuffer; 1736 const char *pConstCharBuffer; 1737 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); 1738 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ 1739 UChar uniCharBuffer[20]; 1740 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; 1741 const char *pCharSource = charSourceBuffer; 1742 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); 1743 UChar *pUCharTarget = uniCharBuffer; 1744 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); 1745 const UChar * pUniBuffer; 1746 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); 1747 int32_t index, j; 1748 1749 err = U_ZERO_ERROR; 1750 cnv = ucnv_open(names[0], &err); 1751 if(U_SUCCESS(err)) { 1752 /* Check the various error & informational states: */ 1753 1754 /* Null status - just returns NULL */ 1755 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1756 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0)) 1757 { 1758 log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); 1759 } 1760 /* error status - should return 0 & keep error the same */ 1761 err = U_MEMORY_ALLOCATION_ERROR; 1762 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) 1763 { 1764 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); 1765 } 1766 err = U_ZERO_ERROR; 1767 1768 /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/ 1769 if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1770 { 1771 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); 1772 } 1773 err = U_ZERO_ERROR; 1774 1775 /* buffer size pointer is 0 - fill in pbufferSize with a size */ 1776 bufferSize = 0; 1777 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) 1778 { 1779 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); 1780 } 1781 /* Verify our define is large enough */ 1782 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) 1783 { 1784 log_err("FAIL: Pre-calculated buffer size is too small\n"); 1785 } 1786 /* Verify we can use this run-time calculated size */ 1787 if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) 1788 { 1789 log_err("FAIL: Converter can't be cloned with run-time size\n"); 1790 } 1791 if (cnv2) { 1792 ucnv_close(cnv2); 1793 } 1794 1795 /* size one byte too small - should allocate & let us know */ 1796 --bufferSize; 1797 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1798 { 1799 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); 1800 } 1801 if (cnv2) { 1802 ucnv_close(cnv2); 1803 } 1804 1805 err = U_ZERO_ERROR; 1806 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1807 1808 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ 1809 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1810 { 1811 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); 1812 } 1813 if (cnv2) { 1814 ucnv_close(cnv2); 1815 } 1816 1817 err = U_ZERO_ERROR; 1818 1819 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ 1820 if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1821 { 1822 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); 1823 } 1824 1825 ucnv_close(cnv); 1826 } 1827 1828 maxBufferSize = 0; 1829 maxName = ""; 1830 1831 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ 1832 1833 for(j = 0; j < LENGTHOF(bufferSizes); ++j) { 1834 for (index = 0; index < LENGTHOF(names); index++) 1835 { 1836 err = U_ZERO_ERROR; 1837 cnv = ucnv_open(names[index], &err); 1838 if(U_FAILURE(err)) { 1839 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[index], u_errorName(err)); 1840 continue; 1841 } 1842 1843 if(j == 0) { 1844 /* preflight to get maxBufferSize */ 1845 actualSizes[index] = 0; 1846 ucnv_safeClone(cnv, NULL, &actualSizes[index], &err); 1847 if(actualSizes[index] > maxBufferSize) { 1848 maxBufferSize = actualSizes[index]; 1849 maxName = names[index]; 1850 } 1851 } 1852 1853 memset(buffer, 0xaa, sizeof(buffer)); 1854 1855 bufferSize = bufferSizes[j]; 1856 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); 1857 1858 /* close the original immediately to make sure that the clone works by itself */ 1859 ucnv_close(cnv); 1860 1861 if( actualSizes[index] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && 1862 err == U_SAFECLONE_ALLOCATED_WARNING 1863 ) { 1864 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[index]); 1865 } 1866 1867 /* check if the clone function overwrote any bytes that it is not supposed to touch */ 1868 if(bufferSize <= bufferSizes[j]) { 1869 /* used the stack buffer */ 1870 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || 1871 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) 1872 ) { 1873 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", 1874 names[index], bufferSize, bufferSizes[j]); 1875 } 1876 } else { 1877 /* heap-allocated the clone */ 1878 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { 1879 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", 1880 names[index], bufferSize, bufferSizes[j]); 1881 } 1882 } 1883 1884 pCharBuffer = charBuffer; 1885 pUniBuffer = uniBuffer; 1886 1887 ucnv_fromUnicode(cnv2, 1888 &pCharBuffer, 1889 charBufferLimit, 1890 &pUniBuffer, 1891 uniBufferLimit, 1892 NULL, 1893 TRUE, 1894 &err); 1895 if(U_FAILURE(err)){ 1896 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); 1897 } 1898 ucnv_toUnicode(cnv2, 1899 &pUCharTarget, 1900 pUCharTargetLimit, 1901 &pCharSource, 1902 pCharSourceLimit, 1903 NULL, 1904 TRUE, 1905 &err 1906 ); 1907 1908 if(U_FAILURE(err)){ 1909 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); 1910 } 1911 1912 pConstCharBuffer = charBuffer; 1913 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) 1914 { 1915 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); 1916 } 1917 ucnv_close(cnv2); 1918 } 1919 } 1920 1921 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1922 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1923 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { 1924 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1925 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1926 } 1927 } 1928 1929 static void TestCCSID() { 1930 #if !UCONFIG_NO_LEGACY_CONVERSION 1931 UConverter *cnv; 1932 UErrorCode errorCode; 1933 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; 1934 int32_t i, ccsid; 1935 1936 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { 1937 ccsid=ccsids[i]; 1938 1939 errorCode=U_ZERO_ERROR; 1940 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode); 1941 if(U_FAILURE(errorCode)) { 1942 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode)); 1943 continue; 1944 } 1945 1946 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) { 1947 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode)); 1948 } 1949 1950 /* skip gb18030(ccsid 1392) */ 1951 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) { 1952 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode)); 1953 } 1954 1955 ucnv_close(cnv); 1956 } 1957 #endif 1958 } 1959 1960 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */ 1961 1962 /* CHUNK_SIZE defined in common\ucnv.c: */ 1963 #define CHUNK_SIZE 1024 1964 1965 static void bug1(void); 1966 static void bug2(void); 1967 static void bug3(void); 1968 1969 static void 1970 TestJ932(void) 1971 { 1972 bug1(); /* Unicode intermediate buffer straddle bug */ 1973 bug2(); /* pre-flighting size incorrect caused by simple overflow */ 1974 bug3(); /* pre-flighting size incorrect caused by expansion overflow */ 1975 } 1976 1977 /* 1978 * jitterbug 932: test chunking boundary conditions in 1979 1980 int32_t ucnv_convert(const char *toConverterName, 1981 const char *fromConverterName, 1982 char *target, 1983 int32_t targetSize, 1984 const char *source, 1985 int32_t sourceSize, 1986 UErrorCode * err) 1987 1988 * See discussions on the icu mailing list in 1989 * 2001-April with the subject "converter 'flush' question". 1990 * 1991 * Bug report and test code provided by Edward J. Batutis. 1992 */ 1993 static void bug1() 1994 { 1995 #if !UCONFIG_NO_LEGACY_CONVERSION 1996 char char_in[CHUNK_SIZE+32]; 1997 char char_out[CHUNK_SIZE*2]; 1998 1999 /* GB 18030 equivalent of U+10000 is 90308130 */ 2000 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 }; 2001 2002 UErrorCode err = U_ZERO_ERROR; 2003 int32_t i, test_seq_len = sizeof(test_seq); 2004 2005 /* 2006 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward 2007 * until the straddle bug appears. I didn't want to hard-code everything so this test could 2008 * be expanded - however this is the only type of straddle bug I can think of at the moment - 2009 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no 2010 * other Unicode sequences cause a bug since combining sequences are not supported by the 2011 * converters. 2012 */ 2013 2014 for (i = test_seq_len; i >= 0; i--) { 2015 /* put character sequence into input buffer */ 2016 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */ 2017 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len); 2018 2019 /* do the conversion */ 2020 ucnv_convert("us-ascii", /* out */ 2021 "gb18030", /* in */ 2022 char_out, 2023 sizeof(char_out), 2024 char_in, 2025 sizeof(char_in), 2026 &err); 2027 2028 /* bug1: */ 2029 if (err == U_TRUNCATED_CHAR_FOUND) { 2030 /* this happens when surrogate pair straddles the intermediate buffer in 2031 * T_UConverter_fromCodepageToCodepage */ 2032 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n"); 2033 } 2034 } 2035 #endif 2036 } 2037 2038 /* bug2: pre-flighting loop bug: simple overflow causes bug */ 2039 static void bug2() 2040 { 2041 /* US-ASCII "1234567890" */ 2042 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 }; 2043 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 }; 2044 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, 2045 0x00, 0x00, 0x00, 0x31, 2046 0x00, 0x00, 0x00, 0x32, 2047 0x00, 0x00, 0x00, 0x33, 2048 0x00, 0x00, 0x00, 0x34, 2049 0x00, 0x00, 0x00, 0x35, 2050 0x00, 0x00, 0x00, 0x36, 2051 0x00, 0x00, 0x00, 0x37, 2052 0x00, 0x00, 0x00, 0x38, 2053 0x00, 0x00, (char)0xf0, 0x00}; 2054 static char target[5]; 2055 2056 UErrorCode err = U_ZERO_ERROR; 2057 int32_t size; 2058 2059 /* do the conversion */ 2060 size = ucnv_convert("iso-8859-1", /* out */ 2061 "us-ascii", /* in */ 2062 target, 2063 sizeof(target), 2064 source, 2065 sizeof(source), 2066 &err); 2067 2068 if ( size != 10 ) { 2069 /* bug2: size is 5, should be 10 */ 2070 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size); 2071 } 2072 2073 err = U_ZERO_ERROR; 2074 /* do the conversion */ 2075 size = ucnv_convert("UTF-32BE", /* out */ 2076 "UTF-8", /* in */ 2077 target, 2078 sizeof(target), 2079 sourceUTF8, 2080 sizeof(sourceUTF8), 2081 &err); 2082 2083 if ( size != 32 ) { 2084 /* bug2: size is 5, should be 32 */ 2085 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size); 2086 } 2087 2088 err = U_ZERO_ERROR; 2089 /* do the conversion */ 2090 size = ucnv_convert("UTF-8", /* out */ 2091 "UTF-32BE", /* in */ 2092 target, 2093 sizeof(target), 2094 sourceUTF32, 2095 sizeof(sourceUTF32), 2096 &err); 2097 2098 if ( size != 12 ) { 2099 /* bug2: size is 5, should be 12 */ 2100 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); 2101 } 2102 } 2103 2104 /* 2105 * bug3: when the characters expand going from source to target codepage 2106 * you get bug3 in addition to bug2 2107 */ 2108 static void bug3() 2109 { 2110 #if !UCONFIG_NO_LEGACY_CONVERSION 2111 char char_in[CHUNK_SIZE*4]; 2112 char target[5]; 2113 UErrorCode err = U_ZERO_ERROR; 2114 int32_t size; 2115 2116 /* 2117 * first get the buggy size from bug2 then 2118 * compare it to buggy size with an expansion 2119 */ 2120 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ 2121 2122 /* do the conversion */ 2123 size = ucnv_convert("lmbcs", /* out */ 2124 "us-ascii", /* in */ 2125 target, 2126 sizeof(target), 2127 char_in, 2128 sizeof(char_in), 2129 &err); 2130 2131 if ( size != sizeof(char_in) ) { 2132 /* 2133 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer 2134 * in the converter?), should be CHUNK_SIZE*4 2135 * 2136 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize... 2137 */ 2138 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size); 2139 } 2140 2141 /* 2142 * now do the conversion with expansion 2143 * ascii 0x08 expands to 0x0F 0x28 in lmbcs 2144 */ 2145 memset(char_in, 8, sizeof(char_in)); 2146 err = U_ZERO_ERROR; 2147 2148 /* do the conversion */ 2149 size = ucnv_convert("lmbcs", /* out */ 2150 "us-ascii", /* in */ 2151 target, 2152 sizeof(target), 2153 char_in, 2154 sizeof(char_in), 2155 &err); 2156 2157 /* expect 2X expansion */ 2158 if ( size != sizeof(char_in) * 2 ) { 2159 /* 2160 * bug3: 2161 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05: 2162 */ 2163 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); 2164 } 2165 #endif 2166 } 2167 2168 static void 2169 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, 2170 const char *src, int32_t srcLength, 2171 const char *expectTarget, int32_t expectTargetLength, 2172 int32_t chunkSize, 2173 const char *testName, 2174 UErrorCode expectCode) { 2175 UChar pivotBuffer[CHUNK_SIZE]; 2176 UChar *pivotSource, *pivotTarget; 2177 const UChar *pivotLimit; 2178 2179 char targetBuffer[CHUNK_SIZE]; 2180 char *target; 2181 const char *srcLimit, *finalSrcLimit, *targetLimit; 2182 2183 int32_t targetLength; 2184 2185 UBool flush; 2186 2187 UErrorCode errorCode; 2188 2189 /* setup */ 2190 if(chunkSize>CHUNK_SIZE) { 2191 chunkSize=CHUNK_SIZE; 2192 } 2193 2194 pivotSource=pivotTarget=pivotBuffer; 2195 pivotLimit=pivotBuffer+chunkSize; 2196 2197 finalSrcLimit=src+srcLength; 2198 target=targetBuffer; 2199 targetLimit=targetBuffer+chunkSize; 2200 2201 ucnv_resetToUnicode(srcCnv); 2202 ucnv_resetFromUnicode(targetCnv); 2203 2204 errorCode=U_ZERO_ERROR; 2205 flush=FALSE; 2206 2207 /* convert, streaming-style (both converters and pivot keep state) */ 2208 for(;;) { 2209 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */ 2210 if(src+chunkSize<=finalSrcLimit) { 2211 srcLimit=src+chunkSize; 2212 } else { 2213 srcLimit=finalSrcLimit; 2214 } 2215 ucnv_convertEx(targetCnv, srcCnv, 2216 &target, targetLimit, 2217 &src, srcLimit, 2218 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, 2219 FALSE, flush, &errorCode); 2220 targetLength=(int32_t)(target-targetBuffer); 2221 if(target>targetLimit) { 2222 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", 2223 testName, chunkSize, target, targetLimit); 2224 break; /* TODO: major problem! */ 2225 } 2226 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 2227 /* continue converting another chunk */ 2228 errorCode=U_ZERO_ERROR; 2229 if(targetLength+chunkSize<=sizeof(targetBuffer)) { 2230 targetLimit=target+chunkSize; 2231 } else { 2232 targetLimit=targetBuffer+sizeof(targetBuffer); 2233 } 2234 } else if(U_FAILURE(errorCode)) { 2235 /* failure */ 2236 break; 2237 } else if(flush) { 2238 /* all done */ 2239 break; 2240 } else if(src==finalSrcLimit && pivotSource==pivotTarget) { 2241 /* all consumed, now flush without input (separate from conversion for testing) */ 2242 flush=TRUE; 2243 } 2244 } 2245 2246 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) { 2247 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n", 2248 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode)); 2249 } else if(targetLength!=expectTargetLength) { 2250 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n", 2251 testName, chunkSize, targetLength, expectTargetLength); 2252 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) { 2253 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n", 2254 testName, chunkSize); 2255 } 2256 } 2257 2258 static void 2259 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, 2260 const char *src, int32_t srcLength, 2261 const char *expectTarget, int32_t expectTargetLength, 2262 const char *testName, 2263 UErrorCode expectCode) { 2264 convertExStreaming(srcCnv, targetCnv, 2265 src, srcLength, 2266 expectTarget, expectTargetLength, 2267 1, testName, expectCode); 2268 convertExStreaming(srcCnv, targetCnv, 2269 src, srcLength, 2270 expectTarget, expectTargetLength, 2271 3, testName, expectCode); 2272 convertExStreaming(srcCnv, targetCnv, 2273 src, srcLength, 2274 expectTarget, expectTargetLength, 2275 7, testName, expectCode); 2276 } 2277 2278 static void TestConvertEx() { 2279 #if !UCONFIG_NO_LEGACY_CONVERSION 2280 static const uint8_t 2281 utf8[]={ 2282 /* 4e00 30a1 ff61 0410 */ 2283 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2284 }, 2285 shiftJIS[]={ 2286 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2287 }, 2288 errorTarget[]={ 2289 /* 2290 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2291 * SUB, SUB, 0x40, SUB, SUB, 0x40 2292 */ 2293 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 2294 }; 2295 2296 char srcBuffer[100], targetBuffer[100]; 2297 2298 const char *src; 2299 char *target; 2300 2301 UChar pivotBuffer[100]; 2302 UChar *pivotSource, *pivotTarget; 2303 2304 UConverter *cnv1, *cnv2; 2305 UErrorCode errorCode; 2306 2307 errorCode=U_ZERO_ERROR; 2308 cnv1=ucnv_open("UTF-8", &errorCode); 2309 if(U_FAILURE(errorCode)) { 2310 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode)); 2311 return; 2312 } 2313 2314 cnv2=ucnv_open("Shift-JIS", &errorCode); 2315 if(U_FAILURE(errorCode)) { 2316 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2317 ucnv_close(cnv1); 2318 return; 2319 } 2320 2321 /* test ucnv_convertEx() with streaming conversion style */ 2322 convertExMultiStreaming(cnv1, cnv2, 2323 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS), 2324 "UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2325 2326 convertExMultiStreaming(cnv2, cnv1, 2327 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), 2328 "Shift-JIS -> UTF-8", U_ZERO_ERROR); 2329 2330 /* U_ZERO_ERROR because by default the SUB callbacks are set */ 2331 convertExMultiStreaming(cnv1, cnv2, 2332 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget), 2333 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2334 2335 /* test some simple conversions */ 2336 2337 /* NUL-terminated source and target */ 2338 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2339 memcpy(srcBuffer, utf8, sizeof(utf8)); 2340 srcBuffer[sizeof(utf8)]=0; 2341 src=srcBuffer; 2342 target=targetBuffer; 2343 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2344 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2345 if( errorCode!=U_ZERO_ERROR || 2346 target-targetBuffer!=sizeof(shiftJIS) || 2347 *target!=0 || 2348 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2349 ) { 2350 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n", 2351 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2352 } 2353 2354 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */ 2355 errorCode=U_AMBIGUOUS_ALIAS_WARNING; 2356 memset(targetBuffer, 0xff, sizeof(targetBuffer)); 2357 src=srcBuffer; 2358 target=targetBuffer; 2359 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL, 2360 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2361 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2362 target-targetBuffer!=sizeof(shiftJIS) || 2363 *target!=(char)0xff || 2364 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2365 ) { 2366 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n", 2367 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2368 } 2369 2370 /* bad arguments */ 2371 errorCode=U_MESSAGE_PARSE_ERROR; 2372 src=srcBuffer; 2373 target=targetBuffer; 2374 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2375 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2376 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2377 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2378 } 2379 2380 /* pivotLimit==pivotStart */ 2381 errorCode=U_ZERO_ERROR; 2382 pivotSource=pivotTarget=pivotBuffer; 2383 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2384 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode); 2385 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2386 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode)); 2387 } 2388 2389 /* *pivotSource==NULL */ 2390 errorCode=U_ZERO_ERROR; 2391 pivotSource=NULL; 2392 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2393 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2394 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2395 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode)); 2396 } 2397 2398 /* *source==NULL */ 2399 errorCode=U_ZERO_ERROR; 2400 src=NULL; 2401 pivotSource=pivotBuffer; 2402 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2403 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2404 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2405 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); 2406 } 2407 2408 /* streaming conversion without a pivot buffer */ 2409 errorCode=U_ZERO_ERROR; 2410 src=srcBuffer; 2411 pivotSource=pivotBuffer; 2412 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2413 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); 2414 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2415 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); 2416 } 2417 2418 ucnv_close(cnv1); 2419 ucnv_close(cnv2); 2420 #endif 2421 } 2422 2423 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */ 2424 static const char *const badUTF8[]={ 2425 /* trail byte */ 2426 "\x80", 2427 2428 /* truncated multi-byte sequences */ 2429 "\xd0", 2430 "\xe0", 2431 "\xe1", 2432 "\xed", 2433 "\xee", 2434 "\xf0", 2435 "\xf1", 2436 "\xf4", 2437 "\xf8", 2438 "\xfc", 2439 2440 "\xe0\x80", 2441 "\xe0\xa0", 2442 "\xe1\x80", 2443 "\xed\x80", 2444 "\xed\xa0", 2445 "\xee\x80", 2446 "\xf0\x80", 2447 "\xf0\x90", 2448 "\xf1\x80", 2449 "\xf4\x80", 2450 "\xf4\x90", 2451 "\xf8\x80", 2452 "\xfc\x80", 2453 2454 "\xf0\x80\x80", 2455 "\xf0\x90\x80", 2456 "\xf1\x80\x80", 2457 "\xf4\x80\x80", 2458 "\xf4\x90\x80", 2459 "\xf8\x80\x80", 2460 "\xfc\x80\x80", 2461 2462 "\xf8\x80\x80\x80", 2463 "\xfc\x80\x80\x80", 2464 2465 "\xfc\x80\x80\x80\x80", 2466 2467 /* complete sequences but non-shortest forms or out of range etc. */ 2468 "\xc0\x80", 2469 "\xe0\x80\x80", 2470 "\xed\xa0\x80", 2471 "\xf0\x80\x80\x80", 2472 "\xf4\x90\x80\x80", 2473 "\xf8\x80\x80\x80\x80", 2474 "\xfc\x80\x80\x80\x80\x80", 2475 "\xfe", 2476 "\xff" 2477 }; 2478 2479 /* get some character that can be converted and convert it */ 2480 static UBool getTestChar(UConverter *cnv, const char *converterName, 2481 char charUTF8[4], int32_t *pCharUTF8Length, 2482 char char0[8], int32_t *pChar0Length, 2483 char char1[8], int32_t *pChar1Length) { 2484 UChar utf16[U16_MAX_LENGTH]; 2485 int32_t utf16Length; 2486 2487 const UChar *utf16Source; 2488 char *target; 2489 2490 USet *set; 2491 UChar32 c; 2492 UErrorCode errorCode; 2493 2494 errorCode=U_ZERO_ERROR; 2495 set=uset_open(1, 0); 2496 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2497 c=uset_charAt(set, uset_size(set)/2); 2498 uset_close(set); 2499 2500 utf16Length=0; 2501 U16_APPEND_UNSAFE(utf16, utf16Length, c); 2502 *pCharUTF8Length=0; 2503 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); 2504 2505 utf16Source=utf16; 2506 target=char0; 2507 ucnv_fromUnicode(cnv, 2508 &target, char0+sizeof(char0), 2509 &utf16Source, utf16+utf16Length, 2510 NULL, FALSE, &errorCode); 2511 *pChar0Length=(int32_t)(target-char0); 2512 2513 utf16Source=utf16; 2514 target=char1; 2515 ucnv_fromUnicode(cnv, 2516 &target, char1+sizeof(char1), 2517 &utf16Source, utf16+utf16Length, 2518 NULL, FALSE, &errorCode); 2519 *pChar1Length=(int32_t)(target-char1); 2520 2521 if(U_FAILURE(errorCode)) { 2522 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); 2523 return FALSE; 2524 } 2525 return TRUE; 2526 } 2527 2528 static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2529 char charUTF8[4], int32_t charUTF8Length, 2530 char char0[8], int32_t char0Length, 2531 char char1[8], int32_t char1Length) { 2532 char utf8[16]; 2533 int32_t utf8Length; 2534 2535 char output[16]; 2536 int32_t outputLength; 2537 2538 char invalidChars[8]; 2539 int8_t invalidLength; 2540 2541 const char *source; 2542 char *target; 2543 2544 UChar pivotBuffer[8]; 2545 UChar *pivotSource, *pivotTarget; 2546 2547 UErrorCode errorCode; 2548 int32_t i; 2549 2550 /* test truncated sequences */ 2551 errorCode=U_ZERO_ERROR; 2552 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2553 2554 memcpy(utf8, charUTF8, charUTF8Length); 2555 2556 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2557 /* truncated sequence? */ 2558 int32_t length=strlen(badUTF8[i]); 2559 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) { 2560 continue; 2561 } 2562 2563 /* assemble a string with the test character and the truncated sequence */ 2564 memcpy(utf8+charUTF8Length, badUTF8[i], length); 2565 utf8Length=charUTF8Length+length; 2566 2567 /* convert and check the invalidChars */ 2568 source=utf8; 2569 target=output; 2570 pivotSource=pivotTarget=pivotBuffer; 2571 errorCode=U_ZERO_ERROR; 2572 ucnv_convertEx(cnv, utf8Cnv, 2573 &target, output+sizeof(output), 2574 &source, utf8+utf8Length, 2575 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2576 TRUE, TRUE, /* reset & flush */ 2577 &errorCode); 2578 outputLength=(int32_t)(target-output); 2579 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { 2580 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i); 2581 continue; 2582 } 2583 2584 errorCode=U_ZERO_ERROR; 2585 invalidLength=(int8_t)sizeof(invalidChars); 2586 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode); 2587 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) { 2588 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i); 2589 } 2590 } 2591 } 2592 2593 static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2594 char charUTF8[4], int32_t charUTF8Length, 2595 char char0[8], int32_t char0Length, 2596 char char1[8], int32_t char1Length) { 2597 char utf8[600], expect[600]; 2598 int32_t utf8Length, expectLength; 2599 2600 char testName[32]; 2601 2602 UErrorCode errorCode; 2603 int32_t i; 2604 2605 errorCode=U_ZERO_ERROR; 2606 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode); 2607 2608 /* 2609 * assemble an input string with the test character between each 2610 * bad sequence, 2611 * and an expected string with repeated test character output 2612 */ 2613 memcpy(utf8, charUTF8, charUTF8Length); 2614 utf8Length=charUTF8Length; 2615 2616 memcpy(expect, char0, char0Length); 2617 expectLength=char0Length; 2618 2619 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2620 int32_t length=strlen(badUTF8[i]); 2621 memcpy(utf8+utf8Length, badUTF8[i], length); 2622 utf8Length+=length; 2623 2624 memcpy(utf8+utf8Length, charUTF8, charUTF8Length); 2625 utf8Length+=charUTF8Length; 2626 2627 memcpy(expect+expectLength, char1, char1Length); 2628 expectLength+=char1Length; 2629 } 2630 2631 /* expect that each bad UTF-8 sequence is detected and skipped */ 2632 strcpy(testName, "from bad UTF-8 to "); 2633 strcat(testName, converterName); 2634 2635 convertExMultiStreaming(utf8Cnv, cnv, 2636 utf8, utf8Length, 2637 expect, expectLength, 2638 testName, 2639 U_ZERO_ERROR); 2640 } 2641 2642 /* Test illegal UTF-8 input. */ 2643 static void TestConvertExFromUTF8() { 2644 static const char *const converterNames[]={ 2645 #if !UCONFIG_NO_LEGACY_CONVERSION 2646 "windows-1252", 2647 "shift-jis", 2648 #endif 2649 "us-ascii", 2650 "iso-8859-1", 2651 "utf-8" 2652 }; 2653 2654 UConverter *utf8Cnv, *cnv; 2655 UErrorCode errorCode; 2656 int32_t i; 2657 2658 /* fromUnicode versions of some character, from initial state and later */ 2659 char charUTF8[4], char0[8], char1[8]; 2660 int32_t charUTF8Length, char0Length, char1Length; 2661 2662 errorCode=U_ZERO_ERROR; 2663 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2664 if(U_FAILURE(errorCode)) { 2665 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2666 return; 2667 } 2668 2669 for(i=0; i<LENGTHOF(converterNames); ++i) { 2670 errorCode=U_ZERO_ERROR; 2671 cnv=ucnv_open(converterNames[i], &errorCode); 2672 if(U_FAILURE(errorCode)) { 2673 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode)); 2674 continue; 2675 } 2676 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) { 2677 continue; 2678 } 2679 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2680 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2681 ucnv_close(cnv); 2682 } 2683 ucnv_close(utf8Cnv); 2684 } 2685 2686 static void TestConvertExFromUTF8_C5F0() { 2687 static const char *const converterNames[]={ 2688 #if !UCONFIG_NO_LEGACY_CONVERSION 2689 "windows-1251", 2690 "shift-jis", 2691 #endif 2692 "us-ascii", 2693 "iso-8859-1", 2694 "utf-8" 2695 }; 2696 2697 UConverter *utf8Cnv, *cnv; 2698 UErrorCode errorCode; 2699 int32_t i; 2700 2701 static const char bad_utf8[2]={ 0xC5, 0xF0 }; 2702 /* Expect "��" (2x U+FFFD as decimal NCRs) */ 2703 static const char twoNCRs[16]={ 2704 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B, 2705 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B 2706 }; 2707 static const char twoFFFD[6]={ 2708 (char)0xef, (char)0xbf, (char)0xbd, 2709 (char)0xef, (char)0xbf, (char)0xbd 2710 }; 2711 const char *expected; 2712 int32_t expectedLength; 2713 char dest[20]; /* longer than longest expectedLength */ 2714 2715 const char *src; 2716 char *target; 2717 2718 UChar pivotBuffer[128]; 2719 UChar *pivotSource, *pivotTarget; 2720 2721 errorCode=U_ZERO_ERROR; 2722 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2723 if(U_FAILURE(errorCode)) { 2724 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2725 return; 2726 } 2727 2728 for(i=0; i<LENGTHOF(converterNames); ++i) { 2729 errorCode=U_ZERO_ERROR; 2730 cnv=ucnv_open(converterNames[i], &errorCode); 2731 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 2732 NULL, NULL, &errorCode); 2733 if(U_FAILURE(errorCode)) { 2734 log_data_err("unable to open %s converter - %s\n", 2735 converterNames[i], u_errorName(errorCode)); 2736 continue; 2737 } 2738 src=bad_utf8; 2739 target=dest; 2740 uprv_memset(dest, 9, sizeof(dest)); 2741 if(i==LENGTHOF(converterNames)-1) { 2742 /* conversion to UTF-8 yields two U+FFFD directly */ 2743 expected=twoFFFD; 2744 expectedLength=6; 2745 } else { 2746 /* conversion to a non-Unicode charset yields two NCRs */ 2747 expected=twoNCRs; 2748 expectedLength=16; 2749 } 2750 pivotBuffer[0]=0; 2751 pivotBuffer[1]=1; 2752 pivotBuffer[2]=2; 2753 pivotSource=pivotTarget=pivotBuffer; 2754 ucnv_convertEx( 2755 cnv, utf8Cnv, 2756 &target, dest+expectedLength, 2757 &src, bad_utf8+sizeof(bad_utf8), 2758 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2759 TRUE, TRUE, &errorCode); 2760 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 || 2761 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) || 2762 dest[expectedLength]!=9 2763 ) { 2764 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]); 2765 } 2766 ucnv_close(cnv); 2767 } 2768 ucnv_close(utf8Cnv); 2769 } 2770 2771 static void 2772 TestConvertAlgorithmic() { 2773 #if !UCONFIG_NO_LEGACY_CONVERSION 2774 static const uint8_t 2775 utf8[]={ 2776 /* 4e00 30a1 ff61 0410 */ 2777 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2778 }, 2779 shiftJIS[]={ 2780 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2781 }, 2782 /*errorTarget[]={*/ 2783 /* 2784 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2785 * SUB, SUB, 0x40, SUB, SUB, 0x40 2786 */ 2787 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ 2788 /*},*/ 2789 utf16[]={ 2790 0xfe, 0xff /* BOM only, no text */ 2791 }, 2792 utf32[]={ 2793 0xff, 0xfe, 0, 0 /* BOM only, no text */ 2794 }; 2795 2796 char target[100], utf8NUL[100], shiftJISNUL[100]; 2797 2798 UConverter *cnv; 2799 UErrorCode errorCode; 2800 2801 int32_t length; 2802 2803 errorCode=U_ZERO_ERROR; 2804 cnv=ucnv_open("Shift-JIS", &errorCode); 2805 if(U_FAILURE(errorCode)) { 2806 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2807 ucnv_close(cnv); 2808 return; 2809 } 2810 2811 memcpy(utf8NUL, utf8, sizeof(utf8)); 2812 utf8NUL[sizeof(utf8)]=0; 2813 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); 2814 shiftJISNUL[sizeof(shiftJIS)]=0; 2815 2816 /* 2817 * The to/from algorithmic convenience functions share a common implementation, 2818 * so we need not test all permutations of them. 2819 */ 2820 2821 /* length in, not terminated out */ 2822 errorCode=U_ZERO_ERROR; 2823 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode); 2824 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2825 length!=sizeof(shiftJIS) || 2826 memcmp(target, shiftJIS, length)!=0 2827 ) { 2828 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n", 2829 u_errorName(errorCode), length, sizeof(shiftJIS)); 2830 } 2831 2832 /* terminated in and out */ 2833 memset(target, 0x55, sizeof(target)); 2834 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2835 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode); 2836 if( errorCode!=U_ZERO_ERROR || 2837 length!=sizeof(utf8) || 2838 memcmp(target, utf8, length)!=0 2839 ) { 2840 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n", 2841 u_errorName(errorCode), length, sizeof(shiftJIS)); 2842 } 2843 2844 /* empty string, some target buffer */ 2845 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2846 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode); 2847 if( errorCode!=U_ZERO_ERROR || 2848 length!=0 2849 ) { 2850 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n", 2851 u_errorName(errorCode), length); 2852 } 2853 2854 /* pseudo-empty string, no target buffer */ 2855 errorCode=U_ZERO_ERROR; 2856 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2857 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2858 length!=0 2859 ) { 2860 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2861 u_errorName(errorCode), length); 2862 } 2863 2864 errorCode=U_ZERO_ERROR; 2865 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); 2866 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2867 length!=0 2868 ) { 2869 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2870 u_errorName(errorCode), length); 2871 } 2872 2873 /* bad arguments */ 2874 errorCode=U_MESSAGE_PARSE_ERROR; 2875 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2876 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2877 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2878 } 2879 2880 /* source==NULL */ 2881 errorCode=U_ZERO_ERROR; 2882 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode); 2883 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2884 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode)); 2885 } 2886 2887 /* illegal alg. type */ 2888 errorCode=U_ZERO_ERROR; 2889 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode); 2890 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2891 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode)); 2892 } 2893 ucnv_close(cnv); 2894 #endif 2895 } 2896 2897 static void TestLMBCSMaxChar(void) { 2898 static const struct { 2899 int8_t maxSize; 2900 const char *name; 2901 } converter[] = { 2902 /* some non-LMBCS converters - perfect test setup here */ 2903 { 1, "US-ASCII"}, 2904 { 1, "ISO-8859-1"}, 2905 2906 { 2, "UTF-16"}, 2907 { 2, "UTF-16BE"}, 2908 { 3, "UTF-8"}, 2909 { 3, "CESU-8"}, 2910 { 3, "SCSU"}, 2911 { 4, "UTF-32"}, 2912 { 4, "UTF-7"}, 2913 { 4, "IMAP-mailbox-name"}, 2914 { 4, "BOCU-1"}, 2915 2916 { 1, "windows-1256"}, 2917 { 2, "Shift-JIS"}, 2918 { 2, "ibm-16684"}, 2919 { 3, "ibm-930"}, 2920 { 3, "ibm-1390"}, 2921 { 4, "*test3"}, 2922 { 16,"*test4"}, 2923 2924 { 4, "ISCII"}, 2925 { 4, "HZ"}, 2926 2927 { 3, "ISO-2022"}, 2928 { 3, "ISO-2022-KR"}, 2929 { 6, "ISO-2022-JP"}, 2930 { 8, "ISO-2022-CN"}, 2931 2932 /* LMBCS */ 2933 { 3, "LMBCS-1"}, 2934 { 3, "LMBCS-2"}, 2935 { 3, "LMBCS-3"}, 2936 { 3, "LMBCS-4"}, 2937 { 3, "LMBCS-5"}, 2938 { 3, "LMBCS-6"}, 2939 { 3, "LMBCS-8"}, 2940 { 3, "LMBCS-11"}, 2941 { 3, "LMBCS-16"}, 2942 { 3, "LMBCS-17"}, 2943 { 3, "LMBCS-18"}, 2944 { 3, "LMBCS-19"} 2945 }; 2946 int32_t idx; 2947 2948 for (idx = 0; idx < LENGTHOF(converter); idx++) { 2949 UErrorCode status = U_ZERO_ERROR; 2950 UConverter *cnv = cnv_open(converter[idx].name, &status); 2951 if (U_FAILURE(status)) { 2952 continue; 2953 } 2954 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { 2955 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", 2956 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); 2957 } 2958 ucnv_close(cnv); 2959 } 2960 2961 /* mostly test that the macro compiles */ 2962 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { 2963 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); 2964 } 2965 } 2966 2967 2968 static void TestJ1968(void) { 2969 UErrorCode err = U_ZERO_ERROR; 2970 UConverter *cnv; 2971 char myConvName[] = "My really really really really really really really really really really really" 2972 " really really really really really really really really really really really" 2973 " really really really really really really really really long converter name"; 2974 UChar myConvNameU[sizeof(myConvName)]; 2975 2976 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName)); 2977 2978 err = U_ZERO_ERROR; 2979 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0; 2980 cnv = ucnv_openU(myConvNameU, &err); 2981 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2982 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2983 } 2984 2985 err = U_ZERO_ERROR; 2986 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2987 cnv = ucnv_openU(myConvNameU, &err); 2988 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2989 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2990 } 2991 2992 err = U_ZERO_ERROR; 2993 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 2994 cnv = ucnv_openU(myConvNameU, &err); 2995 if (cnv || err != U_FILE_ACCESS_ERROR) { 2996 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2997 } 2998 2999 3000 3001 3002 err = U_ZERO_ERROR; 3003 cnv = ucnv_open(myConvName, &err); 3004 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3005 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3006 } 3007 3008 err = U_ZERO_ERROR; 3009 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ','; 3010 cnv = ucnv_open(myConvName, &err); 3011 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3012 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3013 } 3014 3015 err = U_ZERO_ERROR; 3016 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 3017 cnv = ucnv_open(myConvName, &err); 3018 if (cnv || err != U_FILE_ACCESS_ERROR) { 3019 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3020 } 3021 3022 err = U_ZERO_ERROR; 3023 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 3024 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7); 3025 cnv = ucnv_open(myConvName, &err); 3026 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3027 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3028 } 3029 3030 /* The comma isn't really a part of the converter name. */ 3031 err = U_ZERO_ERROR; 3032 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 3033 cnv = ucnv_open(myConvName, &err); 3034 if (cnv || err != U_FILE_ACCESS_ERROR) { 3035 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3036 } 3037 3038 err = U_ZERO_ERROR; 3039 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' '; 3040 cnv = ucnv_open(myConvName, &err); 3041 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3042 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3043 } 3044 3045 err = U_ZERO_ERROR; 3046 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 3047 cnv = ucnv_open(myConvName, &err); 3048 if (cnv || err != U_FILE_ACCESS_ERROR) { 3049 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3050 } 3051 3052 } 3053 3054 #if !UCONFIG_NO_LEGACY_CONVERSION 3055 static void 3056 testSwap(const char *name, UBool swap) { 3057 /* 3058 * Test Unicode text. 3059 * Contains characters that are the highest for some of the 3060 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the 3061 * tables copies the entire tables. 3062 */ 3063 static const UChar text[]={ 3064 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a 3065 }; 3066 3067 UChar uNormal[32], uSwapped[32]; 3068 char normal[32], swapped[32]; 3069 const UChar *pcu; 3070 UChar *pu; 3071 char *pc; 3072 int32_t i, normalLength, swappedLength; 3073 UChar u; 3074 char c; 3075 3076 const char *swappedName; 3077 UConverter *cnv, *swapCnv; 3078 UErrorCode errorCode; 3079 3080 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */ 3081 3082 /* open both the normal and the LF/NL-swapping converters */ 3083 strcpy(swapped, name); 3084 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING); 3085 3086 errorCode=U_ZERO_ERROR; 3087 swapCnv=ucnv_open(swapped, &errorCode); 3088 cnv=ucnv_open(name, &errorCode); 3089 if(U_FAILURE(errorCode)) { 3090 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode)); 3091 goto cleanup; 3092 } 3093 3094 /* the name must contain the swap option if and only if we expect the converter to swap */ 3095 swappedName=ucnv_getName(swapCnv, &errorCode); 3096 if(U_FAILURE(errorCode)) { 3097 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode)); 3098 goto cleanup; 3099 } 3100 3101 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING); 3102 if(swap != (pc!=NULL)) { 3103 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap); 3104 goto cleanup; 3105 } 3106 3107 /* convert to EBCDIC */ 3108 pcu=text; 3109 pc=normal; 3110 ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3111 normalLength=(int32_t)(pc-normal); 3112 3113 pcu=text; 3114 pc=swapped; 3115 ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3116 swappedLength=(int32_t)(pc-swapped); 3117 3118 if(U_FAILURE(errorCode)) { 3119 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode)); 3120 goto cleanup; 3121 } 3122 3123 /* compare EBCDIC output */ 3124 if(normalLength!=swappedLength) { 3125 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3126 goto cleanup; 3127 } 3128 for(i=0; i<normalLength; ++i) { 3129 /* swap EBCDIC LF/NL for comparison */ 3130 c=normal[i]; 3131 if(swap) { 3132 if(c==0x15) { 3133 c=0x25; 3134 } else if(c==0x25) { 3135 c=0x15; 3136 } 3137 } 3138 3139 if(c!=swapped[i]) { 3140 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]); 3141 goto cleanup; 3142 } 3143 } 3144 3145 /* convert back to Unicode (may not roundtrip) */ 3146 pc=normal; 3147 pu=uNormal; 3148 ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); 3149 normalLength=(int32_t)(pu-uNormal); 3150 3151 pc=normal; 3152 pu=uSwapped; 3153 ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); 3154 swappedLength=(int32_t)(pu-uSwapped); 3155 3156 if(U_FAILURE(errorCode)) { 3157 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode)); 3158 goto cleanup; 3159 } 3160 3161 /* compare EBCDIC output */ 3162 if(normalLength!=swappedLength) { 3163 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3164 goto cleanup; 3165 } 3166 for(i=0; i<normalLength; ++i) { 3167 /* swap EBCDIC LF/NL for comparison */ 3168 u=uNormal[i]; 3169 if(swap) { 3170 if(u==0xa) { 3171 u=0x85; 3172 } else if(u==0x85) { 3173 u=0xa; 3174 } 3175 } 3176 3177 if(u!=uSwapped[i]) { 3178 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]); 3179 goto cleanup; 3180 } 3181 } 3182 3183 /* clean up */ 3184 cleanup: 3185 ucnv_close(cnv); 3186 ucnv_close(swapCnv); 3187 } 3188 3189 static void 3190 TestEBCDICSwapLFNL() { 3191 static const struct { 3192 const char *name; 3193 UBool swap; 3194 } tests[]={ 3195 { "ibm-37", TRUE }, 3196 { "ibm-1047", TRUE }, 3197 { "ibm-1140", TRUE }, 3198 { "ibm-930", TRUE }, 3199 { "iso-8859-3", FALSE } 3200 }; 3201 3202 int i; 3203 3204 for(i=0; i<LENGTHOF(tests); ++i) { 3205 testSwap(tests[i].name, tests[i].swap); 3206 } 3207 } 3208 #else 3209 static void 3210 TestEBCDICSwapLFNL() { 3211 /* test nothing... */ 3212 } 3213 #endif 3214 3215 static const UVersionInfo ICU_34 = {3,4,0,0}; 3216 3217 static void TestFromUCountPending(){ 3218 #if !UCONFIG_NO_LEGACY_CONVERSION 3219 UErrorCode status = U_ZERO_ERROR; 3220 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */ 3221 static const struct { 3222 UChar input[6]; 3223 int32_t len; 3224 int32_t exp; 3225 }fromUnicodeTests[] = { 3226 /*m:n conversion*/ 3227 {{0xdbc4},1,1}, 3228 {{ 0xdbc4, 0xde34, 0xd84d},3,1}, 3229 {{ 0xdbc4, 0xde34, 0xd900},3,3}, 3230 }; 3231 int i; 3232 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3233 if(U_FAILURE(status)){ 3234 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3235 return; 3236 } 3237 for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) { 3238 char tgt[10]; 3239 char* target = tgt; 3240 char* targetLimit = target + 10; 3241 const UChar* source = fromUnicodeTests[i].input; 3242 const UChar* sourceLimit = source + fromUnicodeTests[i].len; 3243 int32_t len = 0; 3244 ucnv_reset(cnv); 3245 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3246 len = ucnv_fromUCountPending(cnv, &status); 3247 if(U_FAILURE(status)){ 3248 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3249 status = U_ZERO_ERROR; 3250 continue; 3251 } 3252 if(len != fromUnicodeTests[i].exp){ 3253 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n"); 3254 } 3255 } 3256 status = U_ZERO_ERROR; 3257 { 3258 /* 3259 * The converter has to read the tail before it knows that 3260 * only head alone matches. 3261 * At the end, the output for head will overflow the target, 3262 * middle will be pending, and tail will not have been consumed. 3263 */ 3264 /* 3265 \U00101234 -> x (<U101234> \x07 |0) 3266 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0) 3267 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0) 3268 \U00060007 -> unassigned 3269 */ 3270 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ 3271 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ 3272 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ 3273 char tgt[10]; 3274 char* target = tgt; 3275 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ 3276 const UChar* source = head; 3277 const UChar* sourceLimit = source + u_strlen(head); 3278 int32_t len = 0; 3279 ucnv_reset(cnv); 3280 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3281 len = ucnv_fromUCountPending(cnv, &status); 3282 if(U_FAILURE(status)){ 3283 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3284 status = U_ZERO_ERROR; 3285 } 3286 if(len!=4){ 3287 log_err("ucnv_fromUInputHeld did not return correct length for head\n"); 3288 } 3289 source = middle; 3290 sourceLimit = source + u_strlen(middle); 3291 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3292 len = ucnv_fromUCountPending(cnv, &status); 3293 if(U_FAILURE(status)){ 3294 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3295 status = U_ZERO_ERROR; 3296 } 3297 if(len!=5){ 3298 log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); 3299 } 3300 source = tail; 3301 sourceLimit = source + u_strlen(tail); 3302 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3303 if(status != U_BUFFER_OVERFLOW_ERROR){ 3304 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3305 } 3306 status = U_ZERO_ERROR; 3307 len = ucnv_fromUCountPending(cnv, &status); 3308 /* middle[1] is pending, tail has not been consumed */ 3309 if(U_FAILURE(status)){ 3310 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); 3311 } 3312 if(len!=1){ 3313 log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); 3314 } 3315 } 3316 ucnv_close(cnv); 3317 #endif 3318 } 3319 3320 static void 3321 TestToUCountPending(){ 3322 #if !UCONFIG_NO_LEGACY_CONVERSION 3323 UErrorCode status = U_ZERO_ERROR; 3324 static const struct { 3325 char input[6]; 3326 int32_t len; 3327 int32_t exp; 3328 }toUnicodeTests[] = { 3329 /*m:n conversion*/ 3330 {{0x05, 0x01, 0x02},3,3}, 3331 {{0x01, 0x02},2,2}, 3332 {{0x07, 0x00, 0x01, 0x02},4,4}, 3333 }; 3334 3335 int i; 3336 UConverterToUCallback *oldToUAction= NULL; 3337 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3338 if(U_FAILURE(status)){ 3339 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3340 return; 3341 } 3342 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3343 for(i=0; i<LENGTHOF(toUnicodeTests); ++i) { 3344 UChar tgt[10]; 3345 UChar* target = tgt; 3346 UChar* targetLimit = target + 20; 3347 const char* source = toUnicodeTests[i].input; 3348 const char* sourceLimit = source + toUnicodeTests[i].len; 3349 int32_t len = 0; 3350 ucnv_reset(cnv); 3351 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3352 len = ucnv_toUCountPending(cnv,&status); 3353 if(U_FAILURE(status)){ 3354 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3355 status = U_ZERO_ERROR; 3356 continue; 3357 } 3358 if(len != toUnicodeTests[i].exp){ 3359 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n"); 3360 } 3361 } 3362 status = U_ZERO_ERROR; 3363 ucnv_close(cnv); 3364 3365 { 3366 /* 3367 * The converter has to read the tail before it knows that 3368 * only head alone matches. 3369 * At the end, the output for head will overflow the target, 3370 * mid will be pending, and tail will not have been consumed. 3371 */ 3372 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00}; 3373 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 }; 3374 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 }; 3375 /* 3376 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0) 3377 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0) 3378 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3) 3379 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") 3380 */ 3381 UChar tgt[10]; 3382 UChar* target = tgt; 3383 UChar* targetLimit = target + 1; /* expect overflow from converting */ 3384 const char* source = head; 3385 const char* sourceLimit = source + strlen(head); 3386 int32_t len = 0; 3387 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); 3388 if(U_FAILURE(status)){ 3389 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3390 return; 3391 } 3392 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3393 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3394 len = ucnv_toUCountPending(cnv,&status); 3395 if(U_FAILURE(status)){ 3396 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3397 } 3398 if(len != 4){ 3399 log_err("Did not get the expected len for head.\n"); 3400 } 3401 source=mid; 3402 sourceLimit = source+strlen(mid); 3403 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3404 len = ucnv_toUCountPending(cnv,&status); 3405 if(U_FAILURE(status)){ 3406 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3407 } 3408 if(len != 8){ 3409 log_err("Did not get the expected len for mid.\n"); 3410 } 3411 3412 source=tail; 3413 sourceLimit = source+strlen(tail); 3414 targetLimit = target; 3415 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3416 if(status != U_BUFFER_OVERFLOW_ERROR){ 3417 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3418 } 3419 status = U_ZERO_ERROR; 3420 len = ucnv_toUCountPending(cnv,&status); 3421 /* mid[4] is pending, tail has not been consumed */ 3422 if(U_FAILURE(status)){ 3423 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status)); 3424 } 3425 if(len != 4){ 3426 log_err("Did not get the expected len for tail.\n"); 3427 } 3428 ucnv_close(cnv); 3429 } 3430 #endif 3431 } 3432 3433 static void TestOneDefaultNameChange(const char *name, const char *expected) { 3434 UErrorCode status = U_ZERO_ERROR; 3435 UConverter *cnv; 3436 ucnv_setDefaultName(name); 3437 if(strcmp(ucnv_getDefaultName(), expected)==0) 3438 log_verbose("setDefaultName of %s works.\n", name); 3439 else 3440 log_err("setDefaultName of %s failed\n", name); 3441 cnv=ucnv_open(NULL, &status); 3442 if (U_FAILURE(status) || cnv == NULL) { 3443 log_err("opening the default converter of %s failed\n", name); 3444 return; 3445 } 3446 if(strcmp(ucnv_getName(cnv, &status), expected)==0) 3447 log_verbose("ucnv_getName of %s works.\n", name); 3448 else 3449 log_err("ucnv_getName of %s failed\n", name); 3450 ucnv_close(cnv); 3451 } 3452 3453 static void TestDefaultName(void) { 3454 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ 3455 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; 3456 strcpy(defaultName, ucnv_getDefaultName()); 3457 3458 log_verbose("getDefaultName returned %s\n", defaultName); 3459 3460 /*change the default name by setting it */ 3461 TestOneDefaultNameChange("UTF-8", "UTF-8"); 3462 #if U_CHARSET_IS_UTF8 3463 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); 3464 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); 3465 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); 3466 #else 3467 # if !UCONFIG_NO_LEGACY_CONVERSION 3468 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); 3469 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); 3470 # endif 3471 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); 3472 #endif 3473 3474 /*set the default name back*/ 3475 ucnv_setDefaultName(defaultName); 3476 } 3477 3478 /* Test that ucnv_compareNames() matches names according to spec. ----------- */ 3479 3480 static U_INLINE int 3481 sign(int n) { 3482 if(n==0) { 3483 return 0; 3484 } else if(n<0) { 3485 return -1; 3486 } else /* n>0 */ { 3487 return 1; 3488 } 3489 } 3490 3491 static void 3492 compareNames(const char **names) { 3493 const char *relation, *name1, *name2; 3494 int rel, result; 3495 3496 relation=*names++; 3497 if(*relation=='=') { 3498 rel = 0; 3499 } else if(*relation=='<') { 3500 rel = -1; 3501 } else { 3502 rel = 1; 3503 } 3504 3505 name1=*names++; 3506 if(name1==NULL) { 3507 return; 3508 } 3509 while((name2=*names++)!=NULL) { 3510 result=ucnv_compareNames(name1, name2); 3511 if(sign(result)!=rel) { 3512 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); 3513 } 3514 name1=name2; 3515 } 3516 } 3517 3518 static void 3519 TestCompareNames() { 3520 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; 3521 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; 3522 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; 3523 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; 3524 3525 compareNames(equalUTF8); 3526 compareNames(equalIBM); 3527 compareNames(lessMac); 3528 compareNames(lessUTF080); 3529 } 3530 3531 static void 3532 TestSubstString() { 3533 static const UChar surrogate[1]={ 0xd900 }; 3534 char buffer[16]; 3535 3536 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3537 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3538 UConverter *cnv; 3539 UErrorCode errorCode; 3540 int32_t length; 3541 int8_t len8; 3542 3543 /* UTF-16/32: test that the BOM is output before the sub character */ 3544 errorCode=U_ZERO_ERROR; 3545 cnv=ucnv_open("UTF-16", &errorCode); 3546 if(U_FAILURE(errorCode)) { 3547 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); 3548 return; 3549 } 3550 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3551 ucnv_close(cnv); 3552 if(U_FAILURE(errorCode) || 3553 length!=4 || 3554 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3555 ) { 3556 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); 3557 } 3558 3559 errorCode=U_ZERO_ERROR; 3560 cnv=ucnv_open("UTF-32", &errorCode); 3561 if(U_FAILURE(errorCode)) { 3562 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); 3563 return; 3564 } 3565 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3566 ucnv_close(cnv); 3567 if(U_FAILURE(errorCode) || 3568 length!=8 || 3569 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3570 ) { 3571 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); 3572 } 3573 3574 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ 3575 errorCode=U_ZERO_ERROR; 3576 cnv=ucnv_open("ISO-8859-1", &errorCode); 3577 if(U_FAILURE(errorCode)) { 3578 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); 3579 return; 3580 } 3581 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3582 if(U_FAILURE(errorCode)) { 3583 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); 3584 } else { 3585 len8 = sizeof(buffer); 3586 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3587 /* Stateless converter, we expect the string converted to charset bytes. */ 3588 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { 3589 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); 3590 } 3591 } 3592 ucnv_close(cnv); 3593 3594 #if !UCONFIG_NO_LEGACY_CONVERSION 3595 errorCode=U_ZERO_ERROR; 3596 cnv=ucnv_open("HZ", &errorCode); 3597 if(U_FAILURE(errorCode)) { 3598 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); 3599 return; 3600 } 3601 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3602 if(U_FAILURE(errorCode)) { 3603 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); 3604 } else { 3605 len8 = sizeof(buffer); 3606 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3607 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ 3608 if(U_FAILURE(errorCode) || len8!=0) { 3609 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); 3610 } 3611 } 3612 ucnv_close(cnv); 3613 #endif 3614 /* 3615 * Further testing of ucnv_setSubstString() is done via intltest convert. 3616 * We do not test edge cases of illegal arguments and similar because the 3617 * function implementation uses all of its parameters in calls to other 3618 * functions with UErrorCode parameters. 3619 */ 3620 } 3621 3622 static void 3623 InvalidArguments() { 3624 UConverter *cnv; 3625 UErrorCode errorCode; 3626 char charBuffer[2] = {1, 1}; 3627 char ucharAsCharBuffer[2] = {2, 2}; 3628 char *charsPtr = charBuffer; 3629 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; 3630 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); 3631 3632 errorCode=U_ZERO_ERROR; 3633 cnv=ucnv_open("UTF-8", &errorCode); 3634 if(U_FAILURE(errorCode)) { 3635 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); 3636 return; 3637 } 3638 3639 errorCode=U_ZERO_ERROR; 3640 /* This one should fail because an incomplete UChar is being passed in */ 3641 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); 3642 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3643 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3644 } 3645 3646 errorCode=U_ZERO_ERROR; 3647 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3648 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); 3649 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3650 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3651 } 3652 3653 errorCode=U_ZERO_ERROR; 3654 /* This one should fail because an incomplete UChar is being passed in */ 3655 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3656 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3657 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3658 } 3659 3660 errorCode=U_ZERO_ERROR; 3661 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3662 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3663 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3664 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3665 } 3666 3667 if (charBuffer[0] != 1 || charBuffer[1] != 1 3668 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) 3669 { 3670 log_err("Data was incorrectly written to buffers\n"); 3671 } 3672 3673 ucnv_close(cnv); 3674 } 3675 3676 static void TestGetName() { 3677 static const char *const names[] = { 3678 "Unicode", "UTF-16", 3679 "UnicodeBigUnmarked", "UTF-16BE", 3680 "UnicodeBig", "UTF-16BE,version=1", 3681 "UnicodeLittleUnmarked", "UTF-16LE", 3682 "UnicodeLittle", "UTF-16LE,version=1", 3683 "x-UTF-16LE-BOM", "UTF-16LE,version=1" 3684 }; 3685 int32_t i; 3686 for(i = 0; i < LENGTHOF(names); i += 2) { 3687 UErrorCode errorCode = U_ZERO_ERROR; 3688 UConverter *cnv = ucnv_open(names[i], &errorCode); 3689 if(U_SUCCESS(errorCode)) { 3690 const char *name = ucnv_getName(cnv, &errorCode); 3691 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { 3692 log_err("ucnv_getName(%s) = %s != %s -- %s\n", 3693 names[i], name, names[i+1], u_errorName(errorCode)); 3694 } 3695 ucnv_close(cnv); 3696 } 3697 } 3698 } 3699 3700 static void TestUTFBOM() { 3701 static const UChar a16[] = { 0x61 }; 3702 static const char *const names[] = { 3703 "UTF-16", 3704 "UTF-16,version=1", 3705 "UTF-16BE", 3706 "UnicodeBig", 3707 "UTF-16LE", 3708 "UnicodeLittle" 3709 }; 3710 static const uint8_t expected[][5] = { 3711 #if U_IS_BIG_ENDIAN 3712 { 4, 0xfe, 0xff, 0, 0x61 }, 3713 { 4, 0xfe, 0xff, 0, 0x61 }, 3714 #else 3715 { 4, 0xff, 0xfe, 0x61, 0 }, 3716 { 4, 0xff, 0xfe, 0x61, 0 }, 3717 #endif 3718 3719 { 2, 0, 0x61 }, 3720 { 4, 0xfe, 0xff, 0, 0x61 }, 3721 3722 { 2, 0x61, 0 }, 3723 { 4, 0xff, 0xfe, 0x61, 0 } 3724 }; 3725 3726 char bytes[10]; 3727 int32_t i; 3728 3729 for(i = 0; i < LENGTHOF(names); ++i) { 3730 UErrorCode errorCode = U_ZERO_ERROR; 3731 UConverter *cnv = ucnv_open(names[i], &errorCode); 3732 int32_t length = 0; 3733 const uint8_t *exp = expected[i]; 3734 if (U_FAILURE(errorCode)) { 3735 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); 3736 continue; 3737 } 3738 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); 3739 3740 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { 3741 log_err("unexpected %s BOM writing behavior -- %s\n", 3742 names[i], u_errorName(errorCode)); 3743 } 3744 ucnv_close(cnv); 3745 } 3746 } 3747