1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2009, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /***************************************************************************** 7 * 8 * File CU_CAPITST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda Ported for C API 13 ****************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <ctype.h> 19 #include "unicode/uloc.h" 20 #include "unicode/ucnv.h" 21 #include "unicode/ucnv_err.h" 22 #include "unicode/putil.h" 23 #include "unicode/uset.h" 24 #include "unicode/ustring.h" 25 #include "ucnv_bld.h" /* for sizeof(UConverter) */ 26 #include "cmemory.h" /* for UAlignedMemory */ 27 #include "cintltst.h" 28 #include "ccapitst.h" 29 30 /* for not including "cstring.h" -begin*/ 31 #ifdef U_WINDOWS 32 # define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE _stricmp(str1, str2) 33 #elif defined(POSIX) 34 # define ctest_stricmp(str1, str2) U_STANDARD_CPP_NAMESPACE strcasecmp(str1, str2) 35 #else 36 # define ctest_stricmp(str1, str2) T_CString_stricmp(str1, str2) 37 #endif 38 39 static int U_EXPORT2 40 T_CString_stricmp(const char *str1, const char *str2) { 41 if(str1==NULL) { 42 if(str2==NULL) { 43 return 0; 44 } else { 45 return -1; 46 } 47 } else if(str2==NULL) { 48 return 1; 49 } else { 50 /* compare non-NULL strings lexically with lowercase */ 51 int rc; 52 unsigned char c1, c2; 53 for(;;) { 54 c1=(unsigned char)*str1; 55 c2=(unsigned char)*str2; 56 if(c1==0) { 57 if(c2==0) { 58 return 0; 59 } else { 60 return -1; 61 } 62 } else if(c2==0) { 63 return 1; 64 } else { 65 /* compare non-zero characters with lowercase */ 66 rc=(int)(unsigned char)tolower(c1)-(int)(unsigned char)tolower(c2); 67 if(rc!=0) { 68 return rc; 69 } 70 } 71 ++str1; 72 ++str2; 73 } 74 } 75 } 76 /* for not including "cstring.h" -end*/ 77 78 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 79 80 #define NUM_CODEPAGE 1 81 #define MAX_FILE_LEN 1024*20 82 #define UCS_FILE_NAME_SIZE 512 83 84 /*returns an action other than the one provided*/ 85 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); 86 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); 87 88 static UConverter * 89 cnv_open(const char *name, UErrorCode *pErrorCode) { 90 if(name!=NULL && name[0]=='*') { 91 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); 92 } else { 93 return ucnv_open(name, pErrorCode); 94 } 95 } 96 97 98 static void ListNames(void); 99 static void TestFlushCache(void); 100 static void TestDuplicateAlias(void); 101 static void TestCCSID(void); 102 static void TestJ932(void); 103 static void TestJ1968(void); 104 static void TestLMBCSMaxChar(void); 105 106 #if !UCONFIG_NO_LEGACY_CONVERSION 107 static void TestConvertSafeCloneCallback(void); 108 #endif 109 110 static void TestEBCDICSwapLFNL(void); 111 static void TestConvertEx(void); 112 static void TestConvertExFromUTF8(void); 113 static void TestConvertAlgorithmic(void); 114 void TestDefaultConverterError(void); /* defined in cctest.c */ 115 static void TestToUCountPending(void); 116 static void TestFromUCountPending(void); 117 static void TestDefaultName(void); 118 static void TestCompareNames(void); 119 static void TestSubstString(void); 120 static void InvalidArguments(void); 121 static void TestGetName(void); 122 static void TestUTFBOM(void); 123 124 void addTestConvert(TestNode** root); 125 126 void addTestConvert(TestNode** root) 127 { 128 addTest(root, &ListNames, "tsconv/ccapitst/ListNames"); 129 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert"); 130 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); 131 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); 132 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); 133 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); 134 #if !UCONFIG_NO_LEGACY_CONVERSION 135 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); 136 #endif 137 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); 138 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); 139 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); 140 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); 141 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); 142 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); 143 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); 144 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); 145 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); 146 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); 147 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); 148 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); 149 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); 150 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); 151 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); 152 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); 153 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); 154 } 155 156 static void ListNames(void) { 157 UErrorCode err = U_ZERO_ERROR; 158 int32_t testLong1 = 0; 159 const char* available_conv; 160 UEnumeration *allNamesEnum = NULL; 161 int32_t allNamesCount = 0; 162 uint16_t count; 163 164 log_verbose("Testing ucnv_openAllNames()..."); 165 allNamesEnum = ucnv_openAllNames(&err); 166 if(U_FAILURE(err)) { 167 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); 168 } 169 else { 170 const char *string = NULL; 171 int32_t len = 0; 172 int32_t count1 = 0; 173 int32_t count2 = 0; 174 allNamesCount = uenum_count(allNamesEnum, &err); 175 while ((string = uenum_next(allNamesEnum, &len, &err))) { 176 count1++; 177 log_verbose("read \"%s\", length %i\n", string, len); 178 } 179 if (U_FAILURE(err)) { 180 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err)); 181 err = U_ZERO_ERROR; 182 } 183 uenum_reset(allNamesEnum, &err); 184 while ((string = uenum_next(allNamesEnum, &len, &err))) { 185 count2++; 186 ucnv_close(ucnv_open(string, &err)); 187 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable"); 188 err = U_ZERO_ERROR; 189 } 190 if (count1 != count2) { 191 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n"); 192 } 193 } 194 uenum_close(allNamesEnum); 195 err = U_ZERO_ERROR; 196 197 /*Tests ucnv_getAvailableName(), getAvialableCount()*/ 198 199 log_verbose("Testing ucnv_countAvailable()..."); 200 201 testLong1=ucnv_countAvailable(); 202 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount); 203 204 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ 205 206 available_conv = ucnv_getAvailableName(testLong1); 207 /*test ucnv_getAvailableName with err condition*/ 208 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 "); 209 available_conv = ucnv_getAvailableName(-1); 210 if(available_conv != NULL){ 211 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n"); 212 } 213 214 /* Test ucnv_countAliases() etc. */ 215 count = ucnv_countAliases("utf-8", &err); 216 if(U_FAILURE(err)) { 217 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); 218 } else if(count <= 0) { 219 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); 220 } else { 221 /* try to get the aliases individually */ 222 const char *alias; 223 alias = ucnv_getAlias("utf-8", 0, &err); 224 if(U_FAILURE(err)) { 225 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err)); 226 } else if(strcmp("UTF-8", alias) != 0) { 227 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias); 228 } else { 229 uint16_t aliasNum; 230 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 231 alias = ucnv_getAlias("utf-8", aliasNum, &err); 232 if(U_FAILURE(err)) { 233 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 234 } else if(strlen(alias) > 20) { 235 /* sanity check */ 236 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias); 237 } else { 238 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias); 239 } 240 } 241 if(U_SUCCESS(err)) { 242 /* try to fill an array with all aliases */ 243 const char **aliases; 244 aliases=(const char **)malloc(count * sizeof(const char *)); 245 if(aliases != 0) { 246 ucnv_getAliases("utf-8", aliases, &err); 247 if(U_FAILURE(err)) { 248 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err)); 249 } else { 250 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 251 /* compare the pointers with the ones returned individually */ 252 alias = ucnv_getAlias("utf-8", aliasNum, &err); 253 if(U_FAILURE(err)) { 254 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 255 } else if(aliases[aliasNum] != alias) { 256 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum); 257 } 258 } 259 } 260 free((char **)aliases); 261 } 262 } 263 } 264 } 265 } 266 267 268 static void TestConvert() 269 { 270 #if !UCONFIG_NO_LEGACY_CONVERSION 271 char myptr[4]; 272 char save[4]; 273 int32_t testLong1 = 0; 274 uint16_t rest = 0; 275 int32_t len = 0; 276 int32_t x = 0; 277 FILE* ucs_file_in = NULL; 278 UChar BOM = 0x0000; 279 UChar myUChar = 0x0000; 280 char* mytarget; /* [MAX_FILE_LEN] */ 281 char* mytarget_1; 282 char* mytarget_use; 283 UChar* consumedUni = NULL; 284 char* consumed = NULL; 285 char* output_cp_buffer; /* [MAX_FILE_LEN] */ 286 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */ 287 UChar* ucs_file_buffer_use; 288 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ 289 UChar* my_ucs_file_buffer_1; 290 int8_t ii = 0; 291 int32_t j = 0; 292 uint16_t codepage_index = 0; 293 int32_t cp = 0; 294 UErrorCode err = U_ZERO_ERROR; 295 char ucs_file_name[UCS_FILE_NAME_SIZE]; 296 UConverterFromUCallback MIA1, MIA1_2; 297 UConverterToUCallback MIA2, MIA2_2; 298 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2; 299 UConverter* someConverters[5]; 300 UConverter* myConverter = 0; 301 UChar* displayname = 0; 302 303 const char* locale; 304 305 UChar* uchar1 = 0; 306 UChar* uchar2 = 0; 307 UChar* uchar3 = 0; 308 int32_t targetcapacity2; 309 int32_t targetcapacity; 310 int32_t targetsize; 311 int32_t disnamelen; 312 313 const UChar* tmp_ucs_buf; 314 const UChar* tmp_consumedUni=NULL; 315 const char* tmp_mytarget_use; 316 const char* tmp_consumed; 317 318 /****************************************************************** 319 Checking Unicode -> ksc 320 ******************************************************************/ 321 322 const char* CodePagesToTest[NUM_CODEPAGE] = 323 { 324 "ibm-949_P110-1999" 325 326 327 }; 328 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] = 329 { 330 949 331 }; 332 333 334 const int8_t CodePagesMinChars[NUM_CODEPAGE] = 335 { 336 1 337 338 }; 339 340 const int8_t CodePagesMaxChars[NUM_CODEPAGE] = 341 { 342 2 343 344 }; 345 346 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] = 347 { 348 0xAFFE 349 }; 350 351 const char* CodePagesTestFiles[NUM_CODEPAGE] = 352 { 353 "uni-text.bin" 354 }; 355 356 357 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] = 358 { 359 UCNV_IBM 360 361 }; 362 363 const char* CodePagesLocale[NUM_CODEPAGE] = 364 { 365 "ko_KR" 366 }; 367 368 UConverterFromUCallback oldFromUAction = NULL; 369 UConverterToUCallback oldToUAction = NULL; 370 const void* oldFromUContext = NULL; 371 const void* oldToUContext = NULL; 372 373 /* Allocate memory */ 374 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); 375 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0])); 376 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0])); 377 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0])); 378 379 ucs_file_buffer_use = ucs_file_buffer; 380 mytarget_1=mytarget; 381 mytarget_use = mytarget; 382 my_ucs_file_buffer_1=my_ucs_file_buffer; 383 384 /* flush the converter cache to get a consistent state before the flushing is tested */ 385 ucnv_flushCache(); 386 387 /*Testing ucnv_openU()*/ 388 { 389 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/ 390 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */ 391 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */ 392 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"}; 393 UChar illegalName[100]; 394 UConverter *converter=NULL; 395 err=U_ZERO_ERROR; 396 converter=ucnv_openU(converterName, &err); 397 if(U_FAILURE(err)){ 398 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err)); 399 } 400 ucnv_close(converter); 401 err=U_ZERO_ERROR; 402 converter=ucnv_openU(NULL, &err); 403 if(U_FAILURE(err)){ 404 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err)); 405 } 406 ucnv_close(converter); 407 /*testing with error value*/ 408 err=U_ILLEGAL_ARGUMENT_ERROR; 409 converter=ucnv_openU(converterName, &err); 410 if(!(converter == NULL)){ 411 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n"); 412 } 413 ucnv_close(converter); 414 err=U_ZERO_ERROR; 415 u_uastrcpy(illegalName, ""); 416 u_uastrcpy(illegalName, illegalNameChars); 417 ucnv_openU(illegalName, &err); 418 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){ 419 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n"); 420 } 421 422 err=U_ZERO_ERROR; 423 ucnv_openU(firstSortedName, &err); 424 if(err!=U_FILE_ACCESS_ERROR){ 425 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n"); 426 } 427 428 err=U_ZERO_ERROR; 429 ucnv_openU(lastSortedName, &err); 430 if(err!=U_FILE_ACCESS_ERROR){ 431 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n"); 432 } 433 434 err=U_ZERO_ERROR; 435 } 436 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n"); 437 { 438 UConverter *cnv=NULL; 439 err=U_ZERO_ERROR; 440 cnv=ucnv_open("ibm-949,Madhu", &err); 441 if(U_FAILURE(err)){ 442 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err)); 443 } 444 ucnv_close(cnv); 445 446 } 447 /*Testing ucnv_convert()*/ 448 { 449 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0; 450 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; 451 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; 452 char *target=0; 453 sourceLimit=sizeof(source)/sizeof(source[0]); 454 err=U_ZERO_ERROR; 455 targetLimit=0; 456 457 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err); 458 if(err == U_BUFFER_OVERFLOW_ERROR){ 459 err=U_ZERO_ERROR; 460 targetLimit=targetCapacity+1; 461 target=(char*)malloc(sizeof(char) * targetLimit); 462 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 463 } 464 if(U_FAILURE(err)){ 465 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err)); 466 } 467 else { 468 for(i=0; i<targetCapacity; i++){ 469 if(target[i] != expectedTarget[i]){ 470 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]); 471 } 472 } 473 474 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err); 475 if(U_FAILURE(err) || i!=7){ 476 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n", 477 u_errorName(err), i); 478 } 479 480 /*Test error conditions*/ 481 err=U_ZERO_ERROR; 482 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err); 483 if(i !=0){ 484 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n"); 485 } 486 487 err=U_ILLEGAL_ARGUMENT_ERROR; 488 sourceLimit=sizeof(source)/sizeof(source[0]); 489 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 490 if(i !=0 ){ 491 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); 492 } 493 494 err=U_ZERO_ERROR; 495 sourceLimit=sizeof(source)/sizeof(source[0]); 496 targetLimit=0; 497 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 498 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ 499 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n"); 500 } 501 err=U_ZERO_ERROR; 502 free(target); 503 } 504 } 505 506 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/ 507 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n"); 508 err=U_ILLEGAL_ARGUMENT_ERROR; 509 if(ucnv_open(NULL, &err) != NULL){ 510 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 511 } 512 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){ 513 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 514 } 515 err=U_ZERO_ERROR; 516 517 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */ 518 log_verbose("\n---Testing ucnv_open default...\n"); 519 someConverters[0] = ucnv_open(NULL,&err); 520 someConverters[1] = ucnv_open(NULL,&err); 521 someConverters[2] = ucnv_open("utf8", &err); 522 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err); 523 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */ 524 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));} 525 526 /* Testing ucnv_getName()*/ 527 /*default code page */ 528 ucnv_getName(someConverters[0], &err); 529 if(U_FAILURE(err)) { 530 log_data_err("getName[0] failed\n"); 531 } else { 532 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err)); 533 } 534 ucnv_getName(someConverters[1], &err); 535 if(U_FAILURE(err)) { 536 log_data_err("getName[1] failed\n"); 537 } else { 538 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); 539 } 540 541 ucnv_close(someConverters[0]); 542 ucnv_close(someConverters[1]); 543 ucnv_close(someConverters[2]); 544 ucnv_close(someConverters[3]); 545 546 547 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) 548 { 549 int32_t i = 0; 550 551 err = U_ZERO_ERROR; 552 #ifdef U_TOPSRCDIR 553 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); 554 #else 555 strcpy(ucs_file_name, loadTestData(&err)); 556 557 if(U_FAILURE(err)){ 558 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err)); 559 return; 560 } 561 562 { 563 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); 564 565 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ 566 *(index+1)=0; 567 } 568 } 569 570 strcat(ucs_file_name,".."U_FILE_SEP_STRING); 571 #endif 572 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); 573 574 ucs_file_in = fopen(ucs_file_name,"rb"); 575 if (!ucs_file_in) 576 { 577 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); 578 return; 579 } 580 581 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/ 582 583 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */ 584 /* ucnv_flushCache(); */ 585 myConverter =ucnv_open( "ibm-949", &err); 586 if (!myConverter || U_FAILURE(err)) 587 { 588 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); 589 590 return; 591 } 592 593 /*testing for ucnv_getName() */ 594 log_verbose("Testing ucnv_getName()...\n"); 595 ucnv_getName(myConverter, &err); 596 if(U_FAILURE(err)) 597 log_err("Error in getName\n"); 598 else 599 { 600 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); 601 } 602 if (ctest_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) 603 log_err("getName failed\n"); 604 else 605 log_verbose("getName ok\n"); 606 /*Test getName with error condition*/ 607 { 608 const char* name=0; 609 err=U_ILLEGAL_ARGUMENT_ERROR; 610 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR"); 611 name=ucnv_getName(myConverter, &err); 612 if(name != NULL){ 613 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail"); 614 } 615 err=U_ZERO_ERROR; 616 } 617 618 619 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/ 620 621 log_verbose("Testing ucnv_getMaxCharSize()...\n"); 622 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index]) 623 log_verbose("Max byte per character OK\n"); 624 else 625 log_err("Max byte per character failed\n"); 626 627 log_verbose("\n---Testing ucnv_getMinCharSize()...\n"); 628 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index]) 629 log_verbose("Min byte per character OK\n"); 630 else 631 log_err("Min byte per character failed\n"); 632 633 634 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/ 635 log_verbose("\n---Testing ucnv_getSubstChars...\n"); 636 ii=4; 637 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 638 if (ii <= 0) { 639 log_err("ucnv_getSubstChars returned a negative number %d\n", ii); 640 } 641 642 for(x=0;x<ii;x++) 643 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]); 644 if (rest==CodePagesSubstitutionChars[codepage_index]) 645 log_verbose("Substitution character ok\n"); 646 else 647 log_err("Substitution character failed.\n"); 648 649 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n"); 650 ucnv_setSubstChars(myConverter, myptr, ii, &err); 651 if (U_FAILURE(err)) 652 { 653 log_err("FAILURE! %s\n", myErrorName(err)); 654 } 655 ucnv_getSubstChars(myConverter,save, &ii, &err); 656 if (U_FAILURE(err)) 657 { 658 log_err("FAILURE! %s\n", myErrorName(err)); 659 } 660 661 if (strncmp(save, myptr, ii)) 662 log_err("Saved substitution character failed\n"); 663 else 664 log_verbose("Saved substitution character ok\n"); 665 666 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/ 667 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n"); 668 ii=1; 669 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 670 if(err != U_INDEX_OUTOFBOUNDS_ERROR){ 671 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err)); 672 } 673 err=U_ZERO_ERROR; 674 ii=4; 675 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 676 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n"); 677 ucnv_setSubstChars(myConverter, myptr, 0, &err); 678 if(err != U_ILLEGAL_ARGUMENT_ERROR){ 679 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err)); 680 } 681 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n"); 682 strcpy(myptr, "abc"); 683 ucnv_setSubstChars(myConverter, myptr, ii, &err); 684 err=U_ZERO_ERROR; 685 ucnv_getSubstChars(myConverter, save, &ii, &err); 686 if(strncmp(save, myptr, ii) == 0){ 687 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n"); 688 } 689 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n"); 690 err=U_ZERO_ERROR; 691 strcpy(myptr, "abc"); 692 ucnv_setSubstChars(myConverter, myptr, ii, &err); 693 err=U_ILLEGAL_ARGUMENT_ERROR; 694 ucnv_getSubstChars(myConverter, save, &ii, &err); 695 if(strncmp(save, myptr, ii) == 0){ 696 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n"); 697 } 698 err=U_ZERO_ERROR; 699 /*------*/ 700 701 #ifdef U_ENABLE_GENERIC_ISO_2022 702 /*resetState ucnv_reset()*/ 703 log_verbose("\n---Testing ucnv_reset()..\n"); 704 ucnv_reset(myConverter); 705 { 706 UChar32 c; 707 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80}; 708 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 709 UConverter *cnv=ucnv_open("ISO_2022", &err); 710 if(U_FAILURE(err)) { 711 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 712 } 713 c=ucnv_getNextUChar(cnv, &source, limit, &err); 714 if((U_FAILURE(err) || c != (UChar32)0x0031)) { 715 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err)); 716 } 717 ucnv_reset(cnv); 718 ucnv_close(cnv); 719 720 } 721 #endif 722 723 /*getDisplayName*/ 724 log_verbose("\n---Testing ucnv_getDisplayName()...\n"); 725 locale=CodePagesLocale[codepage_index]; 726 len=0; 727 displayname=NULL; 728 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); 729 if(err==U_BUFFER_OVERFLOW_ERROR) { 730 err=U_ZERO_ERROR; 731 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); 732 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); 733 if(U_FAILURE(err)) { 734 log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); 735 } 736 else { 737 log_verbose(" getDisplayName o.k.\n"); 738 } 739 free(displayname); 740 displayname=NULL; 741 } 742 else { 743 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); 744 } 745 /*test ucnv_getDiaplayName with error condition*/ 746 err= U_ILLEGAL_ARGUMENT_ERROR; 747 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); 748 if( len !=0 ){ 749 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); 750 } 751 /*test ucnv_getDiaplayName with error condition*/ 752 err=U_ZERO_ERROR; 753 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); 754 if( len !=0 || U_SUCCESS(err)){ 755 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); 756 } 757 err=U_ZERO_ERROR; 758 759 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ 760 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context); 761 762 log_verbose("\n---Testing ucnv_setFromUCallBack...\n"); 763 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 764 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context) 765 { 766 log_err("FAILURE! %s\n", myErrorName(err)); 767 } 768 769 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 770 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM) 771 log_err("get From UCallBack failed\n"); 772 else 773 log_verbose("get From UCallBack ok\n"); 774 775 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n"); 776 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err); 777 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM) 778 { 779 log_err("FAILURE! %s\n", myErrorName(err)); 780 } 781 782 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 783 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context) 784 log_err("get From UCallBack action failed\n"); 785 else 786 log_verbose("get From UCallBack action ok\n"); 787 788 /*testing ucnv_setToUCallBack with error conditions*/ 789 err=U_ILLEGAL_ARGUMENT_ERROR; 790 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n"); 791 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 792 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 793 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){ 794 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 795 } 796 err=U_ZERO_ERROR; 797 798 799 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/ 800 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context); 801 802 log_verbose("\n---Testing setTo UCallBack...\n"); 803 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err); 804 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context) 805 { 806 log_err("FAILURE! %s\n", myErrorName(err)); 807 } 808 809 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 810 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM) 811 log_err("To UCallBack failed\n"); 812 else 813 log_verbose("To UCallBack ok\n"); 814 815 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n"); 816 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err); 817 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM) 818 { log_err("FAILURE! %s\n", myErrorName(err)); } 819 820 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 821 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context) 822 log_err("To UCallBack failed\n"); 823 else 824 log_verbose("To UCallBack ok\n"); 825 826 /*testing ucnv_setToUCallBack with error conditions*/ 827 err=U_ILLEGAL_ARGUMENT_ERROR; 828 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n"); 829 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err); 830 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 831 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){ 832 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 833 } 834 err=U_ZERO_ERROR; 835 836 837 /*getcodepageid testing ucnv_getCCSID() */ 838 log_verbose("\n----Testing getCCSID....\n"); 839 cp = ucnv_getCCSID(myConverter,&err); 840 if (U_FAILURE(err)) 841 { 842 log_err("FAILURE!..... %s\n", myErrorName(err)); 843 } 844 if (cp != CodePageNumberToTest[codepage_index]) 845 log_err("Codepage number test failed\n"); 846 else 847 log_verbose("Codepage number test OK\n"); 848 849 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/ 850 err=U_ILLEGAL_ARGUMENT_ERROR; 851 if( ucnv_getCCSID(myConverter,&err) != -1){ 852 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n"); 853 } 854 err=U_ZERO_ERROR; 855 856 /*getCodepagePlatform testing ucnv_getPlatform()*/ 857 log_verbose("\n---Testing getCodepagePlatform ..\n"); 858 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err)) 859 log_err("Platform codepage test failed\n"); 860 else 861 log_verbose("Platform codepage test ok\n"); 862 863 if (U_FAILURE(err)) 864 { 865 log_err("FAILURE! %s\n", myErrorName(err)); 866 } 867 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/ 868 err= U_ILLEGAL_ARGUMENT_ERROR; 869 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){ 870 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n"); 871 } 872 err=U_ZERO_ERROR; 873 874 875 /*Reads the BOM*/ 876 fread(&BOM, sizeof(UChar), 1, ucs_file_in); 877 if (BOM!=0xFEFF && BOM!=0xFFFE) 878 { 879 log_err("File Missing BOM...Bailing!\n"); 880 return; 881 } 882 883 884 /*Reads in the file*/ 885 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) 886 { 887 myUChar = ucs_file_buffer[i-1]; 888 889 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/ 890 } 891 892 myUChar = ucs_file_buffer[i-1]; 893 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/ 894 895 896 /*testing ucnv_fromUChars() and ucnv_toUChars() */ 897 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/ 898 899 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1)); 900 u_uastrcpy(uchar1,""); 901 u_strncpy(uchar1,ucs_file_buffer,i); 902 uchar1[i] = 0; 903 904 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1)); 905 u_uastrcpy(uchar3,""); 906 u_strncpy(uchar3,ucs_file_buffer,i); 907 uchar3[i] = 0; 908 909 /*Calls the Conversion Routine */ 910 testLong1 = MAX_FILE_LEN; 911 log_verbose("\n---Testing ucnv_fromUChars()\n"); 912 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 913 if (U_FAILURE(err)) 914 { 915 log_err("\nFAILURE...%s\n", myErrorName(err)); 916 } 917 else 918 log_verbose(" ucnv_fromUChars() o.k.\n"); 919 920 /*test the conversion routine */ 921 log_verbose("\n---Testing ucnv_toUChars()\n"); 922 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */ 923 targetcapacity2=0; 924 targetsize = ucnv_toUChars(myConverter, 925 NULL, 926 targetcapacity2, 927 output_cp_buffer, 928 strlen(output_cp_buffer), 929 &err); 930 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ 931 932 if(err==U_BUFFER_OVERFLOW_ERROR) 933 { 934 err=U_ZERO_ERROR; 935 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar)); 936 targetsize = ucnv_toUChars(myConverter, 937 uchar2, 938 targetsize+1, 939 output_cp_buffer, 940 strlen(output_cp_buffer), 941 &err); 942 943 if(U_FAILURE(err)) 944 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err)); 945 else 946 log_verbose(" ucnv_toUChars() o.k.\n"); 947 948 if(u_strcmp(uchar1,uchar2)!=0) 949 log_err("equality test failed with conversion routine\n"); 950 } 951 else 952 { 953 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n"); 954 } 955 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/ 956 err=U_ILLEGAL_ARGUMENT_ERROR; 957 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n"); 958 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 959 if (targetcapacity !=0) { 960 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 961 } 962 err=U_ZERO_ERROR; 963 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n"); 964 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err); 965 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) { 966 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n"); 967 } 968 err=U_ZERO_ERROR; 969 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n"); 970 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err); 971 if (targetcapacity !=0) { 972 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n"); 973 } 974 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n"); 975 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err); 976 if (err != U_BUFFER_OVERFLOW_ERROR) { 977 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); 978 } 979 /*toUChars with error conditions*/ 980 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); 981 if(targetsize != 0){ 982 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 983 } 984 err=U_ZERO_ERROR; 985 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); 986 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ 987 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); 988 } 989 err=U_ZERO_ERROR; 990 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err); 991 if (targetsize !=0) { 992 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); 993 } 994 targetcapacity2=0; 995 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); 996 if (err != U_STRING_NOT_TERMINATED_WARNING) { 997 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", 998 u_errorName(err)); 999 } 1000 err=U_ZERO_ERROR; 1001 /*-----*/ 1002 1003 1004 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ 1005 /*Clean up re-usable vars*/ 1006 j=0; 1007 log_verbose("Testing ucnv_fromUnicode().....\n"); 1008 tmp_ucs_buf=ucs_file_buffer_use; 1009 ucnv_fromUnicode(myConverter, &mytarget_1, 1010 mytarget + MAX_FILE_LEN, 1011 &tmp_ucs_buf, 1012 ucs_file_buffer_use+i, 1013 NULL, 1014 TRUE, 1015 &err); 1016 consumedUni = (UChar*)tmp_consumedUni; 1017 1018 if (U_FAILURE(err)) 1019 { 1020 log_err("FAILURE! %s\n", myErrorName(err)); 1021 } 1022 else 1023 log_verbose("ucnv_fromUnicode() o.k.\n"); 1024 1025 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */ 1026 log_verbose("Testing ucnv_toUnicode().....\n"); 1027 tmp_mytarget_use=mytarget_use; 1028 tmp_consumed = consumed; 1029 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1, 1030 my_ucs_file_buffer + MAX_FILE_LEN, 1031 &tmp_mytarget_use, 1032 mytarget_use + (mytarget_1 - mytarget), 1033 NULL, 1034 FALSE, 1035 &err); 1036 consumed = (char*)tmp_consumed; 1037 if (U_FAILURE(err)) 1038 { 1039 log_err("FAILURE! %s\n", myErrorName(err)); 1040 } 1041 else 1042 log_verbose("ucnv_toUnicode() o.k.\n"); 1043 1044 1045 log_verbose("\n---Testing RoundTrip ...\n"); 1046 1047 1048 u_strncpy(uchar3, my_ucs_file_buffer,i); 1049 uchar3[i] = 0; 1050 1051 if(u_strcmp(uchar1,uchar3)==0) 1052 log_verbose("Equality test o.k.\n"); 1053 else 1054 log_err("Equality test failed\n"); 1055 1056 /*sanity compare */ 1057 if(uchar2 == NULL) 1058 { 1059 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__); 1060 } 1061 else 1062 { 1063 if(u_strcmp(uchar2, uchar3)==0) 1064 log_verbose("Equality test o.k.\n"); 1065 else 1066 log_err("Equality test failed\n"); 1067 } 1068 1069 fclose(ucs_file_in); 1070 ucnv_close(myConverter); 1071 if (uchar1 != 0) free(uchar1); 1072 if (uchar2 != 0) free(uchar2); 1073 if (uchar3 != 0) free(uchar3); 1074 } 1075 1076 free((void*)mytarget); 1077 free((void*)output_cp_buffer); 1078 free((void*)ucs_file_buffer); 1079 free((void*)my_ucs_file_buffer); 1080 #endif 1081 } 1082 1083 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) 1084 { 1085 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; 1086 } 1087 1088 1089 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) 1090 { 1091 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; 1092 } 1093 1094 static void TestFlushCache(void) { 1095 #if !UCONFIG_NO_LEGACY_CONVERSION 1096 UErrorCode err = U_ZERO_ERROR; 1097 UConverter* someConverters[5]; 1098 int flushCount = 0; 1099 1100 /* flush the converter cache to get a consistent state before the flushing is tested */ 1101 ucnv_flushCache(); 1102 1103 /*Testing ucnv_open()*/ 1104 /* Note: These converters have been chosen because they do NOT 1105 encode the Latin characters (U+0041, ...), and therefore are 1106 highly unlikely to be chosen as system default codepages */ 1107 1108 someConverters[0] = ucnv_open("ibm-1047", &err); 1109 if (U_FAILURE(err)) { 1110 log_data_err("FAILURE! %s\n", myErrorName(err)); 1111 } 1112 1113 someConverters[1] = ucnv_open("ibm-1047", &err); 1114 if (U_FAILURE(err)) { 1115 log_data_err("FAILURE! %s\n", myErrorName(err)); 1116 } 1117 1118 someConverters[2] = ucnv_open("ibm-1047", &err); 1119 if (U_FAILURE(err)) { 1120 log_data_err("FAILURE! %s\n", myErrorName(err)); 1121 } 1122 1123 someConverters[3] = ucnv_open("gb18030", &err); 1124 if (U_FAILURE(err)) { 1125 log_data_err("FAILURE! %s\n", myErrorName(err)); 1126 } 1127 1128 someConverters[4] = ucnv_open("ibm-954", &err); 1129 if (U_FAILURE(err)) { 1130 log_data_err("FAILURE! %s\n", myErrorName(err)); 1131 } 1132 1133 1134 /* Testing ucnv_flushCache() */ 1135 log_verbose("\n---Testing ucnv_flushCache...\n"); 1136 if ((flushCount=ucnv_flushCache())==0) 1137 log_verbose("Flush cache ok\n"); 1138 else 1139 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1140 1141 /*testing ucnv_close() and ucnv_flushCache() */ 1142 ucnv_close(someConverters[0]); 1143 ucnv_close(someConverters[1]); 1144 1145 if ((flushCount=ucnv_flushCache())==0) 1146 log_verbose("Flush cache ok\n"); 1147 else 1148 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1149 1150 ucnv_close(someConverters[2]); 1151 ucnv_close(someConverters[3]); 1152 1153 if ((flushCount=ucnv_flushCache())==2) 1154 log_verbose("Flush cache ok\n"); /*because first, second and third are same */ 1155 else 1156 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", 1157 __LINE__, 1158 flushCount); 1159 1160 ucnv_close(someConverters[4]); 1161 if ( (flushCount=ucnv_flushCache())==1) 1162 log_verbose("Flush cache ok\n"); 1163 else 1164 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); 1165 #endif 1166 } 1167 1168 /** 1169 * Test the converter alias API, specifically the fuzzy matching of 1170 * alias names and the alias table integrity. Make sure each 1171 * converter has at least one alias (itself), and that its listed 1172 * aliases map back to itself. Check some hard-coded UTF-8 and 1173 * ISO_2022 aliases to make sure they work. 1174 */ 1175 static void TestAlias() { 1176 int32_t i, ncnv; 1177 UErrorCode status = U_ZERO_ERROR; 1178 1179 /* Predetermined aliases that we expect to map back to ISO_2022 1180 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ 1181 const char* ISO_2022_NAMES[] = 1182 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", 1183 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; 1184 int32_t ISO_2022_NAMES_LENGTH = 1185 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); 1186 const char *UTF8_NAMES[] = 1187 { "UTF-8", "utf-8", "utf8", "ibm-1208", 1188 "utf_8", "ibm1208", "cp1208" }; 1189 int32_t UTF8_NAMES_LENGTH = 1190 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); 1191 1192 struct { 1193 const char *name; 1194 const char *alias; 1195 } CONVERTERS_NAMES[] = { 1196 { "UTF-32BE", "UTF32_BigEndian" }, 1197 { "UTF-32LE", "UTF32_LittleEndian" }, 1198 { "UTF-32", "ISO-10646-UCS-4" }, 1199 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, 1200 { "UTF-32", "ucs-4" } 1201 }; 1202 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); 1203 1204 /* When there are bugs in gencnval or in ucnv_io, converters can 1205 appear to have no aliases. */ 1206 ncnv = ucnv_countAvailable(); 1207 log_verbose("%d converters\n", ncnv); 1208 for (i=0; i<ncnv; ++i) { 1209 const char *name = ucnv_getAvailableName(i); 1210 const char *alias0; 1211 uint16_t na = ucnv_countAliases(name, &status); 1212 uint16_t j; 1213 UConverter *cnv; 1214 1215 if (na == 0) { 1216 log_err("FAIL: Converter \"%s\" (i=%d)" 1217 " has no aliases; expect at least one\n", 1218 name, i); 1219 continue; 1220 } 1221 cnv = ucnv_open(name, &status); 1222 if (U_FAILURE(status)) { 1223 log_data_err("FAIL: Converter \"%s\" (i=%d)" 1224 " can't be opened.\n", 1225 name, i); 1226 } 1227 else { 1228 if (strcmp(ucnv_getName(cnv, &status), name) != 0 1229 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { 1230 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " 1231 "The should be the same\n", 1232 name, ucnv_getName(cnv, &status)); 1233 } 1234 } 1235 ucnv_close(cnv); 1236 1237 status = U_ZERO_ERROR; 1238 alias0 = ucnv_getAlias(name, 0, &status); 1239 for (j=1; j<na; ++j) { 1240 const char *alias; 1241 /* Make sure each alias maps back to the the same list of 1242 aliases. Assume that if alias 0 is the same, the whole 1243 list is the same (this should always be true). */ 1244 const char *mapBack; 1245 1246 status = U_ZERO_ERROR; 1247 alias = ucnv_getAlias(name, j, &status); 1248 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1249 log_err("FAIL: Converter \"%s\"is ambiguous\n", name); 1250 } 1251 1252 if (alias == NULL) { 1253 log_err("FAIL: Converter \"%s\" -> " 1254 "alias[%d]=NULL\n", 1255 name, j); 1256 continue; 1257 } 1258 1259 mapBack = ucnv_getAlias(alias, 0, &status); 1260 1261 if (mapBack == NULL) { 1262 log_err("FAIL: Converter \"%s\" -> " 1263 "alias[%d]=\"%s\" -> " 1264 "alias[0]=NULL, exp. \"%s\"\n", 1265 name, j, alias, alias0); 1266 continue; 1267 } 1268 1269 if (0 != strcmp(alias0, mapBack)) { 1270 int32_t idx; 1271 UBool foundAlias = FALSE; 1272 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1273 /* Make sure that we only get this mismapping when there is 1274 an ambiguous alias, and the other converter has this alias too. */ 1275 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) { 1276 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) { 1277 foundAlias = TRUE; 1278 break; 1279 } 1280 } 1281 } 1282 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */ 1283 1284 if (!foundAlias) { 1285 log_err("FAIL: Converter \"%s\" -> " 1286 "alias[%d]=\"%s\" -> " 1287 "alias[0]=\"%s\", exp. \"%s\"\n", 1288 name, j, alias, mapBack, alias0); 1289 } 1290 } 1291 } 1292 } 1293 1294 1295 /* Check a list of predetermined aliases that we expect to map 1296 * back to ISO_2022 and UTF-8. */ 1297 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) { 1298 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status); 1299 if(!mapBack) { 1300 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]); 1301 continue; 1302 } 1303 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { 1304 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", 1305 ISO_2022_NAMES[i], mapBack); 1306 } 1307 } 1308 1309 1310 for (i=1; i<UTF8_NAMES_LENGTH; ++i) { 1311 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status); 1312 if(!mapBack) { 1313 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]); 1314 continue; 1315 } 1316 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) { 1317 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n", 1318 UTF8_NAMES[i], mapBack); 1319 } 1320 } 1321 1322 /* 1323 * Check a list of predetermined aliases that we expect to map 1324 * back to predermined converter names. 1325 */ 1326 1327 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { 1328 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); 1329 if(!mapBack) { 1330 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); 1331 continue; 1332 } 1333 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { 1334 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n", 1335 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name); 1336 } 1337 } 1338 1339 } 1340 1341 static void TestDuplicateAlias(void) { 1342 const char *alias; 1343 UErrorCode status = U_ZERO_ERROR; 1344 1345 status = U_ZERO_ERROR; 1346 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); 1347 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1348 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); 1349 } 1350 status = U_ZERO_ERROR; 1351 alias = ucnv_getStandardName("ibm-943", "IANA", &status); 1352 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1353 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias); 1354 } 1355 status = U_ZERO_ERROR; 1356 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status); 1357 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) { 1358 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias); 1359 } 1360 } 1361 1362 1363 /* Test safe clone callback */ 1364 1365 static uint32_t TSCC_nextSerial() 1366 { 1367 static uint32_t n = 1; 1368 1369 return (n++); 1370 } 1371 1372 typedef struct 1373 { 1374 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */ 1375 uint32_t serial; /* minted from nextSerial, above */ 1376 UBool wasClosed; /* close happened on the object */ 1377 } TSCCContext; 1378 1379 static TSCCContext *TSCC_clone(TSCCContext *ctx) 1380 { 1381 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext)); 1382 1383 newCtx->serial = TSCC_nextSerial(); 1384 newCtx->wasClosed = 0; 1385 newCtx->magic = 0xC0FFEE; 1386 1387 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial); 1388 1389 return newCtx; 1390 } 1391 1392 static void TSCC_fromU(const void *context, 1393 UConverterFromUnicodeArgs *fromUArgs, 1394 const UChar* codeUnits, 1395 int32_t length, 1396 UChar32 codePoint, 1397 UConverterCallbackReason reason, 1398 UErrorCode * err) 1399 { 1400 TSCCContext *ctx = (TSCCContext*)context; 1401 UConverterFromUCallback junkFrom; 1402 1403 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter); 1404 1405 if(ctx->magic != 0xC0FFEE) { 1406 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1407 return; 1408 } 1409 1410 if(reason == UCNV_CLONE) { 1411 UErrorCode subErr = U_ZERO_ERROR; 1412 TSCCContext *newCtx; 1413 TSCCContext *junkCtx; 1414 TSCCContext **pjunkCtx = &junkCtx; 1415 1416 /* "recreate" it */ 1417 log_verbose("TSCC_fromU: cloning..\n"); 1418 newCtx = TSCC_clone(ctx); 1419 1420 if(newCtx == NULL) { 1421 log_err("TSCC_fromU: internal clone failed on %p\n", ctx); 1422 } 1423 1424 /* now, SET it */ 1425 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1426 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1427 1428 if(U_FAILURE(subErr)) { 1429 *err = subErr; 1430 } 1431 } 1432 1433 if(reason == UCNV_CLOSE) { 1434 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial); 1435 ctx->wasClosed = TRUE; 1436 } 1437 } 1438 1439 1440 static void TSCC_toU(const void *context, 1441 UConverterToUnicodeArgs *toUArgs, 1442 const char* codeUnits, 1443 int32_t length, 1444 UConverterCallbackReason reason, 1445 UErrorCode * err) 1446 { 1447 TSCCContext *ctx = (TSCCContext*)context; 1448 UConverterToUCallback junkFrom; 1449 1450 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter); 1451 1452 if(ctx->magic != 0xC0FFEE) { 1453 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1454 return; 1455 } 1456 1457 if(reason == UCNV_CLONE) { 1458 UErrorCode subErr = U_ZERO_ERROR; 1459 TSCCContext *newCtx; 1460 TSCCContext *junkCtx; 1461 TSCCContext **pjunkCtx = &junkCtx; 1462 1463 /* "recreate" it */ 1464 log_verbose("TSCC_toU: cloning..\n"); 1465 newCtx = TSCC_clone(ctx); 1466 1467 if(newCtx == NULL) { 1468 log_err("TSCC_toU: internal clone failed on %p\n", ctx); 1469 } 1470 1471 /* now, SET it */ 1472 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1473 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1474 1475 if(U_FAILURE(subErr)) { 1476 *err = subErr; 1477 } 1478 } 1479 1480 if(reason == UCNV_CLOSE) { 1481 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial); 1482 ctx->wasClosed = TRUE; 1483 } 1484 } 1485 1486 static void TSCC_init(TSCCContext *q) 1487 { 1488 q->magic = 0xC0FFEE; 1489 q->serial = TSCC_nextSerial(); 1490 q->wasClosed = 0; 1491 } 1492 1493 static void TSCC_print_log(TSCCContext *q, const char *name) 1494 { 1495 if(q==NULL) { 1496 log_verbose("TSCContext: %s is NULL!!\n", name); 1497 } else { 1498 if(q->magic != 0xC0FFEE) { 1499 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n", 1500 q,q->serial, q->magic); 1501 } 1502 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n", 1503 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open"); 1504 } 1505 } 1506 1507 #if !UCONFIG_NO_LEGACY_CONVERSION 1508 static void TestConvertSafeCloneCallback() 1509 { 1510 UErrorCode err = U_ZERO_ERROR; 1511 TSCCContext from1, to1; 1512 TSCCContext *from2, *from3, *to2, *to3; 1513 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; 1514 char hunk[8192]; 1515 int32_t hunkSize = 8192; 1516 UConverterFromUCallback junkFrom; 1517 UConverterToUCallback junkTo; 1518 UConverter *conv1, *conv2 = NULL; 1519 1520 conv1 = ucnv_open("iso-8859-3", &err); 1521 1522 if(U_FAILURE(err)) { 1523 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); 1524 return; 1525 } 1526 1527 log_verbose("Opened conv1=%p\n", conv1); 1528 1529 TSCC_init(&from1); 1530 TSCC_init(&to1); 1531 1532 TSCC_print_log(&from1, "from1"); 1533 TSCC_print_log(&to1, "to1"); 1534 1535 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err); 1536 log_verbose("Set from1 on conv1\n"); 1537 TSCC_print_log(&from1, "from1"); 1538 1539 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err); 1540 log_verbose("Set to1 on conv1\n"); 1541 TSCC_print_log(&to1, "to1"); 1542 1543 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err); 1544 if(U_FAILURE(err)) { 1545 log_err("safeClone failed: %s\n", u_errorName(err)); 1546 return; 1547 } 1548 log_verbose("Cloned to conv2=%p.\n", conv2); 1549 1550 /********** from *********************/ 1551 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); 1552 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); 1553 1554 TSCC_print_log(from2, "from2"); 1555 TSCC_print_log(from3, "from3(==from1)"); 1556 1557 if(from2 == NULL) { 1558 log_err("FAIL! from2 is null \n"); 1559 return; 1560 } 1561 1562 if(from3 == NULL) { 1563 log_err("FAIL! from3 is null \n"); 1564 return; 1565 } 1566 1567 if(from3 != (&from1) ) { 1568 log_err("FAIL! conv1's FROM context changed!\n"); 1569 } 1570 1571 if(from2 == (&from1) ) { 1572 log_err("FAIL! conv1's FROM context is the same as conv2's!\n"); 1573 } 1574 1575 if(from1.wasClosed) { 1576 log_err("FAIL! from1 is closed \n"); 1577 } 1578 1579 if(from2->wasClosed) { 1580 log_err("FAIL! from2 was closed\n"); 1581 } 1582 1583 /********** to *********************/ 1584 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); 1585 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); 1586 1587 TSCC_print_log(to2, "to2"); 1588 TSCC_print_log(to3, "to3(==to1)"); 1589 1590 if(to2 == NULL) { 1591 log_err("FAIL! to2 is null \n"); 1592 return; 1593 } 1594 1595 if(to3 == NULL) { 1596 log_err("FAIL! to3 is null \n"); 1597 return; 1598 } 1599 1600 if(to3 != (&to1) ) { 1601 log_err("FAIL! conv1's TO context changed!\n"); 1602 } 1603 1604 if(to2 == (&to1) ) { 1605 log_err("FAIL! conv1's TO context is the same as conv2's!\n"); 1606 } 1607 1608 if(to1.wasClosed) { 1609 log_err("FAIL! to1 is closed \n"); 1610 } 1611 1612 if(to2->wasClosed) { 1613 log_err("FAIL! to2 was closed\n"); 1614 } 1615 1616 /*************************************/ 1617 1618 ucnv_close(conv1); 1619 log_verbose("ucnv_closed (conv1)\n"); 1620 TSCC_print_log(&from1, "from1"); 1621 TSCC_print_log(from2, "from2"); 1622 TSCC_print_log(&to1, "to1"); 1623 TSCC_print_log(to2, "to2"); 1624 1625 if(from1.wasClosed == FALSE) { 1626 log_err("FAIL! from1 is NOT closed \n"); 1627 } 1628 1629 if(from2->wasClosed) { 1630 log_err("FAIL! from2 was closed\n"); 1631 } 1632 1633 if(to1.wasClosed == FALSE) { 1634 log_err("FAIL! to1 is NOT closed \n"); 1635 } 1636 1637 if(to2->wasClosed) { 1638 log_err("FAIL! to2 was closed\n"); 1639 } 1640 1641 ucnv_close(conv2); 1642 log_verbose("ucnv_closed (conv2)\n"); 1643 1644 TSCC_print_log(&from1, "from1"); 1645 TSCC_print_log(from2, "from2"); 1646 1647 if(from1.wasClosed == FALSE) { 1648 log_err("FAIL! from1 is NOT closed \n"); 1649 } 1650 1651 if(from2->wasClosed == FALSE) { 1652 log_err("FAIL! from2 was NOT closed\n"); 1653 } 1654 1655 TSCC_print_log(&to1, "to1"); 1656 TSCC_print_log(to2, "to2"); 1657 1658 if(to1.wasClosed == FALSE) { 1659 log_err("FAIL! to1 is NOT closed \n"); 1660 } 1661 1662 if(to2->wasClosed == FALSE) { 1663 log_err("FAIL! to2 was NOT closed\n"); 1664 } 1665 1666 if(to2 != (&to1)) { 1667 free(to2); /* to1 is stack based */ 1668 } 1669 if(from2 != (&from1)) { 1670 free(from2); /* from1 is stack based */ 1671 } 1672 } 1673 #endif 1674 1675 static UBool 1676 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { 1677 while(length>0) { 1678 if(*p!=b) { 1679 return TRUE; 1680 } 1681 ++p; 1682 --length; 1683 } 1684 return FALSE; 1685 } 1686 1687 static void TestConvertSafeClone() 1688 { 1689 /* one 'regular' & all the 'private stateful' converters */ 1690 static const char *const names[] = { 1691 #if !UCONFIG_NO_LEGACY_CONVERSION 1692 "ibm-1047", 1693 "ISO_2022,locale=zh,version=1", 1694 #endif 1695 "SCSU", 1696 #if !UCONFIG_NO_LEGACY_CONVERSION 1697 "HZ", 1698 "lmbcs", 1699 "ISCII,version=0", 1700 "ISO_2022,locale=kr,version=1", 1701 "ISO_2022,locale=jp,version=2", 1702 #endif 1703 "BOCU-1", 1704 "UTF-7", 1705 #if !UCONFIG_NO_LEGACY_CONVERSION 1706 "IMAP-mailbox-name", 1707 "ibm-1047-s390" 1708 #else 1709 "IMAP=mailbox-name" 1710 #endif 1711 }; 1712 1713 /* store the actual sizes of each converter */ 1714 int32_t actualSizes[LENGTHOF(names)]; 1715 1716 static const int32_t bufferSizes[] = { 1717 U_CNV_SAFECLONE_BUFFERSIZE, 1718 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ 1719 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ 1720 }; 1721 1722 char charBuffer[21]; /* Leave at an odd number for alignment testing */ 1723 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; 1724 int32_t bufferSize, maxBufferSize; 1725 const char *maxName; 1726 UConverter * cnv, *cnv2; 1727 UErrorCode err; 1728 1729 char *pCharBuffer; 1730 const char *pConstCharBuffer; 1731 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); 1732 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ 1733 UChar uniCharBuffer[20]; 1734 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; 1735 const char *pCharSource = charSourceBuffer; 1736 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); 1737 UChar *pUCharTarget = uniCharBuffer; 1738 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); 1739 const UChar * pUniBuffer; 1740 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); 1741 int32_t index, j; 1742 1743 err = U_ZERO_ERROR; 1744 cnv = ucnv_open(names[0], &err); 1745 if(U_SUCCESS(err)) { 1746 /* Check the various error & informational states: */ 1747 1748 /* Null status - just returns NULL */ 1749 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1750 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0)) 1751 { 1752 log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); 1753 } 1754 /* error status - should return 0 & keep error the same */ 1755 err = U_MEMORY_ALLOCATION_ERROR; 1756 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) 1757 { 1758 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); 1759 } 1760 err = U_ZERO_ERROR; 1761 1762 /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/ 1763 if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1764 { 1765 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); 1766 } 1767 err = U_ZERO_ERROR; 1768 1769 /* buffer size pointer is 0 - fill in pbufferSize with a size */ 1770 bufferSize = 0; 1771 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) 1772 { 1773 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); 1774 } 1775 /* Verify our define is large enough */ 1776 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) 1777 { 1778 log_err("FAIL: Pre-calculated buffer size is too small\n"); 1779 } 1780 /* Verify we can use this run-time calculated size */ 1781 if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) 1782 { 1783 log_err("FAIL: Converter can't be cloned with run-time size\n"); 1784 } 1785 if (cnv2) { 1786 ucnv_close(cnv2); 1787 } 1788 1789 /* size one byte too small - should allocate & let us know */ 1790 --bufferSize; 1791 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1792 { 1793 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); 1794 } 1795 if (cnv2) { 1796 ucnv_close(cnv2); 1797 } 1798 1799 err = U_ZERO_ERROR; 1800 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1801 1802 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ 1803 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1804 { 1805 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); 1806 } 1807 if (cnv2) { 1808 ucnv_close(cnv2); 1809 } 1810 1811 err = U_ZERO_ERROR; 1812 1813 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ 1814 if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1815 { 1816 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); 1817 } 1818 1819 ucnv_close(cnv); 1820 } 1821 1822 maxBufferSize = 0; 1823 maxName = ""; 1824 1825 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ 1826 1827 for(j = 0; j < LENGTHOF(bufferSizes); ++j) { 1828 for (index = 0; index < LENGTHOF(names); index++) 1829 { 1830 err = U_ZERO_ERROR; 1831 cnv = ucnv_open(names[index], &err); 1832 if(U_FAILURE(err)) { 1833 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[index], u_errorName(err)); 1834 continue; 1835 } 1836 1837 if(j == 0) { 1838 /* preflight to get maxBufferSize */ 1839 actualSizes[index] = 0; 1840 ucnv_safeClone(cnv, NULL, &actualSizes[index], &err); 1841 if(actualSizes[index] > maxBufferSize) { 1842 maxBufferSize = actualSizes[index]; 1843 maxName = names[index]; 1844 } 1845 } 1846 1847 memset(buffer, 0xaa, sizeof(buffer)); 1848 1849 bufferSize = bufferSizes[j]; 1850 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); 1851 1852 /* close the original immediately to make sure that the clone works by itself */ 1853 ucnv_close(cnv); 1854 1855 if( actualSizes[index] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && 1856 err == U_SAFECLONE_ALLOCATED_WARNING 1857 ) { 1858 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[index]); 1859 } 1860 1861 /* check if the clone function overwrote any bytes that it is not supposed to touch */ 1862 if(bufferSize <= bufferSizes[j]) { 1863 /* used the stack buffer */ 1864 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || 1865 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) 1866 ) { 1867 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", 1868 names[index], bufferSize, bufferSizes[j]); 1869 } 1870 } else { 1871 /* heap-allocated the clone */ 1872 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { 1873 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", 1874 names[index], bufferSize, bufferSizes[j]); 1875 } 1876 } 1877 1878 pCharBuffer = charBuffer; 1879 pUniBuffer = uniBuffer; 1880 1881 ucnv_fromUnicode(cnv2, 1882 &pCharBuffer, 1883 charBufferLimit, 1884 &pUniBuffer, 1885 uniBufferLimit, 1886 NULL, 1887 TRUE, 1888 &err); 1889 if(U_FAILURE(err)){ 1890 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); 1891 } 1892 ucnv_toUnicode(cnv2, 1893 &pUCharTarget, 1894 pUCharTargetLimit, 1895 &pCharSource, 1896 pCharSourceLimit, 1897 NULL, 1898 TRUE, 1899 &err 1900 ); 1901 1902 if(U_FAILURE(err)){ 1903 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); 1904 } 1905 1906 pConstCharBuffer = charBuffer; 1907 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) 1908 { 1909 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); 1910 } 1911 ucnv_close(cnv2); 1912 } 1913 } 1914 1915 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1916 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1917 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { 1918 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1919 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1920 } 1921 } 1922 1923 static void TestCCSID() { 1924 #if !UCONFIG_NO_LEGACY_CONVERSION 1925 UConverter *cnv; 1926 UErrorCode errorCode; 1927 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; 1928 int32_t i, ccsid; 1929 1930 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { 1931 ccsid=ccsids[i]; 1932 1933 errorCode=U_ZERO_ERROR; 1934 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode); 1935 if(U_FAILURE(errorCode)) { 1936 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode)); 1937 continue; 1938 } 1939 1940 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) { 1941 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode)); 1942 } 1943 1944 /* skip gb18030(ccsid 1392) */ 1945 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) { 1946 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode)); 1947 } 1948 1949 ucnv_close(cnv); 1950 } 1951 #endif 1952 } 1953 1954 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */ 1955 1956 /* CHUNK_SIZE defined in common\ucnv.c: */ 1957 #define CHUNK_SIZE 1024 1958 1959 static void bug1(void); 1960 static void bug2(void); 1961 static void bug3(void); 1962 1963 static void 1964 TestJ932(void) 1965 { 1966 bug1(); /* Unicode intermediate buffer straddle bug */ 1967 bug2(); /* pre-flighting size incorrect caused by simple overflow */ 1968 bug3(); /* pre-flighting size incorrect caused by expansion overflow */ 1969 } 1970 1971 /* 1972 * jitterbug 932: test chunking boundary conditions in 1973 1974 int32_t ucnv_convert(const char *toConverterName, 1975 const char *fromConverterName, 1976 char *target, 1977 int32_t targetSize, 1978 const char *source, 1979 int32_t sourceSize, 1980 UErrorCode * err) 1981 1982 * See discussions on the icu mailing list in 1983 * 2001-April with the subject "converter 'flush' question". 1984 * 1985 * Bug report and test code provided by Edward J. Batutis. 1986 */ 1987 static void bug1() 1988 { 1989 #if !UCONFIG_NO_LEGACY_CONVERSION 1990 char char_in[CHUNK_SIZE+32]; 1991 char char_out[CHUNK_SIZE*2]; 1992 1993 /* GB 18030 equivalent of U+10000 is 90308130 */ 1994 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 }; 1995 1996 UErrorCode err = U_ZERO_ERROR; 1997 int32_t i, test_seq_len = sizeof(test_seq); 1998 1999 /* 2000 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward 2001 * until the straddle bug appears. I didn't want to hard-code everything so this test could 2002 * be expanded - however this is the only type of straddle bug I can think of at the moment - 2003 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no 2004 * other Unicode sequences cause a bug since combining sequences are not supported by the 2005 * converters. 2006 */ 2007 2008 for (i = test_seq_len; i >= 0; i--) { 2009 /* put character sequence into input buffer */ 2010 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */ 2011 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len); 2012 2013 /* do the conversion */ 2014 ucnv_convert("us-ascii", /* out */ 2015 "gb18030", /* in */ 2016 char_out, 2017 sizeof(char_out), 2018 char_in, 2019 sizeof(char_in), 2020 &err); 2021 2022 /* bug1: */ 2023 if (err == U_TRUNCATED_CHAR_FOUND) { 2024 /* this happens when surrogate pair straddles the intermediate buffer in 2025 * T_UConverter_fromCodepageToCodepage */ 2026 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n"); 2027 } 2028 } 2029 #endif 2030 } 2031 2032 /* bug2: pre-flighting loop bug: simple overflow causes bug */ 2033 static void bug2() 2034 { 2035 /* US-ASCII "1234567890" */ 2036 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 }; 2037 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 }; 2038 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, 2039 0x00, 0x00, 0x00, 0x31, 2040 0x00, 0x00, 0x00, 0x32, 2041 0x00, 0x00, 0x00, 0x33, 2042 0x00, 0x00, 0x00, 0x34, 2043 0x00, 0x00, 0x00, 0x35, 2044 0x00, 0x00, 0x00, 0x36, 2045 0x00, 0x00, 0x00, 0x37, 2046 0x00, 0x00, 0x00, 0x38, 2047 0x00, 0x00, (char)0xf0, 0x00}; 2048 static char target[5]; 2049 2050 UErrorCode err = U_ZERO_ERROR; 2051 int32_t size; 2052 2053 /* do the conversion */ 2054 size = ucnv_convert("iso-8859-1", /* out */ 2055 "us-ascii", /* in */ 2056 target, 2057 sizeof(target), 2058 source, 2059 sizeof(source), 2060 &err); 2061 2062 if ( size != 10 ) { 2063 /* bug2: size is 5, should be 10 */ 2064 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size); 2065 } 2066 2067 err = U_ZERO_ERROR; 2068 /* do the conversion */ 2069 size = ucnv_convert("UTF-32BE", /* out */ 2070 "UTF-8", /* in */ 2071 target, 2072 sizeof(target), 2073 sourceUTF8, 2074 sizeof(sourceUTF8), 2075 &err); 2076 2077 if ( size != 32 ) { 2078 /* bug2: size is 5, should be 32 */ 2079 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size); 2080 } 2081 2082 err = U_ZERO_ERROR; 2083 /* do the conversion */ 2084 size = ucnv_convert("UTF-8", /* out */ 2085 "UTF-32BE", /* in */ 2086 target, 2087 sizeof(target), 2088 sourceUTF32, 2089 sizeof(sourceUTF32), 2090 &err); 2091 2092 if ( size != 12 ) { 2093 /* bug2: size is 5, should be 12 */ 2094 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); 2095 } 2096 } 2097 2098 /* 2099 * bug3: when the characters expand going from source to target codepage 2100 * you get bug3 in addition to bug2 2101 */ 2102 static void bug3() 2103 { 2104 #if !UCONFIG_NO_LEGACY_CONVERSION 2105 char char_in[CHUNK_SIZE*4]; 2106 char target[5]; 2107 UErrorCode err = U_ZERO_ERROR; 2108 int32_t size; 2109 2110 /* 2111 * first get the buggy size from bug2 then 2112 * compare it to buggy size with an expansion 2113 */ 2114 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ 2115 2116 /* do the conversion */ 2117 size = ucnv_convert("lmbcs", /* out */ 2118 "us-ascii", /* in */ 2119 target, 2120 sizeof(target), 2121 char_in, 2122 sizeof(char_in), 2123 &err); 2124 2125 if ( size != sizeof(char_in) ) { 2126 /* 2127 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer 2128 * in the converter?), should be CHUNK_SIZE*4 2129 * 2130 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize... 2131 */ 2132 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size); 2133 } 2134 2135 /* 2136 * now do the conversion with expansion 2137 * ascii 0x08 expands to 0x0F 0x28 in lmbcs 2138 */ 2139 memset(char_in, 8, sizeof(char_in)); 2140 err = U_ZERO_ERROR; 2141 2142 /* do the conversion */ 2143 size = ucnv_convert("lmbcs", /* out */ 2144 "us-ascii", /* in */ 2145 target, 2146 sizeof(target), 2147 char_in, 2148 sizeof(char_in), 2149 &err); 2150 2151 /* expect 2X expansion */ 2152 if ( size != sizeof(char_in) * 2 ) { 2153 /* 2154 * bug3: 2155 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05: 2156 */ 2157 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); 2158 } 2159 #endif 2160 } 2161 2162 static void 2163 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, 2164 const char *src, int32_t srcLength, 2165 const char *expectTarget, int32_t expectTargetLength, 2166 int32_t chunkSize, 2167 const char *testName, 2168 UErrorCode expectCode) { 2169 UChar pivotBuffer[CHUNK_SIZE]; 2170 UChar *pivotSource, *pivotTarget; 2171 const UChar *pivotLimit; 2172 2173 char targetBuffer[CHUNK_SIZE]; 2174 char *target; 2175 const char *srcLimit, *finalSrcLimit, *targetLimit; 2176 2177 int32_t targetLength; 2178 2179 UBool flush; 2180 2181 UErrorCode errorCode; 2182 2183 /* setup */ 2184 if(chunkSize>CHUNK_SIZE) { 2185 chunkSize=CHUNK_SIZE; 2186 } 2187 2188 pivotSource=pivotTarget=pivotBuffer; 2189 pivotLimit=pivotBuffer+chunkSize; 2190 2191 finalSrcLimit=src+srcLength; 2192 target=targetBuffer; 2193 targetLimit=targetBuffer+chunkSize; 2194 2195 ucnv_resetToUnicode(srcCnv); 2196 ucnv_resetFromUnicode(targetCnv); 2197 2198 errorCode=U_ZERO_ERROR; 2199 flush=FALSE; 2200 2201 /* convert, streaming-style (both converters and pivot keep state) */ 2202 for(;;) { 2203 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */ 2204 if(src+chunkSize<=finalSrcLimit) { 2205 srcLimit=src+chunkSize; 2206 } else { 2207 srcLimit=finalSrcLimit; 2208 } 2209 ucnv_convertEx(targetCnv, srcCnv, 2210 &target, targetLimit, 2211 &src, srcLimit, 2212 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, 2213 FALSE, flush, &errorCode); 2214 targetLength=(int32_t)(target-targetBuffer); 2215 if(target>targetLimit) { 2216 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", 2217 testName, chunkSize, target, targetLimit); 2218 break; /* TODO: major problem! */ 2219 } 2220 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 2221 /* continue converting another chunk */ 2222 errorCode=U_ZERO_ERROR; 2223 if(targetLength+chunkSize<=sizeof(targetBuffer)) { 2224 targetLimit=target+chunkSize; 2225 } else { 2226 targetLimit=targetBuffer+sizeof(targetBuffer); 2227 } 2228 } else if(U_FAILURE(errorCode)) { 2229 /* failure */ 2230 break; 2231 } else if(flush) { 2232 /* all done */ 2233 break; 2234 } else if(src==finalSrcLimit && pivotSource==pivotTarget) { 2235 /* all consumed, now flush without input (separate from conversion for testing) */ 2236 flush=TRUE; 2237 } 2238 } 2239 2240 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) { 2241 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n", 2242 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode)); 2243 } else if(targetLength!=expectTargetLength) { 2244 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n", 2245 testName, chunkSize, targetLength, expectTargetLength); 2246 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) { 2247 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n", 2248 testName, chunkSize); 2249 } 2250 } 2251 2252 static void 2253 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, 2254 const char *src, int32_t srcLength, 2255 const char *expectTarget, int32_t expectTargetLength, 2256 const char *testName, 2257 UErrorCode expectCode) { 2258 convertExStreaming(srcCnv, targetCnv, 2259 src, srcLength, 2260 expectTarget, expectTargetLength, 2261 1, testName, expectCode); 2262 convertExStreaming(srcCnv, targetCnv, 2263 src, srcLength, 2264 expectTarget, expectTargetLength, 2265 3, testName, expectCode); 2266 convertExStreaming(srcCnv, targetCnv, 2267 src, srcLength, 2268 expectTarget, expectTargetLength, 2269 7, testName, expectCode); 2270 } 2271 2272 static void TestConvertEx() { 2273 #if !UCONFIG_NO_LEGACY_CONVERSION 2274 static const uint8_t 2275 utf8[]={ 2276 /* 4e00 30a1 ff61 0410 */ 2277 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2278 }, 2279 shiftJIS[]={ 2280 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2281 }, 2282 errorTarget[]={ 2283 /* 2284 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2285 * SUB, SUB, 0x40, SUB, SUB, 0x40 2286 */ 2287 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 2288 }; 2289 2290 char srcBuffer[100], targetBuffer[100]; 2291 2292 const char *src; 2293 char *target; 2294 2295 UChar pivotBuffer[100]; 2296 UChar *pivotSource, *pivotTarget; 2297 2298 UConverter *cnv1, *cnv2; 2299 UErrorCode errorCode; 2300 2301 errorCode=U_ZERO_ERROR; 2302 cnv1=ucnv_open("UTF-8", &errorCode); 2303 if(U_FAILURE(errorCode)) { 2304 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode)); 2305 return; 2306 } 2307 2308 cnv2=ucnv_open("Shift-JIS", &errorCode); 2309 if(U_FAILURE(errorCode)) { 2310 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2311 ucnv_close(cnv1); 2312 return; 2313 } 2314 2315 /* test ucnv_convertEx() with streaming conversion style */ 2316 convertExMultiStreaming(cnv1, cnv2, 2317 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS), 2318 "UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2319 2320 convertExMultiStreaming(cnv2, cnv1, 2321 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), 2322 "Shift-JIS -> UTF-8", U_ZERO_ERROR); 2323 2324 /* U_ZERO_ERROR because by default the SUB callbacks are set */ 2325 convertExMultiStreaming(cnv1, cnv2, 2326 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget), 2327 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2328 2329 /* test some simple conversions */ 2330 2331 /* NUL-terminated source and target */ 2332 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2333 memcpy(srcBuffer, utf8, sizeof(utf8)); 2334 srcBuffer[sizeof(utf8)]=0; 2335 src=srcBuffer; 2336 target=targetBuffer; 2337 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2338 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2339 if( errorCode!=U_ZERO_ERROR || 2340 target-targetBuffer!=sizeof(shiftJIS) || 2341 *target!=0 || 2342 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2343 ) { 2344 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n", 2345 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2346 } 2347 2348 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */ 2349 errorCode=U_AMBIGUOUS_ALIAS_WARNING; 2350 memset(targetBuffer, 0xff, sizeof(targetBuffer)); 2351 src=srcBuffer; 2352 target=targetBuffer; 2353 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL, 2354 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2355 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2356 target-targetBuffer!=sizeof(shiftJIS) || 2357 *target!=(char)0xff || 2358 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2359 ) { 2360 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n", 2361 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2362 } 2363 2364 /* bad arguments */ 2365 errorCode=U_MESSAGE_PARSE_ERROR; 2366 src=srcBuffer; 2367 target=targetBuffer; 2368 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2369 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2370 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2371 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2372 } 2373 2374 /* pivotLimit==pivotStart */ 2375 errorCode=U_ZERO_ERROR; 2376 pivotSource=pivotTarget=pivotBuffer; 2377 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2378 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode); 2379 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2380 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode)); 2381 } 2382 2383 /* *pivotSource==NULL */ 2384 errorCode=U_ZERO_ERROR; 2385 pivotSource=NULL; 2386 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2387 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2388 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2389 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode)); 2390 } 2391 2392 /* *source==NULL */ 2393 errorCode=U_ZERO_ERROR; 2394 src=NULL; 2395 pivotSource=pivotBuffer; 2396 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2397 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2398 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2399 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); 2400 } 2401 2402 /* streaming conversion without a pivot buffer */ 2403 errorCode=U_ZERO_ERROR; 2404 src=srcBuffer; 2405 pivotSource=pivotBuffer; 2406 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2407 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); 2408 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2409 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); 2410 } 2411 2412 ucnv_close(cnv1); 2413 ucnv_close(cnv2); 2414 #endif 2415 } 2416 2417 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */ 2418 static const char *const badUTF8[]={ 2419 /* trail byte */ 2420 "\x80", 2421 2422 /* truncated multi-byte sequences */ 2423 "\xd0", 2424 "\xe0", 2425 "\xe1", 2426 "\xed", 2427 "\xee", 2428 "\xf0", 2429 "\xf1", 2430 "\xf4", 2431 "\xf8", 2432 "\xfc", 2433 2434 "\xe0\x80", 2435 "\xe0\xa0", 2436 "\xe1\x80", 2437 "\xed\x80", 2438 "\xed\xa0", 2439 "\xee\x80", 2440 "\xf0\x80", 2441 "\xf0\x90", 2442 "\xf1\x80", 2443 "\xf4\x80", 2444 "\xf4\x90", 2445 "\xf8\x80", 2446 "\xfc\x80", 2447 2448 "\xf0\x80\x80", 2449 "\xf0\x90\x80", 2450 "\xf1\x80\x80", 2451 "\xf4\x80\x80", 2452 "\xf4\x90\x80", 2453 "\xf8\x80\x80", 2454 "\xfc\x80\x80", 2455 2456 "\xf8\x80\x80\x80", 2457 "\xfc\x80\x80\x80", 2458 2459 "\xfc\x80\x80\x80\x80", 2460 2461 /* complete sequences but non-shortest forms or out of range etc. */ 2462 "\xc0\x80", 2463 "\xe0\x80\x80", 2464 "\xed\xa0\x80", 2465 "\xf0\x80\x80\x80", 2466 "\xf4\x90\x80\x80", 2467 "\xf8\x80\x80\x80\x80", 2468 "\xfc\x80\x80\x80\x80\x80", 2469 "\xfe", 2470 "\xff" 2471 }; 2472 2473 /* get some character that can be converted and convert it */ 2474 static UBool getTestChar(UConverter *cnv, const char *converterName, 2475 char charUTF8[4], int32_t *pCharUTF8Length, 2476 char char0[8], int32_t *pChar0Length, 2477 char char1[8], int32_t *pChar1Length) { 2478 UChar utf16[U16_MAX_LENGTH]; 2479 int32_t utf16Length; 2480 2481 const UChar *utf16Source; 2482 char *target; 2483 2484 USet *set; 2485 UChar32 c; 2486 UErrorCode errorCode; 2487 2488 errorCode=U_ZERO_ERROR; 2489 set=uset_open(1, 0); 2490 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2491 c=uset_charAt(set, uset_size(set)/2); 2492 uset_close(set); 2493 2494 utf16Length=0; 2495 U16_APPEND_UNSAFE(utf16, utf16Length, c); 2496 *pCharUTF8Length=0; 2497 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); 2498 2499 utf16Source=utf16; 2500 target=char0; 2501 ucnv_fromUnicode(cnv, 2502 &target, char0+sizeof(char0), 2503 &utf16Source, utf16+utf16Length, 2504 NULL, FALSE, &errorCode); 2505 *pChar0Length=(int32_t)(target-char0); 2506 2507 utf16Source=utf16; 2508 target=char1; 2509 ucnv_fromUnicode(cnv, 2510 &target, char1+sizeof(char1), 2511 &utf16Source, utf16+utf16Length, 2512 NULL, FALSE, &errorCode); 2513 *pChar1Length=(int32_t)(target-char1); 2514 2515 if(U_FAILURE(errorCode)) { 2516 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); 2517 return FALSE; 2518 } 2519 return TRUE; 2520 } 2521 2522 static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2523 char charUTF8[4], int32_t charUTF8Length, 2524 char char0[8], int32_t char0Length, 2525 char char1[8], int32_t char1Length) { 2526 char utf8[16]; 2527 int32_t utf8Length; 2528 2529 char output[16]; 2530 int32_t outputLength; 2531 2532 char invalidChars[8]; 2533 int8_t invalidLength; 2534 2535 const char *source; 2536 char *target; 2537 2538 UChar pivotBuffer[8]; 2539 UChar *pivotSource, *pivotTarget; 2540 2541 UErrorCode errorCode; 2542 int32_t i; 2543 2544 /* test truncated sequences */ 2545 errorCode=U_ZERO_ERROR; 2546 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2547 2548 memcpy(utf8, charUTF8, charUTF8Length); 2549 2550 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2551 /* truncated sequence? */ 2552 int32_t length=strlen(badUTF8[i]); 2553 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) { 2554 continue; 2555 } 2556 2557 /* assemble a string with the test character and the truncated sequence */ 2558 memcpy(utf8+charUTF8Length, badUTF8[i], length); 2559 utf8Length=charUTF8Length+length; 2560 2561 /* convert and check the invalidChars */ 2562 source=utf8; 2563 target=output; 2564 pivotSource=pivotTarget=pivotBuffer; 2565 errorCode=U_ZERO_ERROR; 2566 ucnv_convertEx(cnv, utf8Cnv, 2567 &target, output+sizeof(output), 2568 &source, utf8+utf8Length, 2569 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2570 TRUE, TRUE, /* reset & flush */ 2571 &errorCode); 2572 outputLength=(int32_t)(target-output); 2573 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { 2574 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i); 2575 continue; 2576 } 2577 2578 errorCode=U_ZERO_ERROR; 2579 invalidLength=(int8_t)sizeof(invalidChars); 2580 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode); 2581 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) { 2582 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i); 2583 } 2584 } 2585 } 2586 2587 static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2588 char charUTF8[4], int32_t charUTF8Length, 2589 char char0[8], int32_t char0Length, 2590 char char1[8], int32_t char1Length) { 2591 char utf8[600], expect[600]; 2592 int32_t utf8Length, expectLength; 2593 2594 char testName[32]; 2595 2596 UErrorCode errorCode; 2597 int32_t i; 2598 2599 errorCode=U_ZERO_ERROR; 2600 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode); 2601 2602 /* 2603 * assemble an input string with the test character between each 2604 * bad sequence, 2605 * and an expected string with repeated test character output 2606 */ 2607 memcpy(utf8, charUTF8, charUTF8Length); 2608 utf8Length=charUTF8Length; 2609 2610 memcpy(expect, char0, char0Length); 2611 expectLength=char0Length; 2612 2613 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2614 int32_t length=strlen(badUTF8[i]); 2615 memcpy(utf8+utf8Length, badUTF8[i], length); 2616 utf8Length+=length; 2617 2618 memcpy(utf8+utf8Length, charUTF8, charUTF8Length); 2619 utf8Length+=charUTF8Length; 2620 2621 memcpy(expect+expectLength, char1, char1Length); 2622 expectLength+=char1Length; 2623 } 2624 2625 /* expect that each bad UTF-8 sequence is detected and skipped */ 2626 strcpy(testName, "from bad UTF-8 to "); 2627 strcat(testName, converterName); 2628 2629 convertExMultiStreaming(utf8Cnv, cnv, 2630 utf8, utf8Length, 2631 expect, expectLength, 2632 testName, 2633 U_ZERO_ERROR); 2634 } 2635 2636 /* Test illegal UTF-8 input. */ 2637 static void TestConvertExFromUTF8() { 2638 static const char *const converterNames[]={ 2639 #if !UCONFIG_NO_LEGACY_CONVERSION 2640 "windows-1252", 2641 "shift-jis", 2642 #endif 2643 "us-ascii", 2644 "iso-8859-1", 2645 "utf-8" 2646 }; 2647 2648 UConverter *utf8Cnv, *cnv; 2649 UErrorCode errorCode; 2650 int32_t i; 2651 2652 /* fromUnicode versions of some character, from initial state and later */ 2653 char charUTF8[4], char0[8], char1[8]; 2654 int32_t charUTF8Length, char0Length, char1Length; 2655 2656 errorCode=U_ZERO_ERROR; 2657 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2658 if(U_FAILURE(errorCode)) { 2659 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2660 return; 2661 } 2662 2663 for(i=0; i<LENGTHOF(converterNames); ++i) { 2664 errorCode=U_ZERO_ERROR; 2665 cnv=ucnv_open(converterNames[i], &errorCode); 2666 if(U_FAILURE(errorCode)) { 2667 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode)); 2668 continue; 2669 } 2670 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) { 2671 continue; 2672 } 2673 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2674 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2675 ucnv_close(cnv); 2676 } 2677 ucnv_close(utf8Cnv); 2678 } 2679 2680 static void 2681 TestConvertAlgorithmic() { 2682 #if !UCONFIG_NO_LEGACY_CONVERSION 2683 static const uint8_t 2684 utf8[]={ 2685 /* 4e00 30a1 ff61 0410 */ 2686 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2687 }, 2688 shiftJIS[]={ 2689 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2690 }, 2691 /*errorTarget[]={*/ 2692 /* 2693 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2694 * SUB, SUB, 0x40, SUB, SUB, 0x40 2695 */ 2696 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ 2697 /*},*/ 2698 utf16[]={ 2699 0xfe, 0xff /* BOM only, no text */ 2700 }, 2701 utf32[]={ 2702 0xff, 0xfe, 0, 0 /* BOM only, no text */ 2703 }; 2704 2705 char target[100], utf8NUL[100], shiftJISNUL[100]; 2706 2707 UConverter *cnv; 2708 UErrorCode errorCode; 2709 2710 int32_t length; 2711 2712 errorCode=U_ZERO_ERROR; 2713 cnv=ucnv_open("Shift-JIS", &errorCode); 2714 if(U_FAILURE(errorCode)) { 2715 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2716 ucnv_close(cnv); 2717 return; 2718 } 2719 2720 memcpy(utf8NUL, utf8, sizeof(utf8)); 2721 utf8NUL[sizeof(utf8)]=0; 2722 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); 2723 shiftJISNUL[sizeof(shiftJIS)]=0; 2724 2725 /* 2726 * The to/from algorithmic convenience functions share a common implementation, 2727 * so we need not test all permutations of them. 2728 */ 2729 2730 /* length in, not terminated out */ 2731 errorCode=U_ZERO_ERROR; 2732 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode); 2733 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2734 length!=sizeof(shiftJIS) || 2735 memcmp(target, shiftJIS, length)!=0 2736 ) { 2737 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n", 2738 u_errorName(errorCode), length, sizeof(shiftJIS)); 2739 } 2740 2741 /* terminated in and out */ 2742 memset(target, 0x55, sizeof(target)); 2743 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2744 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode); 2745 if( errorCode!=U_ZERO_ERROR || 2746 length!=sizeof(utf8) || 2747 memcmp(target, utf8, length)!=0 2748 ) { 2749 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n", 2750 u_errorName(errorCode), length, sizeof(shiftJIS)); 2751 } 2752 2753 /* empty string, some target buffer */ 2754 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2755 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode); 2756 if( errorCode!=U_ZERO_ERROR || 2757 length!=0 2758 ) { 2759 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n", 2760 u_errorName(errorCode), length); 2761 } 2762 2763 /* pseudo-empty string, no target buffer */ 2764 errorCode=U_ZERO_ERROR; 2765 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2766 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2767 length!=0 2768 ) { 2769 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2770 u_errorName(errorCode), length); 2771 } 2772 2773 errorCode=U_ZERO_ERROR; 2774 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); 2775 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2776 length!=0 2777 ) { 2778 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2779 u_errorName(errorCode), length); 2780 } 2781 2782 /* bad arguments */ 2783 errorCode=U_MESSAGE_PARSE_ERROR; 2784 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2785 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2786 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2787 } 2788 2789 /* source==NULL */ 2790 errorCode=U_ZERO_ERROR; 2791 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode); 2792 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2793 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode)); 2794 } 2795 2796 /* illegal alg. type */ 2797 errorCode=U_ZERO_ERROR; 2798 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode); 2799 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2800 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode)); 2801 } 2802 ucnv_close(cnv); 2803 #endif 2804 } 2805 2806 static void TestLMBCSMaxChar(void) { 2807 static const struct { 2808 int8_t maxSize; 2809 const char *name; 2810 } converter[] = { 2811 /* some non-LMBCS converters - perfect test setup here */ 2812 { 1, "US-ASCII"}, 2813 { 1, "ISO-8859-1"}, 2814 2815 /* BEGIN android-changed */ 2816 /* ICU ticket#7226 */ 2817 { 2, "UTF-16"}, 2818 { 2, "UTF-16BE"}, 2819 /* END android-changed */ 2820 2821 { 3, "UTF-8"}, 2822 { 3, "CESU-8"}, 2823 { 3, "SCSU"}, 2824 { 4, "UTF-32"}, 2825 { 4, "UTF-7"}, 2826 { 4, "IMAP-mailbox-name"}, 2827 { 4, "BOCU-1"}, 2828 2829 { 1, "windows-1256"}, 2830 { 2, "Shift-JIS"}, 2831 { 2, "ibm-16684"}, 2832 { 3, "ibm-930"}, 2833 { 3, "ibm-1390"}, 2834 { 4, "*test3"}, 2835 { 16,"*test4"}, 2836 2837 { 4, "ISCII"}, 2838 { 4, "HZ"}, 2839 2840 { 3, "ISO-2022"}, 2841 { 3, "ISO-2022-KR"}, 2842 { 6, "ISO-2022-JP"}, 2843 { 8, "ISO-2022-CN"}, 2844 2845 /* LMBCS */ 2846 { 3, "LMBCS-1"}, 2847 { 3, "LMBCS-2"}, 2848 { 3, "LMBCS-3"}, 2849 { 3, "LMBCS-4"}, 2850 { 3, "LMBCS-5"}, 2851 { 3, "LMBCS-6"}, 2852 { 3, "LMBCS-8"}, 2853 { 3, "LMBCS-11"}, 2854 { 3, "LMBCS-16"}, 2855 { 3, "LMBCS-17"}, 2856 { 3, "LMBCS-18"}, 2857 { 3, "LMBCS-19"} 2858 }; 2859 int32_t idx; 2860 2861 for (idx = 0; idx < LENGTHOF(converter); idx++) { 2862 UErrorCode status = U_ZERO_ERROR; 2863 UConverter *cnv = cnv_open(converter[idx].name, &status); 2864 if (U_FAILURE(status)) { 2865 continue; 2866 } 2867 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { 2868 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", 2869 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); 2870 } 2871 ucnv_close(cnv); 2872 } 2873 2874 /* mostly test that the macro compiles */ 2875 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { 2876 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); 2877 } 2878 } 2879 2880 2881 static void TestJ1968(void) { 2882 UErrorCode err = U_ZERO_ERROR; 2883 UConverter *cnv; 2884 char myConvName[] = "My really really really really really really really really really really really" 2885 " really really really really really really really really really really really" 2886 " really really really really really really really really long converter name"; 2887 UChar myConvNameU[sizeof(myConvName)]; 2888 2889 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName)); 2890 2891 err = U_ZERO_ERROR; 2892 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0; 2893 cnv = ucnv_openU(myConvNameU, &err); 2894 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2895 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2896 } 2897 2898 err = U_ZERO_ERROR; 2899 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2900 cnv = ucnv_openU(myConvNameU, &err); 2901 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2902 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2903 } 2904 2905 err = U_ZERO_ERROR; 2906 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 2907 cnv = ucnv_openU(myConvNameU, &err); 2908 if (cnv || err != U_FILE_ACCESS_ERROR) { 2909 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2910 } 2911 2912 2913 2914 2915 err = U_ZERO_ERROR; 2916 cnv = ucnv_open(myConvName, &err); 2917 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2918 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2919 } 2920 2921 err = U_ZERO_ERROR; 2922 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ','; 2923 cnv = ucnv_open(myConvName, &err); 2924 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2925 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2926 } 2927 2928 err = U_ZERO_ERROR; 2929 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2930 cnv = ucnv_open(myConvName, &err); 2931 if (cnv || err != U_FILE_ACCESS_ERROR) { 2932 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2933 } 2934 2935 err = U_ZERO_ERROR; 2936 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2937 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7); 2938 cnv = ucnv_open(myConvName, &err); 2939 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2940 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2941 } 2942 2943 /* The comma isn't really a part of the converter name. */ 2944 err = U_ZERO_ERROR; 2945 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2946 cnv = ucnv_open(myConvName, &err); 2947 if (cnv || err != U_FILE_ACCESS_ERROR) { 2948 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2949 } 2950 2951 err = U_ZERO_ERROR; 2952 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' '; 2953 cnv = ucnv_open(myConvName, &err); 2954 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2955 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2956 } 2957 2958 err = U_ZERO_ERROR; 2959 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 2960 cnv = ucnv_open(myConvName, &err); 2961 if (cnv || err != U_FILE_ACCESS_ERROR) { 2962 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2963 } 2964 2965 } 2966 2967 #if !UCONFIG_NO_LEGACY_CONVERSION 2968 static void 2969 testSwap(const char *name, UBool swap) { 2970 /* 2971 * Test Unicode text. 2972 * Contains characters that are the highest for some of the 2973 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the 2974 * tables copies the entire tables. 2975 */ 2976 static const UChar text[]={ 2977 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a 2978 }; 2979 2980 UChar uNormal[32], uSwapped[32]; 2981 char normal[32], swapped[32]; 2982 const UChar *pcu; 2983 UChar *pu; 2984 char *pc; 2985 int32_t i, normalLength, swappedLength; 2986 UChar u; 2987 char c; 2988 2989 const char *swappedName; 2990 UConverter *cnv, *swapCnv; 2991 UErrorCode errorCode; 2992 2993 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */ 2994 2995 /* open both the normal and the LF/NL-swapping converters */ 2996 strcpy(swapped, name); 2997 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING); 2998 2999 errorCode=U_ZERO_ERROR; 3000 swapCnv=ucnv_open(swapped, &errorCode); 3001 cnv=ucnv_open(name, &errorCode); 3002 if(U_FAILURE(errorCode)) { 3003 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode)); 3004 goto cleanup; 3005 } 3006 3007 /* the name must contain the swap option if and only if we expect the converter to swap */ 3008 swappedName=ucnv_getName(swapCnv, &errorCode); 3009 if(U_FAILURE(errorCode)) { 3010 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode)); 3011 goto cleanup; 3012 } 3013 3014 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING); 3015 if(swap != (pc!=NULL)) { 3016 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap); 3017 goto cleanup; 3018 } 3019 3020 /* convert to EBCDIC */ 3021 pcu=text; 3022 pc=normal; 3023 ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3024 normalLength=(int32_t)(pc-normal); 3025 3026 pcu=text; 3027 pc=swapped; 3028 ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3029 swappedLength=(int32_t)(pc-swapped); 3030 3031 if(U_FAILURE(errorCode)) { 3032 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode)); 3033 goto cleanup; 3034 } 3035 3036 /* compare EBCDIC output */ 3037 if(normalLength!=swappedLength) { 3038 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3039 goto cleanup; 3040 } 3041 for(i=0; i<normalLength; ++i) { 3042 /* swap EBCDIC LF/NL for comparison */ 3043 c=normal[i]; 3044 if(swap) { 3045 if(c==0x15) { 3046 c=0x25; 3047 } else if(c==0x25) { 3048 c=0x15; 3049 } 3050 } 3051 3052 if(c!=swapped[i]) { 3053 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]); 3054 goto cleanup; 3055 } 3056 } 3057 3058 /* convert back to Unicode (may not roundtrip) */ 3059 pc=normal; 3060 pu=uNormal; 3061 ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); 3062 normalLength=(int32_t)(pu-uNormal); 3063 3064 pc=normal; 3065 pu=uSwapped; 3066 ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); 3067 swappedLength=(int32_t)(pu-uSwapped); 3068 3069 if(U_FAILURE(errorCode)) { 3070 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode)); 3071 goto cleanup; 3072 } 3073 3074 /* compare EBCDIC output */ 3075 if(normalLength!=swappedLength) { 3076 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3077 goto cleanup; 3078 } 3079 for(i=0; i<normalLength; ++i) { 3080 /* swap EBCDIC LF/NL for comparison */ 3081 u=uNormal[i]; 3082 if(swap) { 3083 if(u==0xa) { 3084 u=0x85; 3085 } else if(u==0x85) { 3086 u=0xa; 3087 } 3088 } 3089 3090 if(u!=uSwapped[i]) { 3091 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]); 3092 goto cleanup; 3093 } 3094 } 3095 3096 /* clean up */ 3097 cleanup: 3098 ucnv_close(cnv); 3099 ucnv_close(swapCnv); 3100 } 3101 3102 static void 3103 TestEBCDICSwapLFNL() { 3104 static const struct { 3105 const char *name; 3106 UBool swap; 3107 } tests[]={ 3108 { "ibm-37", TRUE }, 3109 { "ibm-1047", TRUE }, 3110 { "ibm-1140", TRUE }, 3111 { "ibm-930", TRUE }, 3112 { "iso-8859-3", FALSE } 3113 }; 3114 3115 int i; 3116 3117 for(i=0; i<LENGTHOF(tests); ++i) { 3118 testSwap(tests[i].name, tests[i].swap); 3119 } 3120 } 3121 #else 3122 static void 3123 TestEBCDICSwapLFNL() { 3124 /* test nothing... */ 3125 } 3126 #endif 3127 3128 static const UVersionInfo ICU_34 = {3,4,0,0}; 3129 3130 static void TestFromUCountPending(){ 3131 #if !UCONFIG_NO_LEGACY_CONVERSION 3132 UErrorCode status = U_ZERO_ERROR; 3133 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */ 3134 static const struct { 3135 UChar input[6]; 3136 int32_t len; 3137 int32_t exp; 3138 }fromUnicodeTests[] = { 3139 /*m:n conversion*/ 3140 {{0xdbc4},1,1}, 3141 {{ 0xdbc4, 0xde34, 0xd84d},3,1}, 3142 {{ 0xdbc4, 0xde34, 0xd900},3,3}, 3143 }; 3144 int i; 3145 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3146 if(U_FAILURE(status)){ 3147 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3148 return; 3149 } 3150 for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) { 3151 char tgt[10]; 3152 char* target = tgt; 3153 char* targetLimit = target + 10; 3154 const UChar* source = fromUnicodeTests[i].input; 3155 const UChar* sourceLimit = source + fromUnicodeTests[i].len; 3156 int32_t len = 0; 3157 ucnv_reset(cnv); 3158 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3159 len = ucnv_fromUCountPending(cnv, &status); 3160 if(U_FAILURE(status)){ 3161 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3162 status = U_ZERO_ERROR; 3163 continue; 3164 } 3165 if(len != fromUnicodeTests[i].exp){ 3166 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n"); 3167 } 3168 } 3169 status = U_ZERO_ERROR; 3170 { 3171 /* 3172 * The converter has to read the tail before it knows that 3173 * only head alone matches. 3174 * At the end, the output for head will overflow the target, 3175 * middle will be pending, and tail will not have been consumed. 3176 */ 3177 /* 3178 \U00101234 -> x (<U101234> \x07 |0) 3179 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0) 3180 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0) 3181 \U00060007 -> unassigned 3182 */ 3183 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ 3184 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ 3185 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ 3186 char tgt[10]; 3187 char* target = tgt; 3188 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ 3189 const UChar* source = head; 3190 const UChar* sourceLimit = source + u_strlen(head); 3191 int32_t len = 0; 3192 ucnv_reset(cnv); 3193 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3194 len = ucnv_fromUCountPending(cnv, &status); 3195 if(U_FAILURE(status)){ 3196 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3197 status = U_ZERO_ERROR; 3198 } 3199 if(len!=4){ 3200 log_err("ucnv_fromUInputHeld did not return correct length for head\n"); 3201 } 3202 source = middle; 3203 sourceLimit = source + u_strlen(middle); 3204 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3205 len = ucnv_fromUCountPending(cnv, &status); 3206 if(U_FAILURE(status)){ 3207 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3208 status = U_ZERO_ERROR; 3209 } 3210 if(len!=5){ 3211 log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); 3212 } 3213 source = tail; 3214 sourceLimit = source + u_strlen(tail); 3215 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3216 if(status != U_BUFFER_OVERFLOW_ERROR){ 3217 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3218 } 3219 status = U_ZERO_ERROR; 3220 len = ucnv_fromUCountPending(cnv, &status); 3221 /* middle[1] is pending, tail has not been consumed */ 3222 if(U_FAILURE(status)){ 3223 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); 3224 } 3225 if(len!=1){ 3226 log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); 3227 } 3228 } 3229 ucnv_close(cnv); 3230 #endif 3231 } 3232 3233 static void 3234 TestToUCountPending(){ 3235 #if !UCONFIG_NO_LEGACY_CONVERSION 3236 UErrorCode status = U_ZERO_ERROR; 3237 static const struct { 3238 char input[6]; 3239 int32_t len; 3240 int32_t exp; 3241 }toUnicodeTests[] = { 3242 /*m:n conversion*/ 3243 {{0x05, 0x01, 0x02},3,3}, 3244 {{0x01, 0x02},2,2}, 3245 {{0x07, 0x00, 0x01, 0x02},4,4}, 3246 }; 3247 3248 int i; 3249 UConverterToUCallback *oldToUAction= NULL; 3250 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3251 if(U_FAILURE(status)){ 3252 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3253 return; 3254 } 3255 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3256 for(i=0; i<LENGTHOF(toUnicodeTests); ++i) { 3257 UChar tgt[10]; 3258 UChar* target = tgt; 3259 UChar* targetLimit = target + 20; 3260 const char* source = toUnicodeTests[i].input; 3261 const char* sourceLimit = source + toUnicodeTests[i].len; 3262 int32_t len = 0; 3263 ucnv_reset(cnv); 3264 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3265 len = ucnv_toUCountPending(cnv,&status); 3266 if(U_FAILURE(status)){ 3267 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3268 status = U_ZERO_ERROR; 3269 continue; 3270 } 3271 if(len != toUnicodeTests[i].exp){ 3272 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n"); 3273 } 3274 } 3275 status = U_ZERO_ERROR; 3276 ucnv_close(cnv); 3277 3278 { 3279 /* 3280 * The converter has to read the tail before it knows that 3281 * only head alone matches. 3282 * At the end, the output for head will overflow the target, 3283 * mid will be pending, and tail will not have been consumed. 3284 */ 3285 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00}; 3286 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 }; 3287 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 }; 3288 /* 3289 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0) 3290 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0) 3291 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3) 3292 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") 3293 */ 3294 UChar tgt[10]; 3295 UChar* target = tgt; 3296 UChar* targetLimit = target + 1; /* expect overflow from converting */ 3297 const char* source = head; 3298 const char* sourceLimit = source + strlen(head); 3299 int32_t len = 0; 3300 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); 3301 if(U_FAILURE(status)){ 3302 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3303 return; 3304 } 3305 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3306 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3307 len = ucnv_toUCountPending(cnv,&status); 3308 if(U_FAILURE(status)){ 3309 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3310 } 3311 if(len != 4){ 3312 log_err("Did not get the expected len for head.\n"); 3313 } 3314 source=mid; 3315 sourceLimit = source+strlen(mid); 3316 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3317 len = ucnv_toUCountPending(cnv,&status); 3318 if(U_FAILURE(status)){ 3319 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3320 } 3321 if(len != 8){ 3322 log_err("Did not get the expected len for mid.\n"); 3323 } 3324 3325 source=tail; 3326 sourceLimit = source+strlen(tail); 3327 targetLimit = target; 3328 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3329 if(status != U_BUFFER_OVERFLOW_ERROR){ 3330 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3331 } 3332 status = U_ZERO_ERROR; 3333 len = ucnv_toUCountPending(cnv,&status); 3334 /* mid[4] is pending, tail has not been consumed */ 3335 if(U_FAILURE(status)){ 3336 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status)); 3337 } 3338 if(len != 4){ 3339 log_err("Did not get the expected len for tail.\n"); 3340 } 3341 ucnv_close(cnv); 3342 } 3343 #endif 3344 } 3345 3346 static void TestOneDefaultNameChange(const char *name, const char *expected) { 3347 UErrorCode status = U_ZERO_ERROR; 3348 UConverter *cnv; 3349 ucnv_setDefaultName(name); 3350 if(strcmp(ucnv_getDefaultName(), expected)==0) 3351 log_verbose("setDefaultName of %s works.\n", name); 3352 else 3353 log_err("setDefaultName of %s failed\n", name); 3354 cnv=ucnv_open(NULL, &status); 3355 if (U_FAILURE(status) || cnv == NULL) { 3356 log_err("opening the default converter of %s failed\n", name); 3357 return; 3358 } 3359 if(strcmp(ucnv_getName(cnv, &status), expected)==0) 3360 log_verbose("ucnv_getName of %s works.\n", name); 3361 else 3362 log_err("ucnv_getName of %s failed\n", name); 3363 ucnv_close(cnv); 3364 } 3365 3366 static void TestDefaultName(void) { 3367 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ 3368 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; 3369 strcpy(defaultName, ucnv_getDefaultName()); 3370 3371 log_verbose("getDefaultName returned %s\n", defaultName); 3372 3373 /*change the default name by setting it */ 3374 TestOneDefaultNameChange("UTF-8", "UTF-8"); 3375 #if U_CHARSET_IS_UTF8 3376 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); 3377 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); 3378 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); 3379 #else 3380 # if !UCONFIG_NO_LEGACY_CONVERSION 3381 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); 3382 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); 3383 # endif 3384 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); 3385 #endif 3386 3387 /*set the default name back*/ 3388 ucnv_setDefaultName(defaultName); 3389 } 3390 3391 /* Test that ucnv_compareNames() matches names according to spec. ----------- */ 3392 3393 static U_INLINE int 3394 sign(int n) { 3395 if(n==0) { 3396 return 0; 3397 } else if(n<0) { 3398 return -1; 3399 } else /* n>0 */ { 3400 return 1; 3401 } 3402 } 3403 3404 static void 3405 compareNames(const char **names) { 3406 const char *relation, *name1, *name2; 3407 int rel, result; 3408 3409 relation=*names++; 3410 if(*relation=='=') { 3411 rel = 0; 3412 } else if(*relation=='<') { 3413 rel = -1; 3414 } else { 3415 rel = 1; 3416 } 3417 3418 name1=*names++; 3419 if(name1==NULL) { 3420 return; 3421 } 3422 while((name2=*names++)!=NULL) { 3423 result=ucnv_compareNames(name1, name2); 3424 if(sign(result)!=rel) { 3425 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); 3426 } 3427 name1=name2; 3428 } 3429 } 3430 3431 static void 3432 TestCompareNames() { 3433 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; 3434 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; 3435 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; 3436 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; 3437 3438 compareNames(equalUTF8); 3439 compareNames(equalIBM); 3440 compareNames(lessMac); 3441 compareNames(lessUTF080); 3442 } 3443 3444 static void 3445 TestSubstString() { 3446 static const UChar surrogate[1]={ 0xd900 }; 3447 char buffer[16]; 3448 3449 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3450 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3451 UConverter *cnv; 3452 UErrorCode errorCode; 3453 int32_t length; 3454 int8_t len8; 3455 3456 /* UTF-16/32: test that the BOM is output before the sub character */ 3457 errorCode=U_ZERO_ERROR; 3458 cnv=ucnv_open("UTF-16", &errorCode); 3459 if(U_FAILURE(errorCode)) { 3460 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); 3461 return; 3462 } 3463 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3464 ucnv_close(cnv); 3465 if(U_FAILURE(errorCode) || 3466 length!=4 || 3467 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3468 ) { 3469 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); 3470 } 3471 3472 errorCode=U_ZERO_ERROR; 3473 cnv=ucnv_open("UTF-32", &errorCode); 3474 if(U_FAILURE(errorCode)) { 3475 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); 3476 return; 3477 } 3478 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3479 ucnv_close(cnv); 3480 if(U_FAILURE(errorCode) || 3481 length!=8 || 3482 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3483 ) { 3484 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); 3485 } 3486 3487 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ 3488 errorCode=U_ZERO_ERROR; 3489 cnv=ucnv_open("ISO-8859-1", &errorCode); 3490 if(U_FAILURE(errorCode)) { 3491 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); 3492 return; 3493 } 3494 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3495 if(U_FAILURE(errorCode)) { 3496 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); 3497 } else { 3498 len8 = sizeof(buffer); 3499 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3500 /* Stateless converter, we expect the string converted to charset bytes. */ 3501 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { 3502 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); 3503 } 3504 } 3505 ucnv_close(cnv); 3506 3507 #if !UCONFIG_NO_LEGACY_CONVERSION 3508 errorCode=U_ZERO_ERROR; 3509 cnv=ucnv_open("HZ", &errorCode); 3510 if(U_FAILURE(errorCode)) { 3511 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); 3512 return; 3513 } 3514 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3515 if(U_FAILURE(errorCode)) { 3516 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); 3517 } else { 3518 len8 = sizeof(buffer); 3519 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3520 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ 3521 if(U_FAILURE(errorCode) || len8!=0) { 3522 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); 3523 } 3524 } 3525 ucnv_close(cnv); 3526 #endif 3527 /* 3528 * Further testing of ucnv_setSubstString() is done via intltest convert. 3529 * We do not test edge cases of illegal arguments and similar because the 3530 * function implementation uses all of its parameters in calls to other 3531 * functions with UErrorCode parameters. 3532 */ 3533 } 3534 3535 static void 3536 InvalidArguments() { 3537 UConverter *cnv; 3538 UErrorCode errorCode; 3539 char charBuffer[2] = {1, 1}; 3540 char ucharAsCharBuffer[2] = {2, 2}; 3541 char *charsPtr = charBuffer; 3542 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; 3543 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); 3544 3545 errorCode=U_ZERO_ERROR; 3546 cnv=ucnv_open("UTF-8", &errorCode); 3547 if(U_FAILURE(errorCode)) { 3548 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); 3549 return; 3550 } 3551 3552 errorCode=U_ZERO_ERROR; 3553 /* This one should fail because an incomplete UChar is being passed in */ 3554 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); 3555 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3556 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3557 } 3558 3559 errorCode=U_ZERO_ERROR; 3560 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3561 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); 3562 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3563 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3564 } 3565 3566 errorCode=U_ZERO_ERROR; 3567 /* This one should fail because an incomplete UChar is being passed in */ 3568 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3569 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3570 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3571 } 3572 3573 errorCode=U_ZERO_ERROR; 3574 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3575 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3576 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3577 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3578 } 3579 3580 if (charBuffer[0] != 1 || charBuffer[1] != 1 3581 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) 3582 { 3583 log_err("Data was incorrectly written to buffers\n"); 3584 } 3585 3586 ucnv_close(cnv); 3587 } 3588 3589 static void TestGetName() { 3590 static const char *const names[] = { 3591 "Unicode", "UTF-16", 3592 "UnicodeBigUnmarked", "UTF-16BE", 3593 "UnicodeBig", "UTF-16BE,version=1", 3594 "UnicodeLittleUnmarked", "UTF-16LE", 3595 "UnicodeLittle", "UTF-16LE,version=1", 3596 "x-UTF-16LE-BOM", "UTF-16LE,version=1" 3597 }; 3598 int32_t i; 3599 for(i = 0; i < LENGTHOF(names); i += 2) { 3600 UErrorCode errorCode = U_ZERO_ERROR; 3601 UConverter *cnv = ucnv_open(names[i], &errorCode); 3602 if(U_SUCCESS(errorCode)) { 3603 const char *name = ucnv_getName(cnv, &errorCode); 3604 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { 3605 log_err("ucnv_getName(%s) = %s != %s -- %s\n", 3606 names[i], name, names[i+1], u_errorName(errorCode)); 3607 } 3608 ucnv_close(cnv); 3609 } 3610 } 3611 } 3612 3613 static void TestUTFBOM() { 3614 static const UChar a16[] = { 0x61 }; 3615 static const char *const names[] = { 3616 "UTF-16", 3617 "UTF-16,version=1", 3618 "UTF-16BE", 3619 "UnicodeBig", 3620 "UTF-16LE", 3621 "UnicodeLittle" 3622 }; 3623 static const uint8_t expected[][5] = { 3624 #if U_IS_BIG_ENDIAN 3625 { 4, 0xfe, 0xff, 0, 0x61 }, 3626 { 4, 0xfe, 0xff, 0, 0x61 }, 3627 #else 3628 { 4, 0xff, 0xfe, 0x61, 0 }, 3629 { 4, 0xff, 0xfe, 0x61, 0 }, 3630 #endif 3631 3632 { 2, 0, 0x61 }, 3633 { 4, 0xfe, 0xff, 0, 0x61 }, 3634 3635 { 2, 0x61, 0 }, 3636 { 4, 0xff, 0xfe, 0x61, 0 } 3637 }; 3638 3639 char bytes[10]; 3640 int32_t i; 3641 3642 for(i = 0; i < LENGTHOF(names); ++i) { 3643 UErrorCode errorCode = U_ZERO_ERROR; 3644 UConverter *cnv = ucnv_open(names[i], &errorCode); 3645 int32_t length = 0; 3646 const uint8_t *exp = expected[i]; 3647 if (U_FAILURE(errorCode)) { 3648 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); 3649 continue; 3650 } 3651 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); 3652 3653 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { 3654 log_err("unexpected %s BOM writing behavior -- %s\n", 3655 names[i], u_errorName(errorCode)); 3656 } 3657 ucnv_close(cnv); 3658 } 3659 } 3660