1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /***************************************************************************** 7 * 8 * File CU_CAPITST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda Ported for C API 13 ****************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <ctype.h> 19 #include "unicode/uloc.h" 20 #include "unicode/ucnv.h" 21 #include "unicode/ucnv_err.h" 22 #include "unicode/putil.h" 23 #include "unicode/uset.h" 24 #include "unicode/ustring.h" 25 #include "ucnv_bld.h" /* for sizeof(UConverter) */ 26 #include "cmemory.h" /* for UAlignedMemory */ 27 #include "cintltst.h" 28 #include "ccapitst.h" 29 #include "cstring.h" 30 31 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 32 33 #define NUM_CODEPAGE 1 34 #define MAX_FILE_LEN 1024*20 35 #define UCS_FILE_NAME_SIZE 512 36 37 /*returns an action other than the one provided*/ 38 #if !UCONFIG_NO_LEGACY_CONVERSION 39 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); 40 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); 41 #endif 42 43 static UConverter * 44 cnv_open(const char *name, UErrorCode *pErrorCode) { 45 if(name!=NULL && name[0]=='*') { 46 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); 47 } else { 48 return ucnv_open(name, pErrorCode); 49 } 50 } 51 52 53 static void ListNames(void); 54 static void TestFlushCache(void); 55 static void TestDuplicateAlias(void); 56 static void TestCCSID(void); 57 static void TestJ932(void); 58 static void TestJ1968(void); 59 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 60 static void TestLMBCSMaxChar(void); 
61 #endif 62 63 #if !UCONFIG_NO_LEGACY_CONVERSION 64 static void TestConvertSafeCloneCallback(void); 65 #endif 66 67 static void TestEBCDICSwapLFNL(void); 68 static void TestConvertEx(void); 69 static void TestConvertExFromUTF8(void); 70 static void TestConvertExFromUTF8_C5F0(void); 71 static void TestConvertAlgorithmic(void); 72 void TestDefaultConverterError(void); /* defined in cctest.c */ 73 void TestDefaultConverterSet(void); /* defined in cctest.c */ 74 static void TestToUCountPending(void); 75 static void TestFromUCountPending(void); 76 static void TestDefaultName(void); 77 static void TestCompareNames(void); 78 static void TestSubstString(void); 79 static void InvalidArguments(void); 80 static void TestGetName(void); 81 static void TestUTFBOM(void); 82 83 void addTestConvert(TestNode** root); 84 85 void addTestConvert(TestNode** root) 86 { 87 addTest(root, &ListNames, "tsconv/ccapitst/ListNames"); 88 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert"); 89 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); 90 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); 91 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); 92 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); 93 #if !UCONFIG_NO_LEGACY_CONVERSION 94 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); 95 #endif 96 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); 97 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); 98 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); 99 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 100 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); 101 #endif 102 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); 103 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); 104 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); 105 addTest(root, 
&TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); 106 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); 107 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); 108 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); 109 #if !UCONFIG_NO_FILE_IO 110 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); 111 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); 112 #endif 113 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); 114 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); 115 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); 116 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); 117 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); 118 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); 119 } 120 121 static void ListNames(void) { 122 UErrorCode err = U_ZERO_ERROR; 123 int32_t testLong1 = 0; 124 const char* available_conv; 125 UEnumeration *allNamesEnum = NULL; 126 int32_t allNamesCount = 0; 127 uint16_t count; 128 129 log_verbose("Testing ucnv_openAllNames()..."); 130 allNamesEnum = ucnv_openAllNames(&err); 131 if(U_FAILURE(err)) { 132 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); 133 } 134 else { 135 const char *string = NULL; 136 int32_t len = 0; 137 int32_t count1 = 0; 138 int32_t count2 = 0; 139 allNamesCount = uenum_count(allNamesEnum, &err); 140 while ((string = uenum_next(allNamesEnum, &len, &err))) { 141 count1++; 142 log_verbose("read \"%s\", length %i\n", string, len); 143 } 144 if (U_FAILURE(err)) { 145 log_err("FAILURE! uenum_next(allNamesEnum...) 
set an error: %s\n", u_errorName(err)); 146 err = U_ZERO_ERROR; 147 } 148 uenum_reset(allNamesEnum, &err); 149 while ((string = uenum_next(allNamesEnum, &len, &err))) { 150 count2++; 151 ucnv_close(ucnv_open(string, &err)); 152 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable"); 153 err = U_ZERO_ERROR; 154 } 155 if (count1 != count2) { 156 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n"); 157 } 158 } 159 uenum_close(allNamesEnum); 160 err = U_ZERO_ERROR; 161 162 /*Tests ucnv_getAvailableName(), getAvialableCount()*/ 163 164 log_verbose("Testing ucnv_countAvailable()..."); 165 166 testLong1=ucnv_countAvailable(); 167 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount); 168 169 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ 170 171 available_conv = ucnv_getAvailableName(testLong1); 172 /*test ucnv_getAvailableName with err condition*/ 173 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 "); 174 available_conv = ucnv_getAvailableName(-1); 175 if(available_conv != NULL){ 176 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n"); 177 } 178 179 /* Test ucnv_countAliases() etc. */ 180 count = ucnv_countAliases("utf-8", &err); 181 if(U_FAILURE(err)) { 182 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); 183 } else if(count <= 0) { 184 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); 185 } else { 186 /* try to get the aliases individually */ 187 const char *alias; 188 alias = ucnv_getAlias("utf-8", 0, &err); 189 if(U_FAILURE(err)) { 190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err)); 191 } else if(strcmp("UTF-8", alias) != 0) { 192 log_err("FAILURE! 
ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias); 193 } else { 194 uint16_t aliasNum; 195 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 196 alias = ucnv_getAlias("utf-8", aliasNum, &err); 197 if(U_FAILURE(err)) { 198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 199 } else if(strlen(alias) > 20) { 200 /* sanity check */ 201 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias); 202 } else { 203 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias); 204 } 205 } 206 if(U_SUCCESS(err)) { 207 /* try to fill an array with all aliases */ 208 const char **aliases; 209 aliases=(const char **)malloc(count * sizeof(const char *)); 210 if(aliases != 0) { 211 ucnv_getAliases("utf-8", aliases, &err); 212 if(U_FAILURE(err)) { 213 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err)); 214 } else { 215 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 216 /* compare the pointers with the ones returned individually */ 217 alias = ucnv_getAlias("utf-8", aliasNum, &err); 218 if(U_FAILURE(err)) { 219 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 220 } else if(aliases[aliasNum] != alias) { 221 log_err("FAILURE! 
ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum); 222 } 223 } 224 } 225 free((char **)aliases); 226 } 227 } 228 } 229 } 230 } 231 232 233 static void TestConvert() 234 { 235 #if !UCONFIG_NO_LEGACY_CONVERSION 236 char myptr[4]; 237 char save[4]; 238 int32_t testLong1 = 0; 239 uint16_t rest = 0; 240 int32_t len = 0; 241 int32_t x = 0; 242 FILE* ucs_file_in = NULL; 243 UChar BOM = 0x0000; 244 UChar myUChar = 0x0000; 245 char* mytarget; /* [MAX_FILE_LEN] */ 246 char* mytarget_1; 247 char* mytarget_use; 248 UChar* consumedUni = NULL; 249 char* consumed = NULL; 250 char* output_cp_buffer; /* [MAX_FILE_LEN] */ 251 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */ 252 UChar* ucs_file_buffer_use; 253 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ 254 UChar* my_ucs_file_buffer_1; 255 int8_t ii = 0; 256 uint16_t codepage_index = 0; 257 int32_t cp = 0; 258 UErrorCode err = U_ZERO_ERROR; 259 char ucs_file_name[UCS_FILE_NAME_SIZE]; 260 UConverterFromUCallback MIA1, MIA1_2; 261 UConverterToUCallback MIA2, MIA2_2; 262 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2; 263 UConverter* someConverters[5]; 264 UConverter* myConverter = 0; 265 UChar* displayname = 0; 266 267 const char* locale; 268 269 UChar* uchar1 = 0; 270 UChar* uchar2 = 0; 271 UChar* uchar3 = 0; 272 int32_t targetcapacity2; 273 int32_t targetcapacity; 274 int32_t targetsize; 275 int32_t disnamelen; 276 277 const UChar* tmp_ucs_buf; 278 const UChar* tmp_consumedUni=NULL; 279 const char* tmp_mytarget_use; 280 const char* tmp_consumed; 281 282 /****************************************************************** 283 Checking Unicode -> ksc 284 ******************************************************************/ 285 286 const char* CodePagesToTest[NUM_CODEPAGE] = 287 { 288 "ibm-949_P110-1999" 289 290 291 }; 292 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] = 293 { 294 949 295 }; 296 297 298 const int8_t CodePagesMinChars[NUM_CODEPAGE] = 299 { 300 1 301 302 }; 303 304 
const int8_t CodePagesMaxChars[NUM_CODEPAGE] = 305 { 306 2 307 308 }; 309 310 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] = 311 { 312 0xAFFE 313 }; 314 315 const char* CodePagesTestFiles[NUM_CODEPAGE] = 316 { 317 "uni-text.bin" 318 }; 319 320 321 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] = 322 { 323 UCNV_IBM 324 325 }; 326 327 const char* CodePagesLocale[NUM_CODEPAGE] = 328 { 329 "ko_KR" 330 }; 331 332 UConverterFromUCallback oldFromUAction = NULL; 333 UConverterToUCallback oldToUAction = NULL; 334 const void* oldFromUContext = NULL; 335 const void* oldToUContext = NULL; 336 337 /* Allocate memory */ 338 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); 339 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0])); 340 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0])); 341 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0])); 342 343 ucs_file_buffer_use = ucs_file_buffer; 344 mytarget_1=mytarget; 345 mytarget_use = mytarget; 346 my_ucs_file_buffer_1=my_ucs_file_buffer; 347 348 /* flush the converter cache to get a consistent state before the flushing is tested */ 349 ucnv_flushCache(); 350 351 /*Testing ucnv_openU()*/ 352 { 353 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/ 354 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */ 355 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */ 356 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"}; 357 UChar illegalName[100]; 358 UConverter *converter=NULL; 359 err=U_ZERO_ERROR; 360 converter=ucnv_openU(converterName, &err); 361 if(U_FAILURE(err)){ 362 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err)); 363 } 364 ucnv_close(converter); 365 err=U_ZERO_ERROR; 366 converter=ucnv_openU(NULL, &err); 367 if(U_FAILURE(err)){ 368 log_err("FAILURE! ucnv_openU(NULL, err) failed. 
%s\n", myErrorName(err)); 369 } 370 ucnv_close(converter); 371 /*testing with error value*/ 372 err=U_ILLEGAL_ARGUMENT_ERROR; 373 converter=ucnv_openU(converterName, &err); 374 if(!(converter == NULL)){ 375 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n"); 376 } 377 ucnv_close(converter); 378 err=U_ZERO_ERROR; 379 u_uastrcpy(illegalName, ""); 380 u_uastrcpy(illegalName, illegalNameChars); 381 ucnv_openU(illegalName, &err); 382 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){ 383 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n"); 384 } 385 386 err=U_ZERO_ERROR; 387 ucnv_openU(firstSortedName, &err); 388 if(err!=U_FILE_ACCESS_ERROR){ 389 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n"); 390 } 391 392 err=U_ZERO_ERROR; 393 ucnv_openU(lastSortedName, &err); 394 if(err!=U_FILE_ACCESS_ERROR){ 395 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n"); 396 } 397 398 err=U_ZERO_ERROR; 399 } 400 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n"); 401 { 402 UConverter *cnv=NULL; 403 err=U_ZERO_ERROR; 404 cnv=ucnv_open("ibm-949,Madhu", &err); 405 if(U_FAILURE(err)){ 406 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. 
%s\n", myErrorName(err)); 407 } 408 ucnv_close(cnv); 409 410 } 411 /*Testing ucnv_convert()*/ 412 { 413 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0; 414 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; 415 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; 416 char *target=0; 417 sourceLimit=sizeof(source)/sizeof(source[0]); 418 err=U_ZERO_ERROR; 419 targetLimit=0; 420 421 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err); 422 if(err == U_BUFFER_OVERFLOW_ERROR){ 423 err=U_ZERO_ERROR; 424 targetLimit=targetCapacity+1; 425 target=(char*)malloc(sizeof(char) * targetLimit); 426 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 427 } 428 if(U_FAILURE(err)){ 429 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err)); 430 } 431 else { 432 for(i=0; i<targetCapacity; i++){ 433 if(target[i] != expectedTarget[i]){ 434 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]); 435 } 436 } 437 438 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err); 439 if(U_FAILURE(err) || i!=7){ 440 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n", 441 u_errorName(err), i); 442 } 443 444 /*Test error conditions*/ 445 err=U_ZERO_ERROR; 446 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err); 447 if(i !=0){ 448 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n"); 449 } 450 451 err=U_ILLEGAL_ARGUMENT_ERROR; 452 sourceLimit=sizeof(source)/sizeof(source[0]); 453 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 454 if(i !=0 ){ 455 log_err("FAILURE! 
ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); 456 } 457 458 err=U_ZERO_ERROR; 459 sourceLimit=sizeof(source)/sizeof(source[0]); 460 targetLimit=0; 461 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 462 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ 463 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n"); 464 } 465 err=U_ZERO_ERROR; 466 free(target); 467 } 468 } 469 470 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/ 471 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n"); 472 err=U_ILLEGAL_ARGUMENT_ERROR; 473 if(ucnv_open(NULL, &err) != NULL){ 474 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 475 } 476 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){ 477 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 478 } 479 err=U_ZERO_ERROR; 480 481 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */ 482 log_verbose("\n---Testing ucnv_open default...\n"); 483 someConverters[0] = ucnv_open(NULL,&err); 484 someConverters[1] = ucnv_open(NULL,&err); 485 someConverters[2] = ucnv_open("utf8", &err); 486 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err); 487 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */ 488 if (U_FAILURE(err)){ log_data_err("FAILURE! 
%s\n", myErrorName(err));} 489 490 /* Testing ucnv_getName()*/ 491 /*default code page */ 492 ucnv_getName(someConverters[0], &err); 493 if(U_FAILURE(err)) { 494 log_data_err("getName[0] failed\n"); 495 } else { 496 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err)); 497 } 498 ucnv_getName(someConverters[1], &err); 499 if(U_FAILURE(err)) { 500 log_data_err("getName[1] failed\n"); 501 } else { 502 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); 503 } 504 505 ucnv_close(someConverters[0]); 506 ucnv_close(someConverters[1]); 507 ucnv_close(someConverters[2]); 508 ucnv_close(someConverters[3]); 509 510 511 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) 512 { 513 int32_t i = 0; 514 515 err = U_ZERO_ERROR; 516 #ifdef U_TOPSRCDIR 517 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); 518 #else 519 strcpy(ucs_file_name, loadTestData(&err)); 520 521 if(U_FAILURE(err)){ 522 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err)); 523 return; 524 } 525 526 { 527 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); 528 529 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ 530 *(index+1)=0; 531 } 532 } 533 534 strcat(ucs_file_name,".."U_FILE_SEP_STRING); 535 #endif 536 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); 537 538 ucs_file_in = fopen(ucs_file_name,"rb"); 539 if (!ucs_file_in) 540 { 541 log_data_err("Couldn't open the Unicode file [%s]... 
Exiting...\n", ucs_file_name); 542 return; 543 } 544 545 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/ 546 547 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */ 548 /* ucnv_flushCache(); */ 549 myConverter =ucnv_open( "ibm-949", &err); 550 if (!myConverter || U_FAILURE(err)) 551 { 552 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); 553 fclose(ucs_file_in); 554 break; 555 } 556 557 /*testing for ucnv_getName() */ 558 log_verbose("Testing ucnv_getName()...\n"); 559 ucnv_getName(myConverter, &err); 560 if(U_FAILURE(err)) 561 log_err("Error in getName\n"); 562 else 563 { 564 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); 565 } 566 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) 567 log_err("getName failed\n"); 568 else 569 log_verbose("getName ok\n"); 570 /*Test getName with error condition*/ 571 { 572 const char* name=0; 573 err=U_ILLEGAL_ARGUMENT_ERROR; 574 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR"); 575 name=ucnv_getName(myConverter, &err); 576 if(name != NULL){ 577 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail"); 578 } 579 err=U_ZERO_ERROR; 580 } 581 582 583 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/ 584 585 log_verbose("Testing ucnv_getMaxCharSize()...\n"); 586 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index]) 587 log_verbose("Max byte per character OK\n"); 588 else 589 log_err("Max byte per character failed\n"); 590 591 log_verbose("\n---Testing ucnv_getMinCharSize()...\n"); 592 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index]) 593 log_verbose("Min byte per character OK\n"); 594 else 595 log_err("Min byte per character failed\n"); 596 597 598 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/ 599 log_verbose("\n---Testing ucnv_getSubstChars...\n"); 600 ii=4; 601 
ucnv_getSubstChars(myConverter, myptr, &ii, &err); 602 if (ii <= 0) { 603 log_err("ucnv_getSubstChars returned a negative number %d\n", ii); 604 } 605 606 for(x=0;x<ii;x++) 607 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]); 608 if (rest==CodePagesSubstitutionChars[codepage_index]) 609 log_verbose("Substitution character ok\n"); 610 else 611 log_err("Substitution character failed.\n"); 612 613 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n"); 614 ucnv_setSubstChars(myConverter, myptr, ii, &err); 615 if (U_FAILURE(err)) 616 { 617 log_err("FAILURE! %s\n", myErrorName(err)); 618 } 619 ucnv_getSubstChars(myConverter,save, &ii, &err); 620 if (U_FAILURE(err)) 621 { 622 log_err("FAILURE! %s\n", myErrorName(err)); 623 } 624 625 if (strncmp(save, myptr, ii)) 626 log_err("Saved substitution character failed\n"); 627 else 628 log_verbose("Saved substitution character ok\n"); 629 630 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/ 631 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n"); 632 ii=1; 633 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 634 if(err != U_INDEX_OUTOFBOUNDS_ERROR){ 635 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err)); 636 } 637 err=U_ZERO_ERROR; 638 ii=4; 639 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 640 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n"); 641 ucnv_setSubstChars(myConverter, myptr, 0, &err); 642 if(err != U_ILLEGAL_ARGUMENT_ERROR){ 643 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err)); 644 } 645 log_verbose("\n---Testing ucnv_setSubstChars.. 
with err != U_ZERO_ERROR \n"); 646 strcpy(myptr, "abc"); 647 ucnv_setSubstChars(myConverter, myptr, ii, &err); 648 err=U_ZERO_ERROR; 649 ucnv_getSubstChars(myConverter, save, &ii, &err); 650 if(strncmp(save, myptr, ii) == 0){ 651 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n"); 652 } 653 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n"); 654 err=U_ZERO_ERROR; 655 strcpy(myptr, "abc"); 656 ucnv_setSubstChars(myConverter, myptr, ii, &err); 657 err=U_ILLEGAL_ARGUMENT_ERROR; 658 ucnv_getSubstChars(myConverter, save, &ii, &err); 659 if(strncmp(save, myptr, ii) == 0){ 660 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n"); 661 } 662 err=U_ZERO_ERROR; 663 /*------*/ 664 665 #ifdef U_ENABLE_GENERIC_ISO_2022 666 /*resetState ucnv_reset()*/ 667 log_verbose("\n---Testing ucnv_reset()..\n"); 668 ucnv_reset(myConverter); 669 { 670 UChar32 c; 671 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80}; 672 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 673 UConverter *cnv=ucnv_open("ISO_2022", &err); 674 if(U_FAILURE(err)) { 675 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 676 } 677 c=ucnv_getNextUChar(cnv, &source, limit, &err); 678 if((U_FAILURE(err) || c != (UChar32)0x0031)) { 679 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err)); 680 } 681 ucnv_reset(cnv); 682 ucnv_close(cnv); 683 684 } 685 #endif 686 687 /*getDisplayName*/ 688 log_verbose("\n---Testing ucnv_getDisplayName()...\n"); 689 locale=CodePagesLocale[codepage_index]; 690 len=0; 691 displayname=NULL; 692 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); 693 if(err==U_BUFFER_OVERFLOW_ERROR) { 694 err=U_ZERO_ERROR; 695 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); 696 
ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); 697 if(U_FAILURE(err)) { 698 log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); 699 } 700 else { 701 log_verbose(" getDisplayName o.k.\n"); 702 } 703 free(displayname); 704 displayname=NULL; 705 } 706 else { 707 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); 708 } 709 /*test ucnv_getDiaplayName with error condition*/ 710 err= U_ILLEGAL_ARGUMENT_ERROR; 711 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); 712 if( len !=0 ){ 713 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); 714 } 715 /*test ucnv_getDiaplayName with error condition*/ 716 err=U_ZERO_ERROR; 717 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); 718 if( len !=0 || U_SUCCESS(err)){ 719 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); 720 } 721 err=U_ZERO_ERROR; 722 723 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ 724 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context); 725 726 log_verbose("\n---Testing ucnv_setFromUCallBack...\n"); 727 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 728 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context) 729 { 730 log_err("FAILURE! %s\n", myErrorName(err)); 731 } 732 733 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 734 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM) 735 log_err("get From UCallBack failed\n"); 736 else 737 log_verbose("get From UCallBack ok\n"); 738 739 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n"); 740 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err); 741 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM) 742 { 743 log_err("FAILURE! 
%s\n", myErrorName(err)); 744 } 745 746 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 747 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context) 748 log_err("get From UCallBack action failed\n"); 749 else 750 log_verbose("get From UCallBack action ok\n"); 751 752 /*testing ucnv_setToUCallBack with error conditions*/ 753 err=U_ILLEGAL_ARGUMENT_ERROR; 754 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n"); 755 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 756 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 757 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){ 758 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 759 } 760 err=U_ZERO_ERROR; 761 762 763 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/ 764 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context); 765 766 log_verbose("\n---Testing setTo UCallBack...\n"); 767 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err); 768 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context) 769 { 770 log_err("FAILURE! %s\n", myErrorName(err)); 771 } 772 773 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 774 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM) 775 log_err("To UCallBack failed\n"); 776 else 777 log_verbose("To UCallBack ok\n"); 778 779 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n"); 780 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err); 781 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM) 782 { log_err("FAILURE! 
%s\n", myErrorName(err)); } 783 784 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 785 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context) 786 log_err("To UCallBack failed\n"); 787 else 788 log_verbose("To UCallBack ok\n"); 789 790 /*testing ucnv_setToUCallBack with error conditions*/ 791 err=U_ILLEGAL_ARGUMENT_ERROR; 792 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n"); 793 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err); 794 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 795 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){ 796 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 797 } 798 err=U_ZERO_ERROR; 799 800 801 /*getcodepageid testing ucnv_getCCSID() */ 802 log_verbose("\n----Testing getCCSID....\n"); 803 cp = ucnv_getCCSID(myConverter,&err); 804 if (U_FAILURE(err)) 805 { 806 log_err("FAILURE!..... %s\n", myErrorName(err)); 807 } 808 if (cp != CodePageNumberToTest[codepage_index]) 809 log_err("Codepage number test failed\n"); 810 else 811 log_verbose("Codepage number test OK\n"); 812 813 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/ 814 err=U_ILLEGAL_ARGUMENT_ERROR; 815 if( ucnv_getCCSID(myConverter,&err) != -1){ 816 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n"); 817 } 818 err=U_ZERO_ERROR; 819 820 /*getCodepagePlatform testing ucnv_getPlatform()*/ 821 log_verbose("\n---Testing getCodepagePlatform ..\n"); 822 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err)) 823 log_err("Platform codepage test failed\n"); 824 else 825 log_verbose("Platform codepage test ok\n"); 826 827 if (U_FAILURE(err)) 828 { 829 log_err("FAILURE! 
%s\n", myErrorName(err)); 830 } 831 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/ 832 err= U_ILLEGAL_ARGUMENT_ERROR; 833 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){ 834 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n"); 835 } 836 err=U_ZERO_ERROR; 837 838 839 /*Reads the BOM*/ 840 { 841 // Note: gcc produces a compile warning if the return value from fread() is ignored. 842 size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in); 843 (void)numRead; 844 } 845 if (BOM!=0xFEFF && BOM!=0xFFFE) 846 { 847 log_err("File Missing BOM...Bailing!\n"); 848 fclose(ucs_file_in); 849 break; 850 } 851 852 853 /*Reads in the file*/ 854 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) 855 { 856 myUChar = ucs_file_buffer[i-1]; 857 858 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/ 859 } 860 861 myUChar = ucs_file_buffer[i-1]; 862 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/ 863 864 865 /*testing ucnv_fromUChars() and ucnv_toUChars() */ 866 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/ 867 868 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1)); 869 u_uastrcpy(uchar1,""); 870 u_strncpy(uchar1,ucs_file_buffer,i); 871 uchar1[i] = 0; 872 873 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1)); 874 u_uastrcpy(uchar3,""); 875 u_strncpy(uchar3,ucs_file_buffer,i); 876 uchar3[i] = 0; 877 878 /*Calls the Conversion Routine */ 879 testLong1 = MAX_FILE_LEN; 880 log_verbose("\n---Testing ucnv_fromUChars()\n"); 881 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 882 if (U_FAILURE(err)) 883 { 884 log_err("\nFAILURE...%s\n", myErrorName(err)); 885 } 886 else 887 log_verbose(" ucnv_fromUChars() o.k.\n"); 888 889 /*test the conversion routine */ 890 log_verbose("\n---Testing ucnv_toUChars()\n"); 891 /*call it first time 
for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */ 892 targetcapacity2=0; 893 targetsize = ucnv_toUChars(myConverter, 894 NULL, 895 targetcapacity2, 896 output_cp_buffer, 897 strlen(output_cp_buffer), 898 &err); 899 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ 900 901 if(err==U_BUFFER_OVERFLOW_ERROR) 902 { 903 err=U_ZERO_ERROR; 904 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar)); 905 targetsize = ucnv_toUChars(myConverter, 906 uchar2, 907 targetsize+1, 908 output_cp_buffer, 909 strlen(output_cp_buffer), 910 &err); 911 912 if(U_FAILURE(err)) 913 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err)); 914 else 915 log_verbose(" ucnv_toUChars() o.k.\n"); 916 917 if(u_strcmp(uchar1,uchar2)!=0) 918 log_err("equality test failed with conversion routine\n"); 919 } 920 else 921 { 922 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n"); 923 } 924 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/ 925 err=U_ILLEGAL_ARGUMENT_ERROR; 926 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n"); 927 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 928 if (targetcapacity !=0) { 929 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 930 } 931 err=U_ZERO_ERROR; 932 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n"); 933 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err); 934 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) { 935 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n"); 936 } 937 err=U_ZERO_ERROR; 938 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n"); 939 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err); 940 if (targetcapacity !=0) { 941 log_err("\nFAILURE: 
ucnv_fromUChars with sourceLength 0 is expected to return 0\n"); 942 } 943 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n"); 944 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err); 945 if (err != U_BUFFER_OVERFLOW_ERROR) { 946 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); 947 } 948 /*toUChars with error conditions*/ 949 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); 950 if(targetsize != 0){ 951 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 952 } 953 err=U_ZERO_ERROR; 954 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); 955 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ 956 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); 957 } 958 err=U_ZERO_ERROR; 959 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err); 960 if (targetsize !=0) { 961 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); 962 } 963 targetcapacity2=0; 964 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); 965 if (err != U_STRING_NOT_TERMINATED_WARNING) { 966 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", 967 u_errorName(err)); 968 } 969 err=U_ZERO_ERROR; 970 /*-----*/ 971 972 973 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ 974 /*Clean up re-usable vars*/ 975 log_verbose("Testing ucnv_fromUnicode().....\n"); 976 tmp_ucs_buf=ucs_file_buffer_use; 977 ucnv_fromUnicode(myConverter, &mytarget_1, 978 mytarget + MAX_FILE_LEN, 979 &tmp_ucs_buf, 980 ucs_file_buffer_use+i, 981 NULL, 982 TRUE, 983 &err); 984 consumedUni = (UChar*)tmp_consumedUni; 985 (void)consumedUni; 
/* Suppress set but not used warning. */ 986 987 if (U_FAILURE(err)) 988 { 989 log_err("FAILURE! %s\n", myErrorName(err)); 990 } 991 else 992 log_verbose("ucnv_fromUnicode() o.k.\n"); 993 994 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */ 995 log_verbose("Testing ucnv_toUnicode().....\n"); 996 tmp_mytarget_use=mytarget_use; 997 tmp_consumed = consumed; 998 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1, 999 my_ucs_file_buffer + MAX_FILE_LEN, 1000 &tmp_mytarget_use, 1001 mytarget_use + (mytarget_1 - mytarget), 1002 NULL, 1003 FALSE, 1004 &err); 1005 consumed = (char*)tmp_consumed; 1006 if (U_FAILURE(err)) 1007 { 1008 log_err("FAILURE! %s\n", myErrorName(err)); 1009 } 1010 else 1011 log_verbose("ucnv_toUnicode() o.k.\n"); 1012 1013 1014 log_verbose("\n---Testing RoundTrip ...\n"); 1015 1016 1017 u_strncpy(uchar3, my_ucs_file_buffer,i); 1018 uchar3[i] = 0; 1019 1020 if(u_strcmp(uchar1,uchar3)==0) 1021 log_verbose("Equality test o.k.\n"); 1022 else 1023 log_err("Equality test failed\n"); 1024 1025 /*sanity compare */ 1026 if(uchar2 == NULL) 1027 { 1028 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__); 1029 } 1030 else 1031 { 1032 if(u_strcmp(uchar2, uchar3)==0) 1033 log_verbose("Equality test o.k.\n"); 1034 else 1035 log_err("Equality test failed\n"); 1036 } 1037 1038 fclose(ucs_file_in); 1039 ucnv_close(myConverter); 1040 if (uchar1 != 0) free(uchar1); 1041 if (uchar2 != 0) free(uchar2); 1042 if (uchar3 != 0) free(uchar3); 1043 } 1044 1045 free((void*)mytarget); 1046 free((void*)output_cp_buffer); 1047 free((void*)ucs_file_buffer); 1048 free((void*)my_ucs_file_buffer); 1049 #endif 1050 } 1051 1052 #if !UCONFIG_NO_LEGACY_CONVERSION 1053 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) 1054 { 1055 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; 1056 } 1057 1058 
static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) 1059 { 1060 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; 1061 } 1062 #endif 1063 1064 static void TestFlushCache(void) { 1065 #if !UCONFIG_NO_LEGACY_CONVERSION 1066 UErrorCode err = U_ZERO_ERROR; 1067 UConverter* someConverters[5]; 1068 int flushCount = 0; 1069 1070 /* flush the converter cache to get a consistent state before the flushing is tested */ 1071 ucnv_flushCache(); 1072 1073 /*Testing ucnv_open()*/ 1074 /* Note: These converters have been chosen because they do NOT 1075 encode the Latin characters (U+0041, ...), and therefore are 1076 highly unlikely to be chosen as system default codepages */ 1077 1078 someConverters[0] = ucnv_open("ibm-1047", &err); 1079 if (U_FAILURE(err)) { 1080 log_data_err("FAILURE! %s\n", myErrorName(err)); 1081 } 1082 1083 someConverters[1] = ucnv_open("ibm-1047", &err); 1084 if (U_FAILURE(err)) { 1085 log_data_err("FAILURE! %s\n", myErrorName(err)); 1086 } 1087 1088 someConverters[2] = ucnv_open("ibm-1047", &err); 1089 if (U_FAILURE(err)) { 1090 log_data_err("FAILURE! %s\n", myErrorName(err)); 1091 } 1092 1093 someConverters[3] = ucnv_open("gb18030", &err); 1094 if (U_FAILURE(err)) { 1095 log_data_err("FAILURE! %s\n", myErrorName(err)); 1096 } 1097 1098 someConverters[4] = ucnv_open("ibm-954", &err); 1099 if (U_FAILURE(err)) { 1100 log_data_err("FAILURE! 
%s\n", myErrorName(err)); 1101 } 1102 1103 1104 /* Testing ucnv_flushCache() */ 1105 log_verbose("\n---Testing ucnv_flushCache...\n"); 1106 if ((flushCount=ucnv_flushCache())==0) 1107 log_verbose("Flush cache ok\n"); 1108 else 1109 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1110 1111 /*testing ucnv_close() and ucnv_flushCache() */ 1112 ucnv_close(someConverters[0]); 1113 ucnv_close(someConverters[1]); 1114 1115 if ((flushCount=ucnv_flushCache())==0) 1116 log_verbose("Flush cache ok\n"); 1117 else 1118 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1119 1120 ucnv_close(someConverters[2]); 1121 ucnv_close(someConverters[3]); 1122 1123 if ((flushCount=ucnv_flushCache())==2) 1124 log_verbose("Flush cache ok\n"); /*because first, second and third are same */ 1125 else 1126 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", 1127 __LINE__, 1128 flushCount); 1129 1130 ucnv_close(someConverters[4]); 1131 if ( (flushCount=ucnv_flushCache())==1) 1132 log_verbose("Flush cache ok\n"); 1133 else 1134 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); 1135 #endif 1136 } 1137 1138 /** 1139 * Test the converter alias API, specifically the fuzzy matching of 1140 * alias names and the alias table integrity. Make sure each 1141 * converter has at least one alias (itself), and that its listed 1142 * aliases map back to itself. Check some hard-coded UTF-8 and 1143 * ISO_2022 aliases to make sure they work. 1144 */ 1145 static void TestAlias() { 1146 int32_t i, ncnv; 1147 UErrorCode status = U_ZERO_ERROR; 1148 1149 /* Predetermined aliases that we expect to map back to ISO_2022 1150 * and UTF-8. UPDATE THIS DATA AS NECESSARY. 
*/ 1151 const char* ISO_2022_NAMES[] = 1152 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", 1153 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; 1154 int32_t ISO_2022_NAMES_LENGTH = 1155 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); 1156 const char *UTF8_NAMES[] = 1157 { "UTF-8", "utf-8", "utf8", "ibm-1208", 1158 "utf_8", "ibm1208", "cp1208" }; 1159 int32_t UTF8_NAMES_LENGTH = 1160 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); 1161 1162 struct { 1163 const char *name; 1164 const char *alias; 1165 } CONVERTERS_NAMES[] = { 1166 { "UTF-32BE", "UTF32_BigEndian" }, 1167 { "UTF-32LE", "UTF32_LittleEndian" }, 1168 { "UTF-32", "ISO-10646-UCS-4" }, 1169 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, 1170 { "UTF-32", "ucs-4" } 1171 }; 1172 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); 1173 1174 /* When there are bugs in gencnval or in ucnv_io, converters can 1175 appear to have no aliases. */ 1176 ncnv = ucnv_countAvailable(); 1177 log_verbose("%d converters\n", ncnv); 1178 for (i=0; i<ncnv; ++i) { 1179 const char *name = ucnv_getAvailableName(i); 1180 const char *alias0; 1181 uint16_t na = ucnv_countAliases(name, &status); 1182 uint16_t j; 1183 UConverter *cnv; 1184 1185 if (na == 0) { 1186 log_err("FAIL: Converter \"%s\" (i=%d)" 1187 " has no aliases; expect at least one\n", 1188 name, i); 1189 continue; 1190 } 1191 cnv = ucnv_open(name, &status); 1192 if (U_FAILURE(status)) { 1193 log_data_err("FAIL: Converter \"%s\" (i=%d)" 1194 " can't be opened.\n", 1195 name, i); 1196 } 1197 else { 1198 if (strcmp(ucnv_getName(cnv, &status), name) != 0 1199 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { 1200 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. 
" 1201 "The should be the same\n", 1202 name, ucnv_getName(cnv, &status)); 1203 } 1204 } 1205 ucnv_close(cnv); 1206 1207 status = U_ZERO_ERROR; 1208 alias0 = ucnv_getAlias(name, 0, &status); 1209 for (j=1; j<na; ++j) { 1210 const char *alias; 1211 /* Make sure each alias maps back to the the same list of 1212 aliases. Assume that if alias 0 is the same, the whole 1213 list is the same (this should always be true). */ 1214 const char *mapBack; 1215 1216 status = U_ZERO_ERROR; 1217 alias = ucnv_getAlias(name, j, &status); 1218 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1219 log_err("FAIL: Converter \"%s\"is ambiguous\n", name); 1220 } 1221 1222 if (alias == NULL) { 1223 log_err("FAIL: Converter \"%s\" -> " 1224 "alias[%d]=NULL\n", 1225 name, j); 1226 continue; 1227 } 1228 1229 mapBack = ucnv_getAlias(alias, 0, &status); 1230 1231 if (mapBack == NULL) { 1232 log_err("FAIL: Converter \"%s\" -> " 1233 "alias[%d]=\"%s\" -> " 1234 "alias[0]=NULL, exp. \"%s\"\n", 1235 name, j, alias, alias0); 1236 continue; 1237 } 1238 1239 if (0 != strcmp(alias0, mapBack)) { 1240 int32_t idx; 1241 UBool foundAlias = FALSE; 1242 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1243 /* Make sure that we only get this mismapping when there is 1244 an ambiguous alias, and the other converter has this alias too. */ 1245 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) { 1246 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) { 1247 foundAlias = TRUE; 1248 break; 1249 } 1250 } 1251 } 1252 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */ 1253 1254 if (!foundAlias) { 1255 log_err("FAIL: Converter \"%s\" -> " 1256 "alias[%d]=\"%s\" -> " 1257 "alias[0]=\"%s\", exp. \"%s\"\n", 1258 name, j, alias, mapBack, alias0); 1259 } 1260 } 1261 } 1262 } 1263 1264 1265 /* Check a list of predetermined aliases that we expect to map 1266 * back to ISO_2022 and UTF-8. 
*/ 1267 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) { 1268 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status); 1269 if(!mapBack) { 1270 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]); 1271 continue; 1272 } 1273 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { 1274 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", 1275 ISO_2022_NAMES[i], mapBack); 1276 } 1277 } 1278 1279 1280 for (i=1; i<UTF8_NAMES_LENGTH; ++i) { 1281 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status); 1282 if(!mapBack) { 1283 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]); 1284 continue; 1285 } 1286 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) { 1287 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n", 1288 UTF8_NAMES[i], mapBack); 1289 } 1290 } 1291 1292 /* 1293 * Check a list of predetermined aliases that we expect to map 1294 * back to predermined converter names. 1295 */ 1296 1297 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { 1298 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); 1299 if(!mapBack) { 1300 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); 1301 continue; 1302 } 1303 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { 1304 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n", 1305 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name); 1306 } 1307 } 1308 1309 } 1310 1311 static void TestDuplicateAlias(void) { 1312 const char *alias; 1313 UErrorCode status = U_ZERO_ERROR; 1314 1315 status = U_ZERO_ERROR; 1316 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); 1317 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1318 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. 
Got %s\n", alias); 1319 } 1320 status = U_ZERO_ERROR; 1321 alias = ucnv_getStandardName("ibm-943", "IANA", &status); 1322 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1323 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias); 1324 } 1325 status = U_ZERO_ERROR; 1326 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status); 1327 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) { 1328 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias); 1329 } 1330 } 1331 1332 1333 /* Test safe clone callback */ 1334 1335 static uint32_t TSCC_nextSerial() 1336 { 1337 static uint32_t n = 1; 1338 1339 return (n++); 1340 } 1341 1342 typedef struct 1343 { 1344 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */ 1345 uint32_t serial; /* minted from nextSerial, above */ 1346 UBool wasClosed; /* close happened on the object */ 1347 } TSCCContext; 1348 1349 static TSCCContext *TSCC_clone(TSCCContext *ctx) 1350 { 1351 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext)); 1352 1353 newCtx->serial = TSCC_nextSerial(); 1354 newCtx->wasClosed = 0; 1355 newCtx->magic = 0xC0FFEE; 1356 1357 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial); 1358 1359 return newCtx; 1360 } 1361 1362 #if !UCONFIG_NO_LEGACY_CONVERSION 1363 static void TSCC_fromU(const void *context, 1364 UConverterFromUnicodeArgs *fromUArgs, 1365 const UChar* codeUnits, 1366 int32_t length, 1367 UChar32 codePoint, 1368 UConverterCallbackReason reason, 1369 UErrorCode * err) 1370 { 1371 TSCCContext *ctx = (TSCCContext*)context; 1372 UConverterFromUCallback junkFrom; 1373 1374 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter); 1375 1376 if(ctx->magic != 0xC0FFEE) { 1377 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1378 return; 
1379 } 1380 1381 if(reason == UCNV_CLONE) { 1382 UErrorCode subErr = U_ZERO_ERROR; 1383 TSCCContext *newCtx; 1384 TSCCContext *junkCtx; 1385 TSCCContext **pjunkCtx = &junkCtx; 1386 1387 /* "recreate" it */ 1388 log_verbose("TSCC_fromU: cloning..\n"); 1389 newCtx = TSCC_clone(ctx); 1390 1391 if(newCtx == NULL) { 1392 log_err("TSCC_fromU: internal clone failed on %p\n", ctx); 1393 } 1394 1395 /* now, SET it */ 1396 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1397 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1398 1399 if(U_FAILURE(subErr)) { 1400 *err = subErr; 1401 } 1402 } 1403 1404 if(reason == UCNV_CLOSE) { 1405 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial); 1406 ctx->wasClosed = TRUE; 1407 } 1408 } 1409 1410 static void TSCC_toU(const void *context, 1411 UConverterToUnicodeArgs *toUArgs, 1412 const char* codeUnits, 1413 int32_t length, 1414 UConverterCallbackReason reason, 1415 UErrorCode * err) 1416 { 1417 TSCCContext *ctx = (TSCCContext*)context; 1418 UConverterToUCallback junkFrom; 1419 1420 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter); 1421 1422 if(ctx->magic != 0xC0FFEE) { 1423 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1424 return; 1425 } 1426 1427 if(reason == UCNV_CLONE) { 1428 UErrorCode subErr = U_ZERO_ERROR; 1429 TSCCContext *newCtx; 1430 TSCCContext *junkCtx; 1431 TSCCContext **pjunkCtx = &junkCtx; 1432 1433 /* "recreate" it */ 1434 log_verbose("TSCC_toU: cloning..\n"); 1435 newCtx = TSCC_clone(ctx); 1436 1437 if(newCtx == NULL) { 1438 log_err("TSCC_toU: internal clone failed on %p\n", ctx); 1439 } 1440 1441 /* now, SET it */ 1442 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1443 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1444 1445 if(U_FAILURE(subErr)) { 1446 
*err = subErr; 1447 } 1448 } 1449 1450 if(reason == UCNV_CLOSE) { 1451 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial); 1452 ctx->wasClosed = TRUE; 1453 } 1454 } 1455 1456 static void TSCC_init(TSCCContext *q) 1457 { 1458 q->magic = 0xC0FFEE; 1459 q->serial = TSCC_nextSerial(); 1460 q->wasClosed = 0; 1461 } 1462 1463 static void TSCC_print_log(TSCCContext *q, const char *name) 1464 { 1465 if(q==NULL) { 1466 log_verbose("TSCContext: %s is NULL!!\n", name); 1467 } else { 1468 if(q->magic != 0xC0FFEE) { 1469 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n", 1470 q,q->serial, q->magic); 1471 } 1472 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n", 1473 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open"); 1474 } 1475 } 1476 1477 static void TestConvertSafeCloneCallback() 1478 { 1479 UErrorCode err = U_ZERO_ERROR; 1480 TSCCContext from1, to1; 1481 TSCCContext *from2, *from3, *to2, *to3; 1482 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; 1483 char hunk[8192]; 1484 int32_t hunkSize = 8192; 1485 UConverterFromUCallback junkFrom; 1486 UConverterToUCallback junkTo; 1487 UConverter *conv1, *conv2 = NULL; 1488 1489 conv1 = ucnv_open("iso-8859-3", &err); 1490 1491 if(U_FAILURE(err)) { 1492 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); 1493 return; 1494 } 1495 1496 log_verbose("Opened conv1=%p\n", conv1); 1497 1498 TSCC_init(&from1); 1499 TSCC_init(&to1); 1500 1501 TSCC_print_log(&from1, "from1"); 1502 TSCC_print_log(&to1, "to1"); 1503 1504 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err); 1505 log_verbose("Set from1 on conv1\n"); 1506 TSCC_print_log(&from1, "from1"); 1507 1508 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err); 1509 log_verbose("Set to1 on conv1\n"); 1510 TSCC_print_log(&to1, "to1"); 1511 1512 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err); 1513 if(U_FAILURE(err)) { 1514 log_err("safeClone failed: %s\n", u_errorName(err)); 1515 
return; 1516 } 1517 log_verbose("Cloned to conv2=%p.\n", conv2); 1518 1519 /********** from *********************/ 1520 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); 1521 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); 1522 1523 TSCC_print_log(from2, "from2"); 1524 TSCC_print_log(from3, "from3(==from1)"); 1525 1526 if(from2 == NULL) { 1527 log_err("FAIL! from2 is null \n"); 1528 return; 1529 } 1530 1531 if(from3 == NULL) { 1532 log_err("FAIL! from3 is null \n"); 1533 return; 1534 } 1535 1536 if(from3 != (&from1) ) { 1537 log_err("FAIL! conv1's FROM context changed!\n"); 1538 } 1539 1540 if(from2 == (&from1) ) { 1541 log_err("FAIL! conv1's FROM context is the same as conv2's!\n"); 1542 } 1543 1544 if(from1.wasClosed) { 1545 log_err("FAIL! from1 is closed \n"); 1546 } 1547 1548 if(from2->wasClosed) { 1549 log_err("FAIL! from2 was closed\n"); 1550 } 1551 1552 /********** to *********************/ 1553 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); 1554 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); 1555 1556 TSCC_print_log(to2, "to2"); 1557 TSCC_print_log(to3, "to3(==to1)"); 1558 1559 if(to2 == NULL) { 1560 log_err("FAIL! to2 is null \n"); 1561 return; 1562 } 1563 1564 if(to3 == NULL) { 1565 log_err("FAIL! to3 is null \n"); 1566 return; 1567 } 1568 1569 if(to3 != (&to1) ) { 1570 log_err("FAIL! conv1's TO context changed!\n"); 1571 } 1572 1573 if(to2 == (&to1) ) { 1574 log_err("FAIL! conv1's TO context is the same as conv2's!\n"); 1575 } 1576 1577 if(to1.wasClosed) { 1578 log_err("FAIL! to1 is closed \n"); 1579 } 1580 1581 if(to2->wasClosed) { 1582 log_err("FAIL! to2 was closed\n"); 1583 } 1584 1585 /*************************************/ 1586 1587 ucnv_close(conv1); 1588 log_verbose("ucnv_closed (conv1)\n"); 1589 TSCC_print_log(&from1, "from1"); 1590 TSCC_print_log(from2, "from2"); 1591 TSCC_print_log(&to1, "to1"); 1592 TSCC_print_log(to2, "to2"); 1593 1594 if(from1.wasClosed == FALSE) { 1595 log_err("FAIL! 
from1 is NOT closed \n"); 1596 } 1597 1598 if(from2->wasClosed) { 1599 log_err("FAIL! from2 was closed\n"); 1600 } 1601 1602 if(to1.wasClosed == FALSE) { 1603 log_err("FAIL! to1 is NOT closed \n"); 1604 } 1605 1606 if(to2->wasClosed) { 1607 log_err("FAIL! to2 was closed\n"); 1608 } 1609 1610 ucnv_close(conv2); 1611 log_verbose("ucnv_closed (conv2)\n"); 1612 1613 TSCC_print_log(&from1, "from1"); 1614 TSCC_print_log(from2, "from2"); 1615 1616 if(from1.wasClosed == FALSE) { 1617 log_err("FAIL! from1 is NOT closed \n"); 1618 } 1619 1620 if(from2->wasClosed == FALSE) { 1621 log_err("FAIL! from2 was NOT closed\n"); 1622 } 1623 1624 TSCC_print_log(&to1, "to1"); 1625 TSCC_print_log(to2, "to2"); 1626 1627 if(to1.wasClosed == FALSE) { 1628 log_err("FAIL! to1 is NOT closed \n"); 1629 } 1630 1631 if(to2->wasClosed == FALSE) { 1632 log_err("FAIL! to2 was NOT closed\n"); 1633 } 1634 1635 if(to2 != (&to1)) { 1636 free(to2); /* to1 is stack based */ 1637 } 1638 if(from2 != (&from1)) { 1639 free(from2); /* from1 is stack based */ 1640 } 1641 } 1642 #endif 1643 1644 static UBool 1645 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { 1646 while(length>0) { 1647 if(*p!=b) { 1648 return TRUE; 1649 } 1650 ++p; 1651 --length; 1652 } 1653 return FALSE; 1654 } 1655 1656 static void TestConvertSafeClone() 1657 { 1658 /* one 'regular' & all the 'private stateful' converters */ 1659 static const char *const names[] = { 1660 #if !UCONFIG_NO_LEGACY_CONVERSION 1661 "ibm-1047", 1662 "ISO_2022,locale=zh,version=1", 1663 #endif 1664 "SCSU", 1665 #if !UCONFIG_NO_LEGACY_CONVERSION 1666 "HZ", 1667 "lmbcs", 1668 "ISCII,version=0", 1669 "ISO_2022,locale=kr,version=1", 1670 "ISO_2022,locale=jp,version=2", 1671 #endif 1672 "BOCU-1", 1673 "UTF-7", 1674 #if !UCONFIG_NO_LEGACY_CONVERSION 1675 "IMAP-mailbox-name", 1676 "ibm-1047-s390" 1677 #else 1678 "IMAP=mailbox-name" 1679 #endif 1680 }; 1681 1682 /* store the actual sizes of each converter */ 1683 int32_t actualSizes[LENGTHOF(names)]; 1684 1685 
static const int32_t bufferSizes[] = { 1686 U_CNV_SAFECLONE_BUFFERSIZE, 1687 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ 1688 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ 1689 }; 1690 1691 char charBuffer[21]; /* Leave at an odd number for alignment testing */ 1692 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; 1693 int32_t bufferSize, maxBufferSize; 1694 const char *maxName; 1695 UConverter * cnv, *cnv2; 1696 UErrorCode err; 1697 1698 char *pCharBuffer; 1699 const char *pConstCharBuffer; 1700 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); 1701 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ 1702 UChar uniCharBuffer[20]; 1703 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; 1704 const char *pCharSource = charSourceBuffer; 1705 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); 1706 UChar *pUCharTarget = uniCharBuffer; 1707 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); 1708 const UChar * pUniBuffer; 1709 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); 1710 int32_t idx, j; 1711 1712 err = U_ZERO_ERROR; 1713 cnv = ucnv_open(names[0], &err); 1714 if(U_SUCCESS(err)) { 1715 /* Check the various error & informational states: */ 1716 1717 /* Null status - just returns NULL */ 1718 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1719 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL)) 1720 { 1721 log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); 1722 } 1723 /* error status - should return 0 & keep error the same */ 1724 err = U_MEMORY_ALLOCATION_ERROR; 1725 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) 1726 { 1727 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); 1728 } 1729 err = U_ZERO_ERROR; 1730 1731 /* Null buffer size pointer is ok */ 1732 if (NULL == (cnv2 = 
ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err)) 1733 { 1734 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); 1735 } 1736 ucnv_close(cnv2); 1737 err = U_ZERO_ERROR; 1738 1739 /* buffer size pointer is 0 - fill in pbufferSize with a size */ 1740 bufferSize = 0; 1741 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) 1742 { 1743 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); 1744 } 1745 /* Verify our define is large enough */ 1746 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) 1747 { 1748 log_err("FAIL: Pre-calculated buffer size is too small\n"); 1749 } 1750 /* Verify we can use this run-time calculated size */ 1751 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) 1752 { 1753 log_err("FAIL: Converter can't be cloned with run-time size\n"); 1754 } 1755 if (cnv2) { 1756 ucnv_close(cnv2); 1757 } 1758 1759 /* size one byte too small - should allocate & let us know */ 1760 --bufferSize; 1761 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1762 { 1763 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); 1764 } 1765 if (cnv2) { 1766 ucnv_close(cnv2); 1767 } 1768 1769 err = U_ZERO_ERROR; 1770 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1771 1772 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ 1773 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1774 { 1775 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); 1776 } 1777 if (cnv2) { 1778 ucnv_close(cnv2); 1779 } 1780 1781 err = U_ZERO_ERROR; 1782 1783 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ 1784 if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1785 { 1786 
log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); 1787 } 1788 1789 ucnv_close(cnv); 1790 } 1791 1792 maxBufferSize = 0; 1793 maxName = ""; 1794 1795 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ 1796 1797 for(j = 0; j < LENGTHOF(bufferSizes); ++j) { 1798 for (idx = 0; idx < LENGTHOF(names); idx++) 1799 { 1800 err = U_ZERO_ERROR; 1801 cnv = ucnv_open(names[idx], &err); 1802 if(U_FAILURE(err)) { 1803 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err)); 1804 continue; 1805 } 1806 1807 if(j == 0) { 1808 /* preflight to get maxBufferSize */ 1809 actualSizes[idx] = 0; 1810 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err); 1811 if(actualSizes[idx] > maxBufferSize) { 1812 maxBufferSize = actualSizes[idx]; 1813 maxName = names[idx]; 1814 } 1815 } 1816 1817 memset(buffer, 0xaa, sizeof(buffer)); 1818 1819 bufferSize = bufferSizes[j]; 1820 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); 1821 1822 /* close the original immediately to make sure that the clone works by itself */ 1823 ucnv_close(cnv); 1824 1825 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && 1826 err == U_SAFECLONE_ALLOCATED_WARNING 1827 ) { 1828 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]); 1829 } 1830 1831 /* check if the clone function overwrote any bytes that it is not supposed to touch */ 1832 if(bufferSize <= bufferSizes[j]) { 1833 /* used the stack buffer */ 1834 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || 1835 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) 1836 ) { 1837 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", 1838 names[idx], bufferSize, bufferSizes[j]); 1839 } 1840 } else { 1841 /* heap-allocated the clone */ 1842 if(containsAnyOtherByte(buffer[0], 
(int32_t)sizeof(buffer), 0xaa)) { 1843 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", 1844 names[idx], bufferSize, bufferSizes[j]); 1845 } 1846 } 1847 1848 pCharBuffer = charBuffer; 1849 pUniBuffer = uniBuffer; 1850 1851 ucnv_fromUnicode(cnv2, 1852 &pCharBuffer, 1853 charBufferLimit, 1854 &pUniBuffer, 1855 uniBufferLimit, 1856 NULL, 1857 TRUE, 1858 &err); 1859 if(U_FAILURE(err)){ 1860 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); 1861 } 1862 ucnv_toUnicode(cnv2, 1863 &pUCharTarget, 1864 pUCharTargetLimit, 1865 &pCharSource, 1866 pCharSourceLimit, 1867 NULL, 1868 TRUE, 1869 &err 1870 ); 1871 1872 if(U_FAILURE(err)){ 1873 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); 1874 } 1875 1876 pConstCharBuffer = charBuffer; 1877 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) 1878 { 1879 log_err("FAIL: Cloned converter failed to do conversion. 
Error: %s\n",u_errorName(err)); 1880 } 1881 ucnv_close(cnv2); 1882 } 1883 } 1884 1885 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1886 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1887 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { 1888 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1889 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1890 } 1891 } 1892 1893 static void TestCCSID() { 1894 #if !UCONFIG_NO_LEGACY_CONVERSION 1895 UConverter *cnv; 1896 UErrorCode errorCode; 1897 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; 1898 int32_t i, ccsid; 1899 1900 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { 1901 ccsid=ccsids[i]; 1902 1903 errorCode=U_ZERO_ERROR; 1904 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode); 1905 if(U_FAILURE(errorCode)) { 1906 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode)); 1907 continue; 1908 } 1909 1910 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) { 1911 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode)); 1912 } 1913 1914 /* skip gb18030(ccsid 1392) */ 1915 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) { 1916 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode)); 1917 } 1918 1919 ucnv_close(cnv); 1920 } 1921 #endif 1922 } 1923 1924 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */ 1925 1926 /* CHUNK_SIZE defined in common\ucnv.c: */ 1927 #define CHUNK_SIZE 1024 1928 1929 static void bug1(void); 1930 static void bug2(void); 1931 static void bug3(void); 1932 1933 static void 1934 TestJ932(void) 1935 { 1936 bug1(); /* Unicode intermediate buffer straddle bug */ 1937 bug2(); /* pre-flighting size incorrect caused by simple overflow */ 
1938 bug3(); /* pre-flighting size incorrect caused by expansion overflow */ 1939 } 1940 1941 /* 1942 * jitterbug 932: test chunking boundary conditions in 1943 1944 int32_t ucnv_convert(const char *toConverterName, 1945 const char *fromConverterName, 1946 char *target, 1947 int32_t targetSize, 1948 const char *source, 1949 int32_t sourceSize, 1950 UErrorCode * err) 1951 1952 * See discussions on the icu mailing list in 1953 * 2001-April with the subject "converter 'flush' question". 1954 * 1955 * Bug report and test code provided by Edward J. Batutis. 1956 */ 1957 static void bug1() 1958 { 1959 #if !UCONFIG_NO_LEGACY_CONVERSION 1960 char char_in[CHUNK_SIZE+32]; 1961 char char_out[CHUNK_SIZE*2]; 1962 1963 /* GB 18030 equivalent of U+10000 is 90308130 */ 1964 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 }; 1965 1966 UErrorCode err = U_ZERO_ERROR; 1967 int32_t i, test_seq_len = sizeof(test_seq); 1968 1969 /* 1970 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward 1971 * until the straddle bug appears. I didn't want to hard-code everything so this test could 1972 * be expanded - however this is the only type of straddle bug I can think of at the moment - 1973 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no 1974 * other Unicode sequences cause a bug since combining sequences are not supported by the 1975 * converters. 
1976 */ 1977 1978 for (i = test_seq_len; i >= 0; i--) { 1979 /* put character sequence into input buffer */ 1980 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */ 1981 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len); 1982 1983 /* do the conversion */ 1984 ucnv_convert("us-ascii", /* out */ 1985 "gb18030", /* in */ 1986 char_out, 1987 sizeof(char_out), 1988 char_in, 1989 sizeof(char_in), 1990 &err); 1991 1992 /* bug1: */ 1993 if (err == U_TRUNCATED_CHAR_FOUND) { 1994 /* this happens when surrogate pair straddles the intermediate buffer in 1995 * T_UConverter_fromCodepageToCodepage */ 1996 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n"); 1997 } 1998 } 1999 #endif 2000 } 2001 2002 /* bug2: pre-flighting loop bug: simple overflow causes bug */ 2003 static void bug2() 2004 { 2005 /* US-ASCII "1234567890" */ 2006 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 }; 2007 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 }; 2008 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, 2009 0x00, 0x00, 0x00, 0x31, 2010 0x00, 0x00, 0x00, 0x32, 2011 0x00, 0x00, 0x00, 0x33, 2012 0x00, 0x00, 0x00, 0x34, 2013 0x00, 0x00, 0x00, 0x35, 2014 0x00, 0x00, 0x00, 0x36, 2015 0x00, 0x00, 0x00, 0x37, 2016 0x00, 0x00, 0x00, 0x38, 2017 0x00, 0x00, (char)0xf0, 0x00}; 2018 static char target[5]; 2019 2020 UErrorCode err = U_ZERO_ERROR; 2021 int32_t size; 2022 2023 /* do the conversion */ 2024 size = ucnv_convert("iso-8859-1", /* out */ 2025 "us-ascii", /* in */ 2026 target, 2027 sizeof(target), 2028 source, 2029 sizeof(source), 2030 &err); 2031 2032 if ( size != 10 ) { 2033 /* bug2: size is 5, should be 10 */ 2034 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size); 2035 } 2036 2037 err = U_ZERO_ERROR; 2038 /* do the conversion */ 2039 size = ucnv_convert("UTF-32BE", /* out */ 2040 "UTF-8", /* in */ 
2041 target, 2042 sizeof(target), 2043 sourceUTF8, 2044 sizeof(sourceUTF8), 2045 &err); 2046 2047 if ( size != 32 ) { 2048 /* bug2: size is 5, should be 32 */ 2049 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size); 2050 } 2051 2052 err = U_ZERO_ERROR; 2053 /* do the conversion */ 2054 size = ucnv_convert("UTF-8", /* out */ 2055 "UTF-32BE", /* in */ 2056 target, 2057 sizeof(target), 2058 sourceUTF32, 2059 sizeof(sourceUTF32), 2060 &err); 2061 2062 if ( size != 12 ) { 2063 /* bug2: size is 5, should be 12 */ 2064 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); 2065 } 2066 } 2067 2068 /* 2069 * bug3: when the characters expand going from source to target codepage 2070 * you get bug3 in addition to bug2 2071 */ 2072 static void bug3() 2073 { 2074 #if !UCONFIG_NO_LEGACY_CONVERSION 2075 char char_in[CHUNK_SIZE*4]; 2076 char target[5]; 2077 UErrorCode err = U_ZERO_ERROR; 2078 int32_t size; 2079 2080 /* 2081 * first get the buggy size from bug2 then 2082 * compare it to buggy size with an expansion 2083 */ 2084 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ 2085 2086 /* do the conversion */ 2087 size = ucnv_convert("lmbcs", /* out */ 2088 "us-ascii", /* in */ 2089 target, 2090 sizeof(target), 2091 char_in, 2092 sizeof(char_in), 2093 &err); 2094 2095 if ( size != sizeof(char_in) ) { 2096 /* 2097 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer 2098 * in the converter?), should be CHUNK_SIZE*4 2099 * 2100 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize... 
2101 */ 2102 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size); 2103 } 2104 2105 /* 2106 * now do the conversion with expansion 2107 * ascii 0x08 expands to 0x0F 0x28 in lmbcs 2108 */ 2109 memset(char_in, 8, sizeof(char_in)); 2110 err = U_ZERO_ERROR; 2111 2112 /* do the conversion */ 2113 size = ucnv_convert("lmbcs", /* out */ 2114 "us-ascii", /* in */ 2115 target, 2116 sizeof(target), 2117 char_in, 2118 sizeof(char_in), 2119 &err); 2120 2121 /* expect 2X expansion */ 2122 if ( size != sizeof(char_in) * 2 ) { 2123 /* 2124 * bug3: 2125 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05: 2126 */ 2127 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); 2128 } 2129 #endif 2130 } 2131 2132 static void 2133 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, 2134 const char *src, int32_t srcLength, 2135 const char *expectTarget, int32_t expectTargetLength, 2136 int32_t chunkSize, 2137 const char *testName, 2138 UErrorCode expectCode) { 2139 UChar pivotBuffer[CHUNK_SIZE]; 2140 UChar *pivotSource, *pivotTarget; 2141 const UChar *pivotLimit; 2142 2143 char targetBuffer[CHUNK_SIZE]; 2144 char *target; 2145 const char *srcLimit, *finalSrcLimit, *targetLimit; 2146 2147 int32_t targetLength; 2148 2149 UBool flush; 2150 2151 UErrorCode errorCode; 2152 2153 /* setup */ 2154 if(chunkSize>CHUNK_SIZE) { 2155 chunkSize=CHUNK_SIZE; 2156 } 2157 2158 pivotSource=pivotTarget=pivotBuffer; 2159 pivotLimit=pivotBuffer+chunkSize; 2160 2161 finalSrcLimit=src+srcLength; 2162 target=targetBuffer; 2163 targetLimit=targetBuffer+chunkSize; 2164 2165 ucnv_resetToUnicode(srcCnv); 2166 ucnv_resetFromUnicode(targetCnv); 2167 2168 errorCode=U_ZERO_ERROR; 2169 flush=FALSE; 2170 2171 /* convert, streaming-style (both converters and pivot keep state) */ 2172 for(;;) { 2173 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */ 2174 
if(src+chunkSize<=finalSrcLimit) { 2175 srcLimit=src+chunkSize; 2176 } else { 2177 srcLimit=finalSrcLimit; 2178 } 2179 ucnv_convertEx(targetCnv, srcCnv, 2180 &target, targetLimit, 2181 &src, srcLimit, 2182 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, 2183 FALSE, flush, &errorCode); 2184 targetLength=(int32_t)(target-targetBuffer); 2185 if(target>targetLimit) { 2186 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", 2187 testName, chunkSize, target, targetLimit); 2188 break; /* TODO: major problem! */ 2189 } 2190 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 2191 /* continue converting another chunk */ 2192 errorCode=U_ZERO_ERROR; 2193 if(targetLength+chunkSize<=sizeof(targetBuffer)) { 2194 targetLimit=target+chunkSize; 2195 } else { 2196 targetLimit=targetBuffer+sizeof(targetBuffer); 2197 } 2198 } else if(U_FAILURE(errorCode)) { 2199 /* failure */ 2200 break; 2201 } else if(flush) { 2202 /* all done */ 2203 break; 2204 } else if(src==finalSrcLimit && pivotSource==pivotTarget) { 2205 /* all consumed, now flush without input (separate from conversion for testing) */ 2206 flush=TRUE; 2207 } 2208 } 2209 2210 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) { 2211 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n", 2212 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode)); 2213 } else if(targetLength!=expectTargetLength) { 2214 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n", 2215 testName, chunkSize, targetLength, expectTargetLength); 2216 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) { 2217 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n", 2218 testName, chunkSize); 2219 } 2220 } 2221 2222 static void 2223 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, 2224 const char *src, int32_t srcLength, 2225 const char *expectTarget, int32_t expectTargetLength, 2226 const 
char *testName, 2227 UErrorCode expectCode) { 2228 convertExStreaming(srcCnv, targetCnv, 2229 src, srcLength, 2230 expectTarget, expectTargetLength, 2231 1, testName, expectCode); 2232 convertExStreaming(srcCnv, targetCnv, 2233 src, srcLength, 2234 expectTarget, expectTargetLength, 2235 3, testName, expectCode); 2236 convertExStreaming(srcCnv, targetCnv, 2237 src, srcLength, 2238 expectTarget, expectTargetLength, 2239 7, testName, expectCode); 2240 } 2241 2242 static void TestConvertEx() { 2243 #if !UCONFIG_NO_LEGACY_CONVERSION 2244 static const uint8_t 2245 utf8[]={ 2246 /* 4e00 30a1 ff61 0410 */ 2247 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2248 }, 2249 shiftJIS[]={ 2250 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2251 }, 2252 errorTarget[]={ 2253 /* 2254 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2255 * SUB, SUB, 0x40, SUB, SUB, 0x40 2256 */ 2257 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 2258 }; 2259 2260 char srcBuffer[100], targetBuffer[100]; 2261 2262 const char *src; 2263 char *target; 2264 2265 UChar pivotBuffer[100]; 2266 UChar *pivotSource, *pivotTarget; 2267 2268 UConverter *cnv1, *cnv2; 2269 UErrorCode errorCode; 2270 2271 errorCode=U_ZERO_ERROR; 2272 cnv1=ucnv_open("UTF-8", &errorCode); 2273 if(U_FAILURE(errorCode)) { 2274 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode)); 2275 return; 2276 } 2277 2278 cnv2=ucnv_open("Shift-JIS", &errorCode); 2279 if(U_FAILURE(errorCode)) { 2280 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2281 ucnv_close(cnv1); 2282 return; 2283 } 2284 2285 /* test ucnv_convertEx() with streaming conversion style */ 2286 convertExMultiStreaming(cnv1, cnv2, 2287 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS), 2288 "UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2289 2290 convertExMultiStreaming(cnv2, cnv1, 2291 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), 2292 
"Shift-JIS -> UTF-8", U_ZERO_ERROR); 2293 2294 /* U_ZERO_ERROR because by default the SUB callbacks are set */ 2295 convertExMultiStreaming(cnv1, cnv2, 2296 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget), 2297 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2298 2299 /* test some simple conversions */ 2300 2301 /* NUL-terminated source and target */ 2302 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2303 memcpy(srcBuffer, utf8, sizeof(utf8)); 2304 srcBuffer[sizeof(utf8)]=0; 2305 src=srcBuffer; 2306 target=targetBuffer; 2307 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2308 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2309 if( errorCode!=U_ZERO_ERROR || 2310 target-targetBuffer!=sizeof(shiftJIS) || 2311 *target!=0 || 2312 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2313 ) { 2314 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n", 2315 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2316 } 2317 2318 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */ 2319 errorCode=U_AMBIGUOUS_ALIAS_WARNING; 2320 memset(targetBuffer, 0xff, sizeof(targetBuffer)); 2321 src=srcBuffer; 2322 target=targetBuffer; 2323 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL, 2324 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2325 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2326 target-targetBuffer!=sizeof(shiftJIS) || 2327 *target!=(char)0xff || 2328 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2329 ) { 2330 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n", 2331 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2332 } 2333 2334 /* bad arguments */ 2335 errorCode=U_MESSAGE_PARSE_ERROR; 2336 src=srcBuffer; 2337 target=targetBuffer; 2338 ucnv_convertEx(cnv2, cnv1, &target, 
targetBuffer+sizeof(targetBuffer), &src, NULL, 2339 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2340 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2341 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2342 } 2343 2344 /* pivotLimit==pivotStart */ 2345 errorCode=U_ZERO_ERROR; 2346 pivotSource=pivotTarget=pivotBuffer; 2347 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2348 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode); 2349 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2350 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode)); 2351 } 2352 2353 /* *pivotSource==NULL */ 2354 errorCode=U_ZERO_ERROR; 2355 pivotSource=NULL; 2356 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2357 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2358 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2359 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode)); 2360 } 2361 2362 /* *source==NULL */ 2363 errorCode=U_ZERO_ERROR; 2364 src=NULL; 2365 pivotSource=pivotBuffer; 2366 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2367 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2368 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2369 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); 2370 } 2371 2372 /* streaming conversion without a pivot buffer */ 2373 errorCode=U_ZERO_ERROR; 2374 src=srcBuffer; 2375 pivotSource=pivotBuffer; 2376 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2377 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); 2378 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2379 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); 2380 } 2381 2382 ucnv_close(cnv1); 2383 ucnv_close(cnv2); 2384 #endif 2385 } 
/*
 * Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8().
 *
 * Each entry is one NUL-terminated ill-formed UTF-8 byte sequence.
 * The entries are grouped by kind and, within the truncated group, by length.
 */
static const char *const badUTF8[]={
    /* a lone trail byte */
    "\x80",

    /* truncated multi-byte sequences: lead byte only */
    "\xd0", "\xe0", "\xe1", "\xed", "\xee",
    "\xf0", "\xf1", "\xf4", "\xf8", "\xfc",

    /* truncated multi-byte sequences: two bytes */
    "\xe0\x80", "\xe0\xa0", "\xe1\x80", "\xed\x80", "\xed\xa0",
    "\xee\x80", "\xf0\x80", "\xf0\x90", "\xf1\x80", "\xf4\x80",
    "\xf4\x90", "\xf8\x80", "\xfc\x80",

    /* truncated multi-byte sequences: three bytes */
    "\xf0\x80\x80", "\xf0\x90\x80", "\xf1\x80\x80", "\xf4\x80\x80",
    "\xf4\x90\x80", "\xf8\x80\x80", "\xfc\x80\x80",

    /* truncated multi-byte sequences: four bytes */
    "\xf8\x80\x80\x80", "\xfc\x80\x80\x80",

    /* truncated multi-byte sequences: five bytes */
    "\xfc\x80\x80\x80\x80",

    /* complete sequences but non-shortest forms or out of range etc. */
    "\xc0\x80",
    "\xe0\x80\x80",
    "\xed\xa0\x80",
    "\xf0\x80\x80\x80",
    "\xf4\x90\x80\x80",
    "\xf8\x80\x80\x80\x80",
    "\xfc\x80\x80\x80\x80\x80",
    "\xfe",
    "\xff"
};
2472 target=char0; 2473 ucnv_fromUnicode(cnv, 2474 &target, char0+ARG_CHAR_ARR_SIZE, 2475 &utf16Source, utf16+utf16Length, 2476 NULL, FALSE, &errorCode); 2477 *pChar0Length=(int32_t)(target-char0); 2478 2479 utf16Source=utf16; 2480 target=char1; 2481 ucnv_fromUnicode(cnv, 2482 &target, char1+ARG_CHAR_ARR_SIZE, 2483 &utf16Source, utf16+utf16Length, 2484 NULL, FALSE, &errorCode); 2485 *pChar1Length=(int32_t)(target-char1); 2486 2487 if(U_FAILURE(errorCode)) { 2488 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); 2489 return FALSE; 2490 } 2491 return TRUE; 2492 } 2493 2494 static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2495 char charUTF8[4], int32_t charUTF8Length, 2496 char char0[8], int32_t char0Length, 2497 char char1[8], int32_t char1Length) { 2498 char utf8[16]; 2499 int32_t utf8Length; 2500 2501 char output[16]; 2502 int32_t outputLength; 2503 2504 char invalidChars[8]; 2505 int8_t invalidLength; 2506 2507 const char *source; 2508 char *target; 2509 2510 UChar pivotBuffer[8]; 2511 UChar *pivotSource, *pivotTarget; 2512 2513 UErrorCode errorCode; 2514 int32_t i; 2515 2516 /* test truncated sequences */ 2517 errorCode=U_ZERO_ERROR; 2518 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2519 2520 memcpy(utf8, charUTF8, charUTF8Length); 2521 2522 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2523 /* truncated sequence? 
*/ 2524 int32_t length=strlen(badUTF8[i]); 2525 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) { 2526 continue; 2527 } 2528 2529 /* assemble a string with the test character and the truncated sequence */ 2530 memcpy(utf8+charUTF8Length, badUTF8[i], length); 2531 utf8Length=charUTF8Length+length; 2532 2533 /* convert and check the invalidChars */ 2534 source=utf8; 2535 target=output; 2536 pivotSource=pivotTarget=pivotBuffer; 2537 errorCode=U_ZERO_ERROR; 2538 ucnv_convertEx(cnv, utf8Cnv, 2539 &target, output+sizeof(output), 2540 &source, utf8+utf8Length, 2541 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2542 TRUE, TRUE, /* reset & flush */ 2543 &errorCode); 2544 outputLength=(int32_t)(target-output); 2545 (void)outputLength; /* Suppress set but not used warning. */ 2546 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { 2547 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i); 2548 continue; 2549 } 2550 2551 errorCode=U_ZERO_ERROR; 2552 invalidLength=(int8_t)sizeof(invalidChars); 2553 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode); 2554 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) { 2555 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i); 2556 } 2557 } 2558 } 2559 2560 static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2561 char charUTF8[4], int32_t charUTF8Length, 2562 char char0[8], int32_t char0Length, 2563 char char1[8], int32_t char1Length) { 2564 char utf8[600], expect[600]; 2565 int32_t utf8Length, expectLength; 2566 2567 char testName[32]; 2568 2569 UErrorCode errorCode; 2570 int32_t i; 2571 2572 errorCode=U_ZERO_ERROR; 2573 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode); 2574 2575 /* 2576 * assemble an input string with the test character between each 2577 * bad sequence, 2578 * and an expected string 
with repeated test character output 2579 */ 2580 memcpy(utf8, charUTF8, charUTF8Length); 2581 utf8Length=charUTF8Length; 2582 2583 memcpy(expect, char0, char0Length); 2584 expectLength=char0Length; 2585 2586 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2587 int32_t length=strlen(badUTF8[i]); 2588 memcpy(utf8+utf8Length, badUTF8[i], length); 2589 utf8Length+=length; 2590 2591 memcpy(utf8+utf8Length, charUTF8, charUTF8Length); 2592 utf8Length+=charUTF8Length; 2593 2594 memcpy(expect+expectLength, char1, char1Length); 2595 expectLength+=char1Length; 2596 } 2597 2598 /* expect that each bad UTF-8 sequence is detected and skipped */ 2599 strcpy(testName, "from bad UTF-8 to "); 2600 strcat(testName, converterName); 2601 2602 convertExMultiStreaming(utf8Cnv, cnv, 2603 utf8, utf8Length, 2604 expect, expectLength, 2605 testName, 2606 U_ZERO_ERROR); 2607 } 2608 2609 /* Test illegal UTF-8 input. */ 2610 static void TestConvertExFromUTF8() { 2611 static const char *const converterNames[]={ 2612 #if !UCONFIG_NO_LEGACY_CONVERSION 2613 "windows-1252", 2614 "shift-jis", 2615 #endif 2616 "us-ascii", 2617 "iso-8859-1", 2618 "utf-8" 2619 }; 2620 2621 UConverter *utf8Cnv, *cnv; 2622 UErrorCode errorCode; 2623 int32_t i; 2624 2625 /* fromUnicode versions of some character, from initial state and later */ 2626 char charUTF8[4], char0[8], char1[8]; 2627 int32_t charUTF8Length, char0Length, char1Length; 2628 2629 errorCode=U_ZERO_ERROR; 2630 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2631 if(U_FAILURE(errorCode)) { 2632 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2633 return; 2634 } 2635 2636 for(i=0; i<LENGTHOF(converterNames); ++i) { 2637 errorCode=U_ZERO_ERROR; 2638 cnv=ucnv_open(converterNames[i], &errorCode); 2639 if(U_FAILURE(errorCode)) { 2640 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode)); 2641 continue; 2642 } 2643 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, 
&char1Length)) { 2644 continue; 2645 } 2646 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2647 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2648 ucnv_close(cnv); 2649 } 2650 ucnv_close(utf8Cnv); 2651 } 2652 2653 static void TestConvertExFromUTF8_C5F0() { 2654 static const char *const converterNames[]={ 2655 #if !UCONFIG_NO_LEGACY_CONVERSION 2656 "windows-1251", 2657 "shift-jis", 2658 #endif 2659 "us-ascii", 2660 "iso-8859-1", 2661 "utf-8" 2662 }; 2663 2664 UConverter *utf8Cnv, *cnv; 2665 UErrorCode errorCode; 2666 int32_t i; 2667 2668 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 }; 2669 /* Expect "��" (2x U+FFFD as decimal NCRs) */ 2670 static const char twoNCRs[16]={ 2671 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B, 2672 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B 2673 }; 2674 static const char twoFFFD[6]={ 2675 (char)0xef, (char)0xbf, (char)0xbd, 2676 (char)0xef, (char)0xbf, (char)0xbd 2677 }; 2678 const char *expected; 2679 int32_t expectedLength; 2680 char dest[20]; /* longer than longest expectedLength */ 2681 2682 const char *src; 2683 char *target; 2684 2685 UChar pivotBuffer[128]; 2686 UChar *pivotSource, *pivotTarget; 2687 2688 errorCode=U_ZERO_ERROR; 2689 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2690 if(U_FAILURE(errorCode)) { 2691 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2692 return; 2693 } 2694 2695 for(i=0; i<LENGTHOF(converterNames); ++i) { 2696 errorCode=U_ZERO_ERROR; 2697 cnv=ucnv_open(converterNames[i], &errorCode); 2698 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 2699 NULL, NULL, &errorCode); 2700 if(U_FAILURE(errorCode)) { 2701 log_data_err("unable to open %s converter - %s\n", 2702 converterNames[i], u_errorName(errorCode)); 2703 continue; 2704 } 2705 src=bad_utf8; 2706 target=dest; 2707 uprv_memset(dest, 9, 
sizeof(dest)); 2708 if(i==LENGTHOF(converterNames)-1) { 2709 /* conversion to UTF-8 yields two U+FFFD directly */ 2710 expected=twoFFFD; 2711 expectedLength=6; 2712 } else { 2713 /* conversion to a non-Unicode charset yields two NCRs */ 2714 expected=twoNCRs; 2715 expectedLength=16; 2716 } 2717 pivotBuffer[0]=0; 2718 pivotBuffer[1]=1; 2719 pivotBuffer[2]=2; 2720 pivotSource=pivotTarget=pivotBuffer; 2721 ucnv_convertEx( 2722 cnv, utf8Cnv, 2723 &target, dest+expectedLength, 2724 &src, bad_utf8+sizeof(bad_utf8), 2725 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2726 TRUE, TRUE, &errorCode); 2727 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 || 2728 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) || 2729 dest[expectedLength]!=9 2730 ) { 2731 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]); 2732 } 2733 ucnv_close(cnv); 2734 } 2735 ucnv_close(utf8Cnv); 2736 } 2737 2738 static void 2739 TestConvertAlgorithmic() { 2740 #if !UCONFIG_NO_LEGACY_CONVERSION 2741 static const uint8_t 2742 utf8[]={ 2743 /* 4e00 30a1 ff61 0410 */ 2744 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2745 }, 2746 shiftJIS[]={ 2747 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2748 }, 2749 /*errorTarget[]={*/ 2750 /* 2751 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2752 * SUB, SUB, 0x40, SUB, SUB, 0x40 2753 */ 2754 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ 2755 /*},*/ 2756 utf16[]={ 2757 0xfe, 0xff /* BOM only, no text */ 2758 }, 2759 utf32[]={ 2760 0xff, 0xfe, 0, 0 /* BOM only, no text */ 2761 }; 2762 2763 char target[100], utf8NUL[100], shiftJISNUL[100]; 2764 2765 UConverter *cnv; 2766 UErrorCode errorCode; 2767 2768 int32_t length; 2769 2770 errorCode=U_ZERO_ERROR; 2771 cnv=ucnv_open("Shift-JIS", &errorCode); 2772 if(U_FAILURE(errorCode)) { 2773 log_data_err("unable to open a Shift-JIS converter - %s\n", 
u_errorName(errorCode)); 2774 ucnv_close(cnv); 2775 return; 2776 } 2777 2778 memcpy(utf8NUL, utf8, sizeof(utf8)); 2779 utf8NUL[sizeof(utf8)]=0; 2780 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); 2781 shiftJISNUL[sizeof(shiftJIS)]=0; 2782 2783 /* 2784 * The to/from algorithmic convenience functions share a common implementation, 2785 * so we need not test all permutations of them. 2786 */ 2787 2788 /* length in, not terminated out */ 2789 errorCode=U_ZERO_ERROR; 2790 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode); 2791 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2792 length!=sizeof(shiftJIS) || 2793 memcmp(target, shiftJIS, length)!=0 2794 ) { 2795 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n", 2796 u_errorName(errorCode), length, sizeof(shiftJIS)); 2797 } 2798 2799 /* terminated in and out */ 2800 memset(target, 0x55, sizeof(target)); 2801 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2802 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode); 2803 if( errorCode!=U_ZERO_ERROR || 2804 length!=sizeof(utf8) || 2805 memcmp(target, utf8, length)!=0 2806 ) { 2807 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n", 2808 u_errorName(errorCode), length, sizeof(shiftJIS)); 2809 } 2810 2811 /* empty string, some target buffer */ 2812 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2813 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode); 2814 if( errorCode!=U_ZERO_ERROR || 2815 length!=0 2816 ) { 2817 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n", 2818 u_errorName(errorCode), length); 2819 } 2820 2821 /* pseudo-empty string, no target buffer */ 2822 errorCode=U_ZERO_ERROR; 2823 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, 
(const char *)utf16, 2, &errorCode); 2824 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2825 length!=0 2826 ) { 2827 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2828 u_errorName(errorCode), length); 2829 } 2830 2831 errorCode=U_ZERO_ERROR; 2832 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); 2833 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2834 length!=0 2835 ) { 2836 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2837 u_errorName(errorCode), length); 2838 } 2839 2840 /* bad arguments */ 2841 errorCode=U_MESSAGE_PARSE_ERROR; 2842 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2843 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2844 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2845 } 2846 2847 /* source==NULL */ 2848 errorCode=U_ZERO_ERROR; 2849 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode); 2850 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2851 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode)); 2852 } 2853 2854 /* illegal alg. type */ 2855 errorCode=U_ZERO_ERROR; 2856 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode); 2857 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2858 log_err("ucnv_fromAlgorithmic(illegal alg. 
type) sets %s\n", u_errorName(errorCode)); 2859 } 2860 ucnv_close(cnv); 2861 #endif 2862 } 2863 2864 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 2865 static void TestLMBCSMaxChar(void) { 2866 static const struct { 2867 int8_t maxSize; 2868 const char *name; 2869 } converter[] = { 2870 /* some non-LMBCS converters - perfect test setup here */ 2871 { 1, "US-ASCII"}, 2872 { 1, "ISO-8859-1"}, 2873 2874 { 2, "UTF-16"}, 2875 { 2, "UTF-16BE"}, 2876 { 3, "UTF-8"}, 2877 { 3, "CESU-8"}, 2878 { 3, "SCSU"}, 2879 { 4, "UTF-32"}, 2880 { 4, "UTF-7"}, 2881 { 4, "IMAP-mailbox-name"}, 2882 { 4, "BOCU-1"}, 2883 2884 { 1, "windows-1256"}, 2885 { 2, "Shift-JIS"}, 2886 { 2, "ibm-16684"}, 2887 { 3, "ibm-930"}, 2888 { 3, "ibm-1390"}, 2889 { 4, "*test3"}, 2890 { 16,"*test4"}, 2891 2892 { 4, "ISCII"}, 2893 { 4, "HZ"}, 2894 2895 { 3, "ISO-2022"}, 2896 { 3, "ISO-2022-KR"}, 2897 { 6, "ISO-2022-JP"}, 2898 { 8, "ISO-2022-CN"}, 2899 2900 /* LMBCS */ 2901 { 3, "LMBCS-1"}, 2902 { 3, "LMBCS-2"}, 2903 { 3, "LMBCS-3"}, 2904 { 3, "LMBCS-4"}, 2905 { 3, "LMBCS-5"}, 2906 { 3, "LMBCS-6"}, 2907 { 3, "LMBCS-8"}, 2908 { 3, "LMBCS-11"}, 2909 { 3, "LMBCS-16"}, 2910 { 3, "LMBCS-17"}, 2911 { 3, "LMBCS-18"}, 2912 { 3, "LMBCS-19"} 2913 }; 2914 int32_t idx; 2915 2916 for (idx = 0; idx < LENGTHOF(converter); idx++) { 2917 UErrorCode status = U_ZERO_ERROR; 2918 UConverter *cnv = cnv_open(converter[idx].name, &status); 2919 if (U_FAILURE(status)) { 2920 continue; 2921 } 2922 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { 2923 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", 2924 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); 2925 } 2926 ucnv_close(cnv); 2927 } 2928 2929 /* mostly test that the macro compiles */ 2930 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { 2931 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); 2932 } 2933 } 2934 #endif 2935 2936 static void TestJ1968(void) { 2937 UErrorCode err = U_ZERO_ERROR; 2938 UConverter *cnv; 2939 char 
myConvName[] = "My really really really really really really really really really really really" 2940 " really really really really really really really really really really really" 2941 " really really really really really really really really long converter name"; 2942 UChar myConvNameU[sizeof(myConvName)]; 2943 2944 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName)); 2945 2946 err = U_ZERO_ERROR; 2947 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0; 2948 cnv = ucnv_openU(myConvNameU, &err); 2949 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2950 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2951 } 2952 2953 err = U_ZERO_ERROR; 2954 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2955 cnv = ucnv_openU(myConvNameU, &err); 2956 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2957 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2958 } 2959 2960 err = U_ZERO_ERROR; 2961 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 2962 cnv = ucnv_openU(myConvNameU, &err); 2963 if (cnv || err != U_FILE_ACCESS_ERROR) { 2964 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2965 } 2966 2967 2968 2969 2970 err = U_ZERO_ERROR; 2971 cnv = ucnv_open(myConvName, &err); 2972 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2973 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2974 } 2975 2976 err = U_ZERO_ERROR; 2977 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ','; 2978 cnv = ucnv_open(myConvName, &err); 2979 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2980 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2981 } 2982 2983 err = U_ZERO_ERROR; 2984 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2985 cnv = ucnv_open(myConvName, &err); 2986 if (cnv || err != U_FILE_ACCESS_ERROR) { 2987 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2988 } 2989 2990 err = 
U_ZERO_ERROR; 2991 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2992 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7); 2993 cnv = ucnv_open(myConvName, &err); 2994 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2995 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2996 } 2997 2998 /* The comma isn't really a part of the converter name. */ 2999 err = U_ZERO_ERROR; 3000 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 3001 cnv = ucnv_open(myConvName, &err); 3002 if (cnv || err != U_FILE_ACCESS_ERROR) { 3003 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3004 } 3005 3006 err = U_ZERO_ERROR; 3007 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' '; 3008 cnv = ucnv_open(myConvName, &err); 3009 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3010 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3011 } 3012 3013 err = U_ZERO_ERROR; 3014 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 3015 cnv = ucnv_open(myConvName, &err); 3016 if (cnv || err != U_FILE_ACCESS_ERROR) { 3017 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3018 } 3019 3020 } 3021 3022 #if !UCONFIG_NO_LEGACY_CONVERSION 3023 static void 3024 testSwap(const char *name, UBool swap) { 3025 /* 3026 * Test Unicode text. 3027 * Contains characters that are the highest for some of the 3028 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the 3029 * tables copies the entire tables. 
3030 */ 3031 static const UChar text[]={ 3032 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a 3033 }; 3034 3035 UChar uNormal[32], uSwapped[32]; 3036 char normal[32], swapped[32]; 3037 const UChar *pcu; 3038 UChar *pu; 3039 char *pc; 3040 int32_t i, normalLength, swappedLength; 3041 UChar u; 3042 char c; 3043 3044 const char *swappedName; 3045 UConverter *cnv, *swapCnv; 3046 UErrorCode errorCode; 3047 3048 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */ 3049 3050 /* open both the normal and the LF/NL-swapping converters */ 3051 strcpy(swapped, name); 3052 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING); 3053 3054 errorCode=U_ZERO_ERROR; 3055 swapCnv=ucnv_open(swapped, &errorCode); 3056 cnv=ucnv_open(name, &errorCode); 3057 if(U_FAILURE(errorCode)) { 3058 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode)); 3059 goto cleanup; 3060 } 3061 3062 /* the name must contain the swap option if and only if we expect the converter to swap */ 3063 swappedName=ucnv_getName(swapCnv, &errorCode); 3064 if(U_FAILURE(errorCode)) { 3065 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode)); 3066 goto cleanup; 3067 } 3068 3069 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING); 3070 if(swap != (pc!=NULL)) { 3071 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap); 3072 goto cleanup; 3073 } 3074 3075 /* convert to EBCDIC */ 3076 pcu=text; 3077 pc=normal; 3078 ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3079 normalLength=(int32_t)(pc-normal); 3080 3081 pcu=text; 3082 pc=swapped; 3083 ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3084 swappedLength=(int32_t)(pc-swapped); 3085 3086 if(U_FAILURE(errorCode)) { 3087 
log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode)); 3088 goto cleanup; 3089 } 3090 3091 /* compare EBCDIC output */ 3092 if(normalLength!=swappedLength) { 3093 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3094 goto cleanup; 3095 } 3096 for(i=0; i<normalLength; ++i) { 3097 /* swap EBCDIC LF/NL for comparison */ 3098 c=normal[i]; 3099 if(swap) { 3100 if(c==0x15) { 3101 c=0x25; 3102 } else if(c==0x25) { 3103 c=0x15; 3104 } 3105 } 3106 3107 if(c!=swapped[i]) { 3108 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]); 3109 goto cleanup; 3110 } 3111 } 3112 3113 /* convert back to Unicode (may not roundtrip) */ 3114 pc=normal; 3115 pu=uNormal; 3116 ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); 3117 normalLength=(int32_t)(pu-uNormal); 3118 3119 pc=normal; 3120 pu=uSwapped; 3121 ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); 3122 swappedLength=(int32_t)(pu-uSwapped); 3123 3124 if(U_FAILURE(errorCode)) { 3125 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode)); 3126 goto cleanup; 3127 } 3128 3129 /* compare EBCDIC output */ 3130 if(normalLength!=swappedLength) { 3131 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. 
%d\n", name, normalLength, swappedLength); 3132 goto cleanup; 3133 } 3134 for(i=0; i<normalLength; ++i) { 3135 /* swap EBCDIC LF/NL for comparison */ 3136 u=uNormal[i]; 3137 if(swap) { 3138 if(u==0xa) { 3139 u=0x85; 3140 } else if(u==0x85) { 3141 u=0xa; 3142 } 3143 } 3144 3145 if(u!=uSwapped[i]) { 3146 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]); 3147 goto cleanup; 3148 } 3149 } 3150 3151 /* clean up */ 3152 cleanup: 3153 ucnv_close(cnv); 3154 ucnv_close(swapCnv); 3155 } 3156 3157 static void 3158 TestEBCDICSwapLFNL() { 3159 static const struct { 3160 const char *name; 3161 UBool swap; 3162 } tests[]={ 3163 { "ibm-37", TRUE }, 3164 { "ibm-1047", TRUE }, 3165 { "ibm-1140", TRUE }, 3166 { "ibm-930", TRUE }, 3167 { "iso-8859-3", FALSE } 3168 }; 3169 3170 int i; 3171 3172 for(i=0; i<LENGTHOF(tests); ++i) { 3173 testSwap(tests[i].name, tests[i].swap); 3174 } 3175 } 3176 #else 3177 static void 3178 TestEBCDICSwapLFNL() { 3179 /* test nothing... */ 3180 } 3181 #endif 3182 3183 static void TestFromUCountPending(){ 3184 #if !UCONFIG_NO_LEGACY_CONVERSION 3185 UErrorCode status = U_ZERO_ERROR; 3186 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */ 3187 static const struct { 3188 UChar input[6]; 3189 int32_t len; 3190 int32_t exp; 3191 }fromUnicodeTests[] = { 3192 /*m:n conversion*/ 3193 {{0xdbc4},1,1}, 3194 {{ 0xdbc4, 0xde34, 0xd84d},3,1}, 3195 {{ 0xdbc4, 0xde34, 0xd900},3,3}, 3196 }; 3197 int i; 3198 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3199 if(U_FAILURE(status)){ 3200 log_data_err("Could not create converter for test3. 
Error: %s\n", u_errorName(status)); 3201 return; 3202 } 3203 for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) { 3204 char tgt[10]; 3205 char* target = tgt; 3206 char* targetLimit = target + 10; 3207 const UChar* source = fromUnicodeTests[i].input; 3208 const UChar* sourceLimit = source + fromUnicodeTests[i].len; 3209 int32_t len = 0; 3210 ucnv_reset(cnv); 3211 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3212 len = ucnv_fromUCountPending(cnv, &status); 3213 if(U_FAILURE(status)){ 3214 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3215 status = U_ZERO_ERROR; 3216 continue; 3217 } 3218 if(len != fromUnicodeTests[i].exp){ 3219 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n"); 3220 } 3221 } 3222 status = U_ZERO_ERROR; 3223 { 3224 /* 3225 * The converter has to read the tail before it knows that 3226 * only head alone matches. 3227 * At the end, the output for head will overflow the target, 3228 * middle will be pending, and tail will not have been consumed. 
3229 */ 3230 /* 3231 \U00101234 -> x (<U101234> \x07 |0) 3232 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0) 3233 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0) 3234 \U00060007 -> unassigned 3235 */ 3236 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ 3237 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ 3238 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ 3239 char tgt[10]; 3240 char* target = tgt; 3241 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ 3242 const UChar* source = head; 3243 const UChar* sourceLimit = source + u_strlen(head); 3244 int32_t len = 0; 3245 ucnv_reset(cnv); 3246 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3247 len = ucnv_fromUCountPending(cnv, &status); 3248 if(U_FAILURE(status)){ 3249 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3250 status = U_ZERO_ERROR; 3251 } 3252 if(len!=4){ 3253 log_err("ucnv_fromUInputHeld did not return correct length for head\n"); 3254 } 3255 source = middle; 3256 sourceLimit = source + u_strlen(middle); 3257 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3258 len = ucnv_fromUCountPending(cnv, &status); 3259 if(U_FAILURE(status)){ 3260 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3261 status = U_ZERO_ERROR; 3262 } 3263 if(len!=5){ 3264 log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); 3265 } 3266 source = tail; 3267 sourceLimit = source + u_strlen(tail); 3268 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3269 if(status != U_BUFFER_OVERFLOW_ERROR){ 3270 log_err("ucnv_fromUnicode call did not succeed. 
Error: %s\n", u_errorName(status)); 3271 } 3272 status = U_ZERO_ERROR; 3273 len = ucnv_fromUCountPending(cnv, &status); 3274 /* middle[1] is pending, tail has not been consumed */ 3275 if(U_FAILURE(status)){ 3276 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); 3277 } 3278 if(len!=1){ 3279 log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); 3280 } 3281 } 3282 ucnv_close(cnv); 3283 #endif 3284 } 3285 3286 static void 3287 TestToUCountPending(){ 3288 #if !UCONFIG_NO_LEGACY_CONVERSION 3289 UErrorCode status = U_ZERO_ERROR; 3290 static const struct { 3291 char input[6]; 3292 int32_t len; 3293 int32_t exp; 3294 }toUnicodeTests[] = { 3295 /*m:n conversion*/ 3296 {{0x05, 0x01, 0x02},3,3}, 3297 {{0x01, 0x02},2,2}, 3298 {{0x07, 0x00, 0x01, 0x02},4,4}, 3299 }; 3300 3301 int i; 3302 UConverterToUCallback *oldToUAction= NULL; 3303 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3304 if(U_FAILURE(status)){ 3305 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3306 return; 3307 } 3308 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3309 for(i=0; i<LENGTHOF(toUnicodeTests); ++i) { 3310 UChar tgt[20]; 3311 UChar* target = tgt; 3312 UChar* targetLimit = target + 20; 3313 const char* source = toUnicodeTests[i].input; 3314 const char* sourceLimit = source + toUnicodeTests[i].len; 3315 int32_t len = 0; 3316 ucnv_reset(cnv); 3317 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3318 len = ucnv_toUCountPending(cnv,&status); 3319 if(U_FAILURE(status)){ 3320 log_err("ucnv_toUnicode call did not succeed. 
Error: %s\n", u_errorName(status)); 3321 status = U_ZERO_ERROR; 3322 continue; 3323 } 3324 if(len != toUnicodeTests[i].exp){ 3325 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n"); 3326 } 3327 } 3328 status = U_ZERO_ERROR; 3329 ucnv_close(cnv); 3330 3331 { 3332 /* 3333 * The converter has to read the tail before it knows that 3334 * only head alone matches. 3335 * At the end, the output for head will overflow the target, 3336 * mid will be pending, and tail will not have been consumed. 3337 */ 3338 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00}; 3339 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 }; 3340 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 }; 3341 /* 3342 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0) 3343 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0) 3344 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3) 3345 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") 3346 */ 3347 UChar tgt[10]; 3348 UChar* target = tgt; 3349 UChar* targetLimit = target + 1; /* expect overflow from converting */ 3350 const char* source = head; 3351 const char* sourceLimit = source + strlen(head); 3352 int32_t len = 0; 3353 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); 3354 if(U_FAILURE(status)){ 3355 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3356 return; 3357 } 3358 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3359 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3360 len = ucnv_toUCountPending(cnv,&status); 3361 if(U_FAILURE(status)){ 3362 log_err("ucnv_toUnicode call did not succeed. 
Error: %s\n", u_errorName(status)); 3363 } 3364 if(len != 4){ 3365 log_err("Did not get the expected len for head.\n"); 3366 } 3367 source=mid; 3368 sourceLimit = source+strlen(mid); 3369 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3370 len = ucnv_toUCountPending(cnv,&status); 3371 if(U_FAILURE(status)){ 3372 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3373 } 3374 if(len != 8){ 3375 log_err("Did not get the expected len for mid.\n"); 3376 } 3377 3378 source=tail; 3379 sourceLimit = source+strlen(tail); 3380 targetLimit = target; 3381 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3382 if(status != U_BUFFER_OVERFLOW_ERROR){ 3383 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3384 } 3385 status = U_ZERO_ERROR; 3386 len = ucnv_toUCountPending(cnv,&status); 3387 /* mid[4] is pending, tail has not been consumed */ 3388 if(U_FAILURE(status)){ 3389 log_err("ucnv_toUCountPending call did not succeed. 
Error: %s\n", u_errorName(status)); 3390 } 3391 if(len != 4){ 3392 log_err("Did not get the expected len for tail.\n"); 3393 } 3394 ucnv_close(cnv); 3395 } 3396 #endif 3397 } 3398 3399 static void TestOneDefaultNameChange(const char *name, const char *expected) { 3400 UErrorCode status = U_ZERO_ERROR; 3401 UConverter *cnv; 3402 ucnv_setDefaultName(name); 3403 if(strcmp(ucnv_getDefaultName(), expected)==0) 3404 log_verbose("setDefaultName of %s works.\n", name); 3405 else 3406 log_err("setDefaultName of %s failed\n", name); 3407 cnv=ucnv_open(NULL, &status); 3408 if (U_FAILURE(status) || cnv == NULL) { 3409 log_err("opening the default converter of %s failed\n", name); 3410 return; 3411 } 3412 if(strcmp(ucnv_getName(cnv, &status), expected)==0) 3413 log_verbose("ucnv_getName of %s works.\n", name); 3414 else 3415 log_err("ucnv_getName of %s failed\n", name); 3416 ucnv_close(cnv); 3417 } 3418 3419 static void TestDefaultName(void) { 3420 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ 3421 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; 3422 strcpy(defaultName, ucnv_getDefaultName()); 3423 3424 log_verbose("getDefaultName returned %s\n", defaultName); 3425 3426 /*change the default name by setting it */ 3427 TestOneDefaultNameChange("UTF-8", "UTF-8"); 3428 #if U_CHARSET_IS_UTF8 3429 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); 3430 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); 3431 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); 3432 #else 3433 # if !UCONFIG_NO_LEGACY_CONVERSION 3434 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); 3435 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); 3436 # endif 3437 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); 3438 #endif 3439 3440 /*set the default name back*/ 3441 ucnv_setDefaultName(defaultName); 3442 } 3443 3444 /* Test that ucnv_compareNames() matches names according to spec. 
----------- */ 3445 3446 static int 3447 sign(int n) { 3448 if(n==0) { 3449 return 0; 3450 } else if(n<0) { 3451 return -1; 3452 } else /* n>0 */ { 3453 return 1; 3454 } 3455 } 3456 3457 static void 3458 compareNames(const char **names) { 3459 const char *relation, *name1, *name2; 3460 int rel, result; 3461 3462 relation=*names++; 3463 if(*relation=='=') { 3464 rel = 0; 3465 } else if(*relation=='<') { 3466 rel = -1; 3467 } else { 3468 rel = 1; 3469 } 3470 3471 name1=*names++; 3472 if(name1==NULL) { 3473 return; 3474 } 3475 while((name2=*names++)!=NULL) { 3476 result=ucnv_compareNames(name1, name2); 3477 if(sign(result)!=rel) { 3478 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); 3479 } 3480 name1=name2; 3481 } 3482 } 3483 3484 static void 3485 TestCompareNames() { 3486 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; 3487 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; 3488 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; 3489 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; 3490 3491 compareNames(equalUTF8); 3492 compareNames(equalIBM); 3493 compareNames(lessMac); 3494 compareNames(lessUTF080); 3495 } 3496 3497 static void 3498 TestSubstString() { 3499 static const UChar surrogate[1]={ 0xd900 }; 3500 char buffer[16]; 3501 3502 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3503 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3504 UConverter *cnv; 3505 UErrorCode errorCode; 3506 int32_t length; 3507 int8_t len8; 3508 3509 /* UTF-16/32: test that the BOM is output before the sub character */ 3510 errorCode=U_ZERO_ERROR; 3511 cnv=ucnv_open("UTF-16", &errorCode); 3512 if(U_FAILURE(errorCode)) { 3513 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); 3514 return; 3515 } 3516 
length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3517 ucnv_close(cnv); 3518 if(U_FAILURE(errorCode) || 3519 length!=4 || 3520 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3521 ) { 3522 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); 3523 } 3524 3525 errorCode=U_ZERO_ERROR; 3526 cnv=ucnv_open("UTF-32", &errorCode); 3527 if(U_FAILURE(errorCode)) { 3528 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); 3529 return; 3530 } 3531 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3532 ucnv_close(cnv); 3533 if(U_FAILURE(errorCode) || 3534 length!=8 || 3535 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3536 ) { 3537 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); 3538 } 3539 3540 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ 3541 errorCode=U_ZERO_ERROR; 3542 cnv=ucnv_open("ISO-8859-1", &errorCode); 3543 if(U_FAILURE(errorCode)) { 3544 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); 3545 return; 3546 } 3547 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3548 if(U_FAILURE(errorCode)) { 3549 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); 3550 } else { 3551 len8 = sizeof(buffer); 3552 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3553 /* Stateless converter, we expect the string converted to charset bytes. 
*/ 3554 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { 3555 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); 3556 } 3557 } 3558 ucnv_close(cnv); 3559 3560 #if !UCONFIG_NO_LEGACY_CONVERSION 3561 errorCode=U_ZERO_ERROR; 3562 cnv=ucnv_open("HZ", &errorCode); 3563 if(U_FAILURE(errorCode)) { 3564 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); 3565 return; 3566 } 3567 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3568 if(U_FAILURE(errorCode)) { 3569 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); 3570 } else { 3571 len8 = sizeof(buffer); 3572 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3573 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ 3574 if(U_FAILURE(errorCode) || len8!=0) { 3575 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); 3576 } 3577 } 3578 ucnv_close(cnv); 3579 #endif 3580 /* 3581 * Further testing of ucnv_setSubstString() is done via intltest convert. 3582 * We do not test edge cases of illegal arguments and similar because the 3583 * function implementation uses all of its parameters in calls to other 3584 * functions with UErrorCode parameters. 
3585 */ 3586 } 3587 3588 static void 3589 InvalidArguments() { 3590 UConverter *cnv; 3591 UErrorCode errorCode; 3592 char charBuffer[2] = {1, 1}; 3593 char ucharAsCharBuffer[2] = {2, 2}; 3594 char *charsPtr = charBuffer; 3595 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; 3596 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); 3597 3598 errorCode=U_ZERO_ERROR; 3599 cnv=ucnv_open("UTF-8", &errorCode); 3600 if(U_FAILURE(errorCode)) { 3601 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); 3602 return; 3603 } 3604 3605 errorCode=U_ZERO_ERROR; 3606 /* This one should fail because an incomplete UChar is being passed in */ 3607 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); 3608 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3609 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3610 } 3611 3612 errorCode=U_ZERO_ERROR; 3613 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3614 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); 3615 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3616 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3617 } 3618 3619 errorCode=U_ZERO_ERROR; 3620 /* This one should fail because an incomplete UChar is being passed in */ 3621 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3622 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3623 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3624 } 3625 3626 errorCode=U_ZERO_ERROR; 3627 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3628 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 
3629 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3630 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3631 } 3632 3633 if (charBuffer[0] != 1 || charBuffer[1] != 1 3634 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) 3635 { 3636 log_err("Data was incorrectly written to buffers\n"); 3637 } 3638 3639 ucnv_close(cnv); 3640 } 3641 3642 static void TestGetName() { 3643 static const char *const names[] = { 3644 "Unicode", "UTF-16", 3645 "UnicodeBigUnmarked", "UTF-16BE", 3646 "UnicodeBig", "UTF-16BE,version=1", 3647 "UnicodeLittleUnmarked", "UTF-16LE", 3648 "UnicodeLittle", "UTF-16LE,version=1", 3649 "x-UTF-16LE-BOM", "UTF-16LE,version=1" 3650 }; 3651 int32_t i; 3652 for(i = 0; i < LENGTHOF(names); i += 2) { 3653 UErrorCode errorCode = U_ZERO_ERROR; 3654 UConverter *cnv = ucnv_open(names[i], &errorCode); 3655 if(U_SUCCESS(errorCode)) { 3656 const char *name = ucnv_getName(cnv, &errorCode); 3657 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { 3658 log_err("ucnv_getName(%s) = %s != %s -- %s\n", 3659 names[i], name, names[i+1], u_errorName(errorCode)); 3660 } 3661 ucnv_close(cnv); 3662 } 3663 } 3664 } 3665 3666 static void TestUTFBOM() { 3667 static const UChar a16[] = { 0x61 }; 3668 static const char *const names[] = { 3669 "UTF-16", 3670 "UTF-16,version=1", 3671 "UTF-16BE", 3672 "UnicodeBig", 3673 "UTF-16LE", 3674 "UnicodeLittle" 3675 }; 3676 static const uint8_t expected[][5] = { 3677 #if U_IS_BIG_ENDIAN 3678 { 4, 0xfe, 0xff, 0, 0x61 }, 3679 { 4, 0xfe, 0xff, 0, 0x61 }, 3680 #else 3681 { 4, 0xff, 0xfe, 0x61, 0 }, 3682 { 4, 0xff, 0xfe, 0x61, 0 }, 3683 #endif 3684 3685 { 2, 0, 0x61 }, 3686 { 4, 0xfe, 0xff, 0, 0x61 }, 3687 3688 { 2, 0x61, 0 }, 3689 { 4, 0xff, 0xfe, 0x61, 0 } 3690 }; 3691 3692 char bytes[10]; 3693 int32_t i; 3694 3695 for(i = 0; i < LENGTHOF(names); ++i) { 3696 UErrorCode errorCode = U_ZERO_ERROR; 3697 UConverter *cnv = ucnv_open(names[i], &errorCode); 3698 
int32_t length = 0; 3699 const uint8_t *exp = expected[i]; 3700 if (U_FAILURE(errorCode)) { 3701 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); 3702 continue; 3703 } 3704 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); 3705 3706 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { 3707 log_err("unexpected %s BOM writing behavior -- %s\n", 3708 names[i], u_errorName(errorCode)); 3709 } 3710 ucnv_close(cnv); 3711 } 3712 } 3713