1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2012, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /***************************************************************************** 7 * 8 * File CU_CAPITST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda Ported for C API 13 ****************************************************************************** 14 */ 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <ctype.h> 19 #include "unicode/uloc.h" 20 #include "unicode/ucnv.h" 21 #include "unicode/ucnv_err.h" 22 #include "unicode/putil.h" 23 #include "unicode/uset.h" 24 #include "unicode/ustring.h" 25 #include "ucnv_bld.h" /* for sizeof(UConverter) */ 26 #include "cmemory.h" /* for UAlignedMemory */ 27 #include "cintltst.h" 28 #include "ccapitst.h" 29 #include "cstring.h" 30 31 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 32 33 #define NUM_CODEPAGE 1 34 #define MAX_FILE_LEN 1024*20 35 #define UCS_FILE_NAME_SIZE 512 36 37 /*returns an action other than the one provided*/ 38 #if !UCONFIG_NO_LEGACY_CONVERSION 39 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); 40 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); 41 #endif 42 43 static UConverter * 44 cnv_open(const char *name, UErrorCode *pErrorCode) { 45 if(name!=NULL && name[0]=='*') { 46 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); 47 } else { 48 return ucnv_open(name, pErrorCode); 49 } 50 } 51 52 53 static void ListNames(void); 54 static void TestFlushCache(void); 55 static void TestDuplicateAlias(void); 56 static void TestCCSID(void); 57 static void TestJ932(void); 58 static void TestJ1968(void); 59 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 60 static void TestLMBCSMaxChar(void); 61 #endif 62 63 #if !UCONFIG_NO_LEGACY_CONVERSION 64 static void TestConvertSafeCloneCallback(void); 65 #endif 66 67 static void TestEBCDICSwapLFNL(void); 68 static void TestConvertEx(void); 69 static void TestConvertExFromUTF8(void); 70 static void TestConvertExFromUTF8_C5F0(void); 71 static void TestConvertAlgorithmic(void); 72 void TestDefaultConverterError(void); /* defined in cctest.c */ 73 void TestDefaultConverterSet(void); /* defined in cctest.c */ 74 static void TestToUCountPending(void); 75 static void TestFromUCountPending(void); 76 static void TestDefaultName(void); 77 static void TestCompareNames(void); 78 static void TestSubstString(void); 79 static void InvalidArguments(void); 80 static void TestGetName(void); 81 static void TestUTFBOM(void); 82 83 void addTestConvert(TestNode** root); 84 85 void addTestConvert(TestNode** root) 86 { 87 addTest(root, &ListNames, "tsconv/ccapitst/ListNames"); 88 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert"); 89 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); 90 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); 91 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); 92 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); 93 #if !UCONFIG_NO_LEGACY_CONVERSION 94 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); 95 #endif 96 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); 97 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); 98 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); 99 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 100 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); 101 #endif 102 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); 103 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); 104 addTest(root, &TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8"); 105 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); 106 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); 107 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); 108 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); 109 #if !UCONFIG_NO_FILE_IO 110 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); 111 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); 112 #endif 113 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); 114 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); 115 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); 116 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); 117 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); 118 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); 119 } 120 121 static void ListNames(void) { 122 UErrorCode err = U_ZERO_ERROR; 123 int32_t testLong1 = 0; 124 const char* available_conv; 125 UEnumeration *allNamesEnum = NULL; 126 int32_t allNamesCount = 0; 127 uint16_t count; 128 129 log_verbose("Testing ucnv_openAllNames()..."); 130 allNamesEnum = ucnv_openAllNames(&err); 131 if(U_FAILURE(err)) { 132 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); 133 } 134 else { 135 const char *string = NULL; 136 int32_t len = 0; 137 int32_t count1 = 0; 138 int32_t count2 = 0; 139 allNamesCount = uenum_count(allNamesEnum, &err); 140 while ((string = uenum_next(allNamesEnum, &len, &err))) { 141 count1++; 142 log_verbose("read \"%s\", length %i\n", string, len); 143 } 144 if (U_FAILURE(err)) { 145 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err)); 146 err = U_ZERO_ERROR; 147 } 148 uenum_reset(allNamesEnum, &err); 149 while ((string = uenum_next(allNamesEnum, &len, &err))) { 150 count2++; 151 ucnv_close(ucnv_open(string, &err)); 152 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable"); 153 err = U_ZERO_ERROR; 154 } 155 if (count1 != count2) { 156 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n"); 157 } 158 } 159 uenum_close(allNamesEnum); 160 err = U_ZERO_ERROR; 161 162 /*Tests ucnv_getAvailableName(), getAvialableCount()*/ 163 164 log_verbose("Testing ucnv_countAvailable()..."); 165 166 testLong1=ucnv_countAvailable(); 167 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount); 168 169 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ 170 171 available_conv = ucnv_getAvailableName(testLong1); 172 /*test ucnv_getAvailableName with err condition*/ 173 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 "); 174 available_conv = ucnv_getAvailableName(-1); 175 if(available_conv != NULL){ 176 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n"); 177 } 178 179 /* Test ucnv_countAliases() etc. */ 180 count = ucnv_countAliases("utf-8", &err); 181 if(U_FAILURE(err)) { 182 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); 183 } else if(count <= 0) { 184 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); 185 } else { 186 /* try to get the aliases individually */ 187 const char *alias; 188 alias = ucnv_getAlias("utf-8", 0, &err); 189 if(U_FAILURE(err)) { 190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err)); 191 } else if(strcmp("UTF-8", alias) != 0) { 192 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias); 193 } else { 194 uint16_t aliasNum; 195 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 196 alias = ucnv_getAlias("utf-8", aliasNum, &err); 197 if(U_FAILURE(err)) { 198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 199 } else if(strlen(alias) > 20) { 200 /* sanity check */ 201 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias); 202 } else { 203 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias); 204 } 205 } 206 if(U_SUCCESS(err)) { 207 /* try to fill an array with all aliases */ 208 const char **aliases; 209 aliases=(const char **)malloc(count * sizeof(const char *)); 210 if(aliases != 0) { 211 ucnv_getAliases("utf-8", aliases, &err); 212 if(U_FAILURE(err)) { 213 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err)); 214 } else { 215 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 216 /* compare the pointers with the ones returned individually */ 217 alias = ucnv_getAlias("utf-8", aliasNum, &err); 218 if(U_FAILURE(err)) { 219 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 220 } else if(aliases[aliasNum] != alias) { 221 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum); 222 } 223 } 224 } 225 free((char **)aliases); 226 } 227 } 228 } 229 } 230 } 231 232 233 static void TestConvert() 234 { 235 #if !UCONFIG_NO_LEGACY_CONVERSION 236 char myptr[4]; 237 char save[4]; 238 int32_t testLong1 = 0; 239 uint16_t rest = 0; 240 int32_t len = 0; 241 int32_t x = 0; 242 FILE* ucs_file_in = NULL; 243 UChar BOM = 0x0000; 244 UChar myUChar = 0x0000; 245 char* mytarget; /* [MAX_FILE_LEN] */ 246 char* mytarget_1; 247 char* mytarget_use; 248 UChar* consumedUni = NULL; 249 char* consumed = NULL; 250 char* output_cp_buffer; /* [MAX_FILE_LEN] */ 251 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */ 252 UChar* ucs_file_buffer_use; 253 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ 254 UChar* my_ucs_file_buffer_1; 255 int8_t ii = 0; 256 int32_t j = 0; 257 uint16_t codepage_index = 0; 258 int32_t cp = 0; 259 UErrorCode err = U_ZERO_ERROR; 260 char ucs_file_name[UCS_FILE_NAME_SIZE]; 261 UConverterFromUCallback MIA1, MIA1_2; 262 UConverterToUCallback MIA2, MIA2_2; 263 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2; 264 UConverter* someConverters[5]; 265 UConverter* myConverter = 0; 266 UChar* displayname = 0; 267 268 const char* locale; 269 270 UChar* uchar1 = 0; 271 UChar* uchar2 = 0; 272 UChar* uchar3 = 0; 273 int32_t targetcapacity2; 274 int32_t targetcapacity; 275 int32_t targetsize; 276 int32_t disnamelen; 277 278 const UChar* tmp_ucs_buf; 279 const UChar* tmp_consumedUni=NULL; 280 const char* tmp_mytarget_use; 281 const char* tmp_consumed; 282 283 /****************************************************************** 284 Checking Unicode -> ksc 285 ******************************************************************/ 286 287 const char* CodePagesToTest[NUM_CODEPAGE] = 288 { 289 "ibm-949_P110-1999" 290 291 292 }; 293 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] = 294 { 295 949 296 }; 297 298 299 const int8_t CodePagesMinChars[NUM_CODEPAGE] = 300 { 301 1 302 303 }; 304 305 const int8_t CodePagesMaxChars[NUM_CODEPAGE] = 306 { 307 2 308 309 }; 310 311 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] = 312 { 313 0xAFFE 314 }; 315 316 const char* CodePagesTestFiles[NUM_CODEPAGE] = 317 { 318 "uni-text.bin" 319 }; 320 321 322 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] = 323 { 324 UCNV_IBM 325 326 }; 327 328 const char* CodePagesLocale[NUM_CODEPAGE] = 329 { 330 "ko_KR" 331 }; 332 333 UConverterFromUCallback oldFromUAction = NULL; 334 UConverterToUCallback oldToUAction = NULL; 335 const void* oldFromUContext = NULL; 336 const void* oldToUContext = NULL; 337 338 /* Allocate memory */ 339 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); 340 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0])); 341 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0])); 342 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0])); 343 344 ucs_file_buffer_use = ucs_file_buffer; 345 mytarget_1=mytarget; 346 mytarget_use = mytarget; 347 my_ucs_file_buffer_1=my_ucs_file_buffer; 348 349 /* flush the converter cache to get a consistent state before the flushing is tested */ 350 ucnv_flushCache(); 351 352 /*Testing ucnv_openU()*/ 353 { 354 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/ 355 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */ 356 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */ 357 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"}; 358 UChar illegalName[100]; 359 UConverter *converter=NULL; 360 err=U_ZERO_ERROR; 361 converter=ucnv_openU(converterName, &err); 362 if(U_FAILURE(err)){ 363 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err)); 364 } 365 ucnv_close(converter); 366 err=U_ZERO_ERROR; 367 converter=ucnv_openU(NULL, &err); 368 if(U_FAILURE(err)){ 369 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err)); 370 } 371 ucnv_close(converter); 372 /*testing with error value*/ 373 err=U_ILLEGAL_ARGUMENT_ERROR; 374 converter=ucnv_openU(converterName, &err); 375 if(!(converter == NULL)){ 376 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n"); 377 } 378 ucnv_close(converter); 379 err=U_ZERO_ERROR; 380 u_uastrcpy(illegalName, ""); 381 u_uastrcpy(illegalName, illegalNameChars); 382 ucnv_openU(illegalName, &err); 383 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){ 384 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n"); 385 } 386 387 err=U_ZERO_ERROR; 388 ucnv_openU(firstSortedName, &err); 389 if(err!=U_FILE_ACCESS_ERROR){ 390 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n"); 391 } 392 393 err=U_ZERO_ERROR; 394 ucnv_openU(lastSortedName, &err); 395 if(err!=U_FILE_ACCESS_ERROR){ 396 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n"); 397 } 398 399 err=U_ZERO_ERROR; 400 } 401 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n"); 402 { 403 UConverter *cnv=NULL; 404 err=U_ZERO_ERROR; 405 cnv=ucnv_open("ibm-949,Madhu", &err); 406 if(U_FAILURE(err)){ 407 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err)); 408 } 409 ucnv_close(cnv); 410 411 } 412 /*Testing ucnv_convert()*/ 413 { 414 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0; 415 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; 416 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; 417 char *target=0; 418 sourceLimit=sizeof(source)/sizeof(source[0]); 419 err=U_ZERO_ERROR; 420 targetLimit=0; 421 422 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err); 423 if(err == U_BUFFER_OVERFLOW_ERROR){ 424 err=U_ZERO_ERROR; 425 targetLimit=targetCapacity+1; 426 target=(char*)malloc(sizeof(char) * targetLimit); 427 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 428 } 429 if(U_FAILURE(err)){ 430 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err)); 431 } 432 else { 433 for(i=0; i<targetCapacity; i++){ 434 if(target[i] != expectedTarget[i]){ 435 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]); 436 } 437 } 438 439 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err); 440 if(U_FAILURE(err) || i!=7){ 441 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n", 442 u_errorName(err), i); 443 } 444 445 /*Test error conditions*/ 446 err=U_ZERO_ERROR; 447 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err); 448 if(i !=0){ 449 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n"); 450 } 451 452 err=U_ILLEGAL_ARGUMENT_ERROR; 453 sourceLimit=sizeof(source)/sizeof(source[0]); 454 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 455 if(i !=0 ){ 456 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); 457 } 458 459 err=U_ZERO_ERROR; 460 sourceLimit=sizeof(source)/sizeof(source[0]); 461 targetLimit=0; 462 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 463 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ 464 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n"); 465 } 466 err=U_ZERO_ERROR; 467 free(target); 468 } 469 } 470 471 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/ 472 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n"); 473 err=U_ILLEGAL_ARGUMENT_ERROR; 474 if(ucnv_open(NULL, &err) != NULL){ 475 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 476 } 477 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){ 478 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 479 } 480 err=U_ZERO_ERROR; 481 482 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */ 483 log_verbose("\n---Testing ucnv_open default...\n"); 484 someConverters[0] = ucnv_open(NULL,&err); 485 someConverters[1] = ucnv_open(NULL,&err); 486 someConverters[2] = ucnv_open("utf8", &err); 487 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err); 488 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */ 489 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));} 490 491 /* Testing ucnv_getName()*/ 492 /*default code page */ 493 ucnv_getName(someConverters[0], &err); 494 if(U_FAILURE(err)) { 495 log_data_err("getName[0] failed\n"); 496 } else { 497 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err)); 498 } 499 ucnv_getName(someConverters[1], &err); 500 if(U_FAILURE(err)) { 501 log_data_err("getName[1] failed\n"); 502 } else { 503 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); 504 } 505 506 ucnv_close(someConverters[0]); 507 ucnv_close(someConverters[1]); 508 ucnv_close(someConverters[2]); 509 ucnv_close(someConverters[3]); 510 511 512 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) 513 { 514 int32_t i = 0; 515 516 err = U_ZERO_ERROR; 517 #ifdef U_TOPSRCDIR 518 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); 519 #else 520 strcpy(ucs_file_name, loadTestData(&err)); 521 522 if(U_FAILURE(err)){ 523 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err)); 524 return; 525 } 526 527 { 528 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); 529 530 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ 531 *(index+1)=0; 532 } 533 } 534 535 strcat(ucs_file_name,".."U_FILE_SEP_STRING); 536 #endif 537 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); 538 539 ucs_file_in = fopen(ucs_file_name,"rb"); 540 if (!ucs_file_in) 541 { 542 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name); 543 return; 544 } 545 546 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/ 547 548 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */ 549 /* ucnv_flushCache(); */ 550 myConverter =ucnv_open( "ibm-949", &err); 551 if (!myConverter || U_FAILURE(err)) 552 { 553 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); 554 fclose(ucs_file_in); 555 break; 556 } 557 558 /*testing for ucnv_getName() */ 559 log_verbose("Testing ucnv_getName()...\n"); 560 ucnv_getName(myConverter, &err); 561 if(U_FAILURE(err)) 562 log_err("Error in getName\n"); 563 else 564 { 565 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); 566 } 567 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) 568 log_err("getName failed\n"); 569 else 570 log_verbose("getName ok\n"); 571 /*Test getName with error condition*/ 572 { 573 const char* name=0; 574 err=U_ILLEGAL_ARGUMENT_ERROR; 575 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR"); 576 name=ucnv_getName(myConverter, &err); 577 if(name != NULL){ 578 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail"); 579 } 580 err=U_ZERO_ERROR; 581 } 582 583 584 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/ 585 586 log_verbose("Testing ucnv_getMaxCharSize()...\n"); 587 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index]) 588 log_verbose("Max byte per character OK\n"); 589 else 590 log_err("Max byte per character failed\n"); 591 592 log_verbose("\n---Testing ucnv_getMinCharSize()...\n"); 593 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index]) 594 log_verbose("Min byte per character OK\n"); 595 else 596 log_err("Min byte per character failed\n"); 597 598 599 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/ 600 log_verbose("\n---Testing ucnv_getSubstChars...\n"); 601 ii=4; 602 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 603 if (ii <= 0) { 604 log_err("ucnv_getSubstChars returned a negative number %d\n", ii); 605 } 606 607 for(x=0;x<ii;x++) 608 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]); 609 if (rest==CodePagesSubstitutionChars[codepage_index]) 610 log_verbose("Substitution character ok\n"); 611 else 612 log_err("Substitution character failed.\n"); 613 614 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n"); 615 ucnv_setSubstChars(myConverter, myptr, ii, &err); 616 if (U_FAILURE(err)) 617 { 618 log_err("FAILURE! %s\n", myErrorName(err)); 619 } 620 ucnv_getSubstChars(myConverter,save, &ii, &err); 621 if (U_FAILURE(err)) 622 { 623 log_err("FAILURE! %s\n", myErrorName(err)); 624 } 625 626 if (strncmp(save, myptr, ii)) 627 log_err("Saved substitution character failed\n"); 628 else 629 log_verbose("Saved substitution character ok\n"); 630 631 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/ 632 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n"); 633 ii=1; 634 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 635 if(err != U_INDEX_OUTOFBOUNDS_ERROR){ 636 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err)); 637 } 638 err=U_ZERO_ERROR; 639 ii=4; 640 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 641 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n"); 642 ucnv_setSubstChars(myConverter, myptr, 0, &err); 643 if(err != U_ILLEGAL_ARGUMENT_ERROR){ 644 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err)); 645 } 646 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n"); 647 strcpy(myptr, "abc"); 648 ucnv_setSubstChars(myConverter, myptr, ii, &err); 649 err=U_ZERO_ERROR; 650 ucnv_getSubstChars(myConverter, save, &ii, &err); 651 if(strncmp(save, myptr, ii) == 0){ 652 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n"); 653 } 654 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n"); 655 err=U_ZERO_ERROR; 656 strcpy(myptr, "abc"); 657 ucnv_setSubstChars(myConverter, myptr, ii, &err); 658 err=U_ILLEGAL_ARGUMENT_ERROR; 659 ucnv_getSubstChars(myConverter, save, &ii, &err); 660 if(strncmp(save, myptr, ii) == 0){ 661 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n"); 662 } 663 err=U_ZERO_ERROR; 664 /*------*/ 665 666 #ifdef U_ENABLE_GENERIC_ISO_2022 667 /*resetState ucnv_reset()*/ 668 log_verbose("\n---Testing ucnv_reset()..\n"); 669 ucnv_reset(myConverter); 670 { 671 UChar32 c; 672 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80}; 673 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 674 UConverter *cnv=ucnv_open("ISO_2022", &err); 675 if(U_FAILURE(err)) { 676 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 677 } 678 c=ucnv_getNextUChar(cnv, &source, limit, &err); 679 if((U_FAILURE(err) || c != (UChar32)0x0031)) { 680 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err)); 681 } 682 ucnv_reset(cnv); 683 ucnv_close(cnv); 684 685 } 686 #endif 687 688 /*getDisplayName*/ 689 log_verbose("\n---Testing ucnv_getDisplayName()...\n"); 690 locale=CodePagesLocale[codepage_index]; 691 len=0; 692 displayname=NULL; 693 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); 694 if(err==U_BUFFER_OVERFLOW_ERROR) { 695 err=U_ZERO_ERROR; 696 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); 697 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); 698 if(U_FAILURE(err)) { 699 log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); 700 } 701 else { 702 log_verbose(" getDisplayName o.k.\n"); 703 } 704 free(displayname); 705 displayname=NULL; 706 } 707 else { 708 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); 709 } 710 /*test ucnv_getDiaplayName with error condition*/ 711 err= U_ILLEGAL_ARGUMENT_ERROR; 712 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); 713 if( len !=0 ){ 714 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); 715 } 716 /*test ucnv_getDiaplayName with error condition*/ 717 err=U_ZERO_ERROR; 718 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); 719 if( len !=0 || U_SUCCESS(err)){ 720 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); 721 } 722 err=U_ZERO_ERROR; 723 724 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ 725 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context); 726 727 log_verbose("\n---Testing ucnv_setFromUCallBack...\n"); 728 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 729 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context) 730 { 731 log_err("FAILURE! %s\n", myErrorName(err)); 732 } 733 734 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 735 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM) 736 log_err("get From UCallBack failed\n"); 737 else 738 log_verbose("get From UCallBack ok\n"); 739 740 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n"); 741 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err); 742 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM) 743 { 744 log_err("FAILURE! %s\n", myErrorName(err)); 745 } 746 747 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 748 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context) 749 log_err("get From UCallBack action failed\n"); 750 else 751 log_verbose("get From UCallBack action ok\n"); 752 753 /*testing ucnv_setToUCallBack with error conditions*/ 754 err=U_ILLEGAL_ARGUMENT_ERROR; 755 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n"); 756 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 757 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 758 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){ 759 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 760 } 761 err=U_ZERO_ERROR; 762 763 764 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/ 765 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context); 766 767 log_verbose("\n---Testing setTo UCallBack...\n"); 768 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err); 769 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context) 770 { 771 log_err("FAILURE! %s\n", myErrorName(err)); 772 } 773 774 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 775 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM) 776 log_err("To UCallBack failed\n"); 777 else 778 log_verbose("To UCallBack ok\n"); 779 780 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n"); 781 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err); 782 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM) 783 { log_err("FAILURE! %s\n", myErrorName(err)); } 784 785 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 786 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context) 787 log_err("To UCallBack failed\n"); 788 else 789 log_verbose("To UCallBack ok\n"); 790 791 /*testing ucnv_setToUCallBack with error conditions*/ 792 err=U_ILLEGAL_ARGUMENT_ERROR; 793 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n"); 794 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err); 795 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 796 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){ 797 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 798 } 799 err=U_ZERO_ERROR; 800 801 802 /*getcodepageid testing ucnv_getCCSID() */ 803 log_verbose("\n----Testing getCCSID....\n"); 804 cp = ucnv_getCCSID(myConverter,&err); 805 if (U_FAILURE(err)) 806 { 807 log_err("FAILURE!..... %s\n", myErrorName(err)); 808 } 809 if (cp != CodePageNumberToTest[codepage_index]) 810 log_err("Codepage number test failed\n"); 811 else 812 log_verbose("Codepage number test OK\n"); 813 814 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/ 815 err=U_ILLEGAL_ARGUMENT_ERROR; 816 if( ucnv_getCCSID(myConverter,&err) != -1){ 817 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n"); 818 } 819 err=U_ZERO_ERROR; 820 821 /*getCodepagePlatform testing ucnv_getPlatform()*/ 822 log_verbose("\n---Testing getCodepagePlatform ..\n"); 823 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err)) 824 log_err("Platform codepage test failed\n"); 825 else 826 log_verbose("Platform codepage test ok\n"); 827 828 if (U_FAILURE(err)) 829 { 830 log_err("FAILURE! %s\n", myErrorName(err)); 831 } 832 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/ 833 err= U_ILLEGAL_ARGUMENT_ERROR; 834 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){ 835 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n"); 836 } 837 err=U_ZERO_ERROR; 838 839 840 /*Reads the BOM*/ 841 fread(&BOM, sizeof(UChar), 1, ucs_file_in); 842 if (BOM!=0xFEFF && BOM!=0xFFFE) 843 { 844 log_err("File Missing BOM...Bailing!\n"); 845 fclose(ucs_file_in); 846 break; 847 } 848 849 850 /*Reads in the file*/ 851 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) 852 { 853 myUChar = ucs_file_buffer[i-1]; 854 855 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/ 856 } 857 858 myUChar = ucs_file_buffer[i-1]; 859 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/ 860 861 862 /*testing ucnv_fromUChars() and ucnv_toUChars() */ 863 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/ 864 865 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1)); 866 u_uastrcpy(uchar1,""); 867 u_strncpy(uchar1,ucs_file_buffer,i); 868 uchar1[i] = 0; 869 870 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1)); 871 u_uastrcpy(uchar3,""); 872 u_strncpy(uchar3,ucs_file_buffer,i); 873 uchar3[i] = 0; 874 875 /*Calls the Conversion Routine */ 876 testLong1 = MAX_FILE_LEN; 877 log_verbose("\n---Testing ucnv_fromUChars()\n"); 878 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 879 if (U_FAILURE(err)) 880 { 881 log_err("\nFAILURE...%s\n", myErrorName(err)); 882 } 883 else 884 log_verbose(" ucnv_fromUChars() o.k.\n"); 885 886 /*test the conversion routine */ 887 log_verbose("\n---Testing ucnv_toUChars()\n"); 888 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */ 889 targetcapacity2=0; 890 targetsize = ucnv_toUChars(myConverter, 891 NULL, 892 targetcapacity2, 893 output_cp_buffer, 894 strlen(output_cp_buffer), 895 &err); 896 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ 897 898 if(err==U_BUFFER_OVERFLOW_ERROR) 899 { 900 err=U_ZERO_ERROR; 901 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar)); 902 targetsize = ucnv_toUChars(myConverter, 903 uchar2, 904 targetsize+1, 905 output_cp_buffer, 906 strlen(output_cp_buffer), 907 &err); 908 909 if(U_FAILURE(err)) 910 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err)); 911 else 912 log_verbose(" ucnv_toUChars() o.k.\n"); 913 914 if(u_strcmp(uchar1,uchar2)!=0) 915 log_err("equality test failed with conversion routine\n"); 916 } 917 else 918 { 919 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n"); 920 } 921 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/ 922 err=U_ILLEGAL_ARGUMENT_ERROR; 923 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n"); 924 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 925 if (targetcapacity !=0) { 926 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 927 } 928 err=U_ZERO_ERROR; 929 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n"); 930 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err); 931 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) { 932 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n"); 933 } 934 err=U_ZERO_ERROR; 935 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n"); 936 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err); 937 if (targetcapacity !=0) { 938 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n"); 939 } 940 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n"); 941 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err); 942 if (err != U_BUFFER_OVERFLOW_ERROR) { 943 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); 944 } 945 /*toUChars with error conditions*/ 946 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); 947 if(targetsize != 0){ 948 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 949 } 950 err=U_ZERO_ERROR; 951 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); 952 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ 953 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); 954 } 955 err=U_ZERO_ERROR; 956 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err); 957 if (targetsize !=0) { 958 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); 959 } 960 targetcapacity2=0; 961 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); 962 if (err != U_STRING_NOT_TERMINATED_WARNING) { 963 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", 964 u_errorName(err)); 965 } 966 err=U_ZERO_ERROR; 967 /*-----*/ 968 969 970 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ 971 /*Clean up re-usable vars*/ 972 j=0; 973 log_verbose("Testing ucnv_fromUnicode().....\n"); 974 tmp_ucs_buf=ucs_file_buffer_use; 975 ucnv_fromUnicode(myConverter, &mytarget_1, 976 mytarget + MAX_FILE_LEN, 977 &tmp_ucs_buf, 978 ucs_file_buffer_use+i, 979 NULL, 980 TRUE, 981 &err); 982 consumedUni = (UChar*)tmp_consumedUni; 983 984 if (U_FAILURE(err)) 985 { 986 log_err("FAILURE! %s\n", myErrorName(err)); 987 } 988 else 989 log_verbose("ucnv_fromUnicode() o.k.\n"); 990 991 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */ 992 log_verbose("Testing ucnv_toUnicode().....\n"); 993 tmp_mytarget_use=mytarget_use; 994 tmp_consumed = consumed; 995 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1, 996 my_ucs_file_buffer + MAX_FILE_LEN, 997 &tmp_mytarget_use, 998 mytarget_use + (mytarget_1 - mytarget), 999 NULL, 1000 FALSE, 1001 &err); 1002 consumed = (char*)tmp_consumed; 1003 if (U_FAILURE(err)) 1004 { 1005 log_err("FAILURE! %s\n", myErrorName(err)); 1006 } 1007 else 1008 log_verbose("ucnv_toUnicode() o.k.\n"); 1009 1010 1011 log_verbose("\n---Testing RoundTrip ...\n"); 1012 1013 1014 u_strncpy(uchar3, my_ucs_file_buffer,i); 1015 uchar3[i] = 0; 1016 1017 if(u_strcmp(uchar1,uchar3)==0) 1018 log_verbose("Equality test o.k.\n"); 1019 else 1020 log_err("Equality test failed\n"); 1021 1022 /*sanity compare */ 1023 if(uchar2 == NULL) 1024 { 1025 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__); 1026 } 1027 else 1028 { 1029 if(u_strcmp(uchar2, uchar3)==0) 1030 log_verbose("Equality test o.k.\n"); 1031 else 1032 log_err("Equality test failed\n"); 1033 } 1034 1035 fclose(ucs_file_in); 1036 ucnv_close(myConverter); 1037 if (uchar1 != 0) free(uchar1); 1038 if (uchar2 != 0) free(uchar2); 1039 if (uchar3 != 0) free(uchar3); 1040 } 1041 1042 free((void*)mytarget); 1043 free((void*)output_cp_buffer); 1044 free((void*)ucs_file_buffer); 1045 free((void*)my_ucs_file_buffer); 1046 #endif 1047 } 1048 1049 #if !UCONFIG_NO_LEGACY_CONVERSION 1050 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) 1051 { 1052 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; 1053 } 1054 1055 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) 1056 { 1057 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; 1058 } 1059 #endif 1060 1061 static void TestFlushCache(void) { 1062 #if !UCONFIG_NO_LEGACY_CONVERSION 1063 UErrorCode err = U_ZERO_ERROR; 1064 UConverter* someConverters[5]; 1065 int flushCount = 0; 1066 1067 /* flush the converter cache to get a consistent state before the flushing is tested */ 1068 ucnv_flushCache(); 1069 1070 /*Testing ucnv_open()*/ 1071 /* Note: These converters have been chosen because they do NOT 1072 encode the Latin characters (U+0041, ...), and therefore are 1073 highly unlikely to be chosen as system default codepages */ 1074 1075 someConverters[0] = ucnv_open("ibm-1047", &err); 1076 if (U_FAILURE(err)) { 1077 log_data_err("FAILURE! %s\n", myErrorName(err)); 1078 } 1079 1080 someConverters[1] = ucnv_open("ibm-1047", &err); 1081 if (U_FAILURE(err)) { 1082 log_data_err("FAILURE! %s\n", myErrorName(err)); 1083 } 1084 1085 someConverters[2] = ucnv_open("ibm-1047", &err); 1086 if (U_FAILURE(err)) { 1087 log_data_err("FAILURE! %s\n", myErrorName(err)); 1088 } 1089 1090 someConverters[3] = ucnv_open("gb18030", &err); 1091 if (U_FAILURE(err)) { 1092 log_data_err("FAILURE! %s\n", myErrorName(err)); 1093 } 1094 1095 someConverters[4] = ucnv_open("ibm-954", &err); 1096 if (U_FAILURE(err)) { 1097 log_data_err("FAILURE! %s\n", myErrorName(err)); 1098 } 1099 1100 1101 /* Testing ucnv_flushCache() */ 1102 log_verbose("\n---Testing ucnv_flushCache...\n"); 1103 if ((flushCount=ucnv_flushCache())==0) 1104 log_verbose("Flush cache ok\n"); 1105 else 1106 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1107 1108 /*testing ucnv_close() and ucnv_flushCache() */ 1109 ucnv_close(someConverters[0]); 1110 ucnv_close(someConverters[1]); 1111 1112 if ((flushCount=ucnv_flushCache())==0) 1113 log_verbose("Flush cache ok\n"); 1114 else 1115 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1116 1117 ucnv_close(someConverters[2]); 1118 ucnv_close(someConverters[3]); 1119 1120 if ((flushCount=ucnv_flushCache())==2) 1121 log_verbose("Flush cache ok\n"); /*because first, second and third are same */ 1122 else 1123 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", 1124 __LINE__, 1125 flushCount); 1126 1127 ucnv_close(someConverters[4]); 1128 if ( (flushCount=ucnv_flushCache())==1) 1129 log_verbose("Flush cache ok\n"); 1130 else 1131 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); 1132 #endif 1133 } 1134 1135 /** 1136 * Test the converter alias API, specifically the fuzzy matching of 1137 * alias names and the alias table integrity. Make sure each 1138 * converter has at least one alias (itself), and that its listed 1139 * aliases map back to itself. Check some hard-coded UTF-8 and 1140 * ISO_2022 aliases to make sure they work. 1141 */ 1142 static void TestAlias() { 1143 int32_t i, ncnv; 1144 UErrorCode status = U_ZERO_ERROR; 1145 1146 /* Predetermined aliases that we expect to map back to ISO_2022 1147 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */ 1148 const char* ISO_2022_NAMES[] = 1149 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", 1150 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; 1151 int32_t ISO_2022_NAMES_LENGTH = 1152 sizeof(ISO_2022_NAMES) / sizeof(ISO_2022_NAMES[0]); 1153 const char *UTF8_NAMES[] = 1154 { "UTF-8", "utf-8", "utf8", "ibm-1208", 1155 "utf_8", "ibm1208", "cp1208" }; 1156 int32_t UTF8_NAMES_LENGTH = 1157 sizeof(UTF8_NAMES) / sizeof(UTF8_NAMES[0]); 1158 1159 struct { 1160 const char *name; 1161 const char *alias; 1162 } CONVERTERS_NAMES[] = { 1163 { "UTF-32BE", "UTF32_BigEndian" }, 1164 { "UTF-32LE", "UTF32_LittleEndian" }, 1165 { "UTF-32", "ISO-10646-UCS-4" }, 1166 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, 1167 { "UTF-32", "ucs-4" } 1168 }; 1169 int32_t CONVERTERS_NAMES_LENGTH = sizeof(CONVERTERS_NAMES) / sizeof(*CONVERTERS_NAMES); 1170 1171 /* When there are bugs in gencnval or in ucnv_io, converters can 1172 appear to have no aliases. */ 1173 ncnv = ucnv_countAvailable(); 1174 log_verbose("%d converters\n", ncnv); 1175 for (i=0; i<ncnv; ++i) { 1176 const char *name = ucnv_getAvailableName(i); 1177 const char *alias0; 1178 uint16_t na = ucnv_countAliases(name, &status); 1179 uint16_t j; 1180 UConverter *cnv; 1181 1182 if (na == 0) { 1183 log_err("FAIL: Converter \"%s\" (i=%d)" 1184 " has no aliases; expect at least one\n", 1185 name, i); 1186 continue; 1187 } 1188 cnv = ucnv_open(name, &status); 1189 if (U_FAILURE(status)) { 1190 log_data_err("FAIL: Converter \"%s\" (i=%d)" 1191 " can't be opened.\n", 1192 name, i); 1193 } 1194 else { 1195 if (strcmp(ucnv_getName(cnv, &status), name) != 0 1196 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { 1197 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. " 1198 "The should be the same\n", 1199 name, ucnv_getName(cnv, &status)); 1200 } 1201 } 1202 ucnv_close(cnv); 1203 1204 status = U_ZERO_ERROR; 1205 alias0 = ucnv_getAlias(name, 0, &status); 1206 for (j=1; j<na; ++j) { 1207 const char *alias; 1208 /* Make sure each alias maps back to the the same list of 1209 aliases. Assume that if alias 0 is the same, the whole 1210 list is the same (this should always be true). */ 1211 const char *mapBack; 1212 1213 status = U_ZERO_ERROR; 1214 alias = ucnv_getAlias(name, j, &status); 1215 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1216 log_err("FAIL: Converter \"%s\"is ambiguous\n", name); 1217 } 1218 1219 if (alias == NULL) { 1220 log_err("FAIL: Converter \"%s\" -> " 1221 "alias[%d]=NULL\n", 1222 name, j); 1223 continue; 1224 } 1225 1226 mapBack = ucnv_getAlias(alias, 0, &status); 1227 1228 if (mapBack == NULL) { 1229 log_err("FAIL: Converter \"%s\" -> " 1230 "alias[%d]=\"%s\" -> " 1231 "alias[0]=NULL, exp. \"%s\"\n", 1232 name, j, alias, alias0); 1233 continue; 1234 } 1235 1236 if (0 != strcmp(alias0, mapBack)) { 1237 int32_t idx; 1238 UBool foundAlias = FALSE; 1239 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1240 /* Make sure that we only get this mismapping when there is 1241 an ambiguous alias, and the other converter has this alias too. */ 1242 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) { 1243 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) { 1244 foundAlias = TRUE; 1245 break; 1246 } 1247 } 1248 } 1249 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */ 1250 1251 if (!foundAlias) { 1252 log_err("FAIL: Converter \"%s\" -> " 1253 "alias[%d]=\"%s\" -> " 1254 "alias[0]=\"%s\", exp. \"%s\"\n", 1255 name, j, alias, mapBack, alias0); 1256 } 1257 } 1258 } 1259 } 1260 1261 1262 /* Check a list of predetermined aliases that we expect to map 1263 * back to ISO_2022 and UTF-8. */ 1264 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) { 1265 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status); 1266 if(!mapBack) { 1267 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]); 1268 continue; 1269 } 1270 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { 1271 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", 1272 ISO_2022_NAMES[i], mapBack); 1273 } 1274 } 1275 1276 1277 for (i=1; i<UTF8_NAMES_LENGTH; ++i) { 1278 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status); 1279 if(!mapBack) { 1280 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]); 1281 continue; 1282 } 1283 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) { 1284 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n", 1285 UTF8_NAMES[i], mapBack); 1286 } 1287 } 1288 1289 /* 1290 * Check a list of predetermined aliases that we expect to map 1291 * back to predermined converter names. 1292 */ 1293 1294 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { 1295 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); 1296 if(!mapBack) { 1297 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); 1298 continue; 1299 } 1300 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { 1301 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n", 1302 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name); 1303 } 1304 } 1305 1306 } 1307 1308 static void TestDuplicateAlias(void) { 1309 const char *alias; 1310 UErrorCode status = U_ZERO_ERROR; 1311 1312 status = U_ZERO_ERROR; 1313 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); 1314 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1315 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias); 1316 } 1317 status = U_ZERO_ERROR; 1318 alias = ucnv_getStandardName("ibm-943", "IANA", &status); 1319 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1320 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias); 1321 } 1322 status = U_ZERO_ERROR; 1323 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status); 1324 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) { 1325 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias); 1326 } 1327 } 1328 1329 1330 /* Test safe clone callback */ 1331 1332 static uint32_t TSCC_nextSerial() 1333 { 1334 static uint32_t n = 1; 1335 1336 return (n++); 1337 } 1338 1339 typedef struct 1340 { 1341 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */ 1342 uint32_t serial; /* minted from nextSerial, above */ 1343 UBool wasClosed; /* close happened on the object */ 1344 } TSCCContext; 1345 1346 static TSCCContext *TSCC_clone(TSCCContext *ctx) 1347 { 1348 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext)); 1349 1350 newCtx->serial = TSCC_nextSerial(); 1351 newCtx->wasClosed = 0; 1352 newCtx->magic = 0xC0FFEE; 1353 1354 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial); 1355 1356 return newCtx; 1357 } 1358 1359 #if !UCONFIG_NO_LEGACY_CONVERSION 1360 static void TSCC_fromU(const void *context, 1361 UConverterFromUnicodeArgs *fromUArgs, 1362 const UChar* codeUnits, 1363 int32_t length, 1364 UChar32 codePoint, 1365 UConverterCallbackReason reason, 1366 UErrorCode * err) 1367 { 1368 TSCCContext *ctx = (TSCCContext*)context; 1369 UConverterFromUCallback junkFrom; 1370 1371 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter); 1372 1373 if(ctx->magic != 0xC0FFEE) { 1374 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1375 return; 1376 } 1377 1378 if(reason == UCNV_CLONE) { 1379 UErrorCode subErr = U_ZERO_ERROR; 1380 TSCCContext *newCtx; 1381 TSCCContext *junkCtx; 1382 TSCCContext **pjunkCtx = &junkCtx; 1383 1384 /* "recreate" it */ 1385 log_verbose("TSCC_fromU: cloning..\n"); 1386 newCtx = TSCC_clone(ctx); 1387 1388 if(newCtx == NULL) { 1389 log_err("TSCC_fromU: internal clone failed on %p\n", ctx); 1390 } 1391 1392 /* now, SET it */ 1393 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1394 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1395 1396 if(U_FAILURE(subErr)) { 1397 *err = subErr; 1398 } 1399 } 1400 1401 if(reason == UCNV_CLOSE) { 1402 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial); 1403 ctx->wasClosed = TRUE; 1404 } 1405 } 1406 1407 static void TSCC_toU(const void *context, 1408 UConverterToUnicodeArgs *toUArgs, 1409 const char* codeUnits, 1410 int32_t length, 1411 UConverterCallbackReason reason, 1412 UErrorCode * err) 1413 { 1414 TSCCContext *ctx = (TSCCContext*)context; 1415 UConverterToUCallback junkFrom; 1416 1417 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter); 1418 1419 if(ctx->magic != 0xC0FFEE) { 1420 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1421 return; 1422 } 1423 1424 if(reason == UCNV_CLONE) { 1425 UErrorCode subErr = U_ZERO_ERROR; 1426 TSCCContext *newCtx; 1427 TSCCContext *junkCtx; 1428 TSCCContext **pjunkCtx = &junkCtx; 1429 1430 /* "recreate" it */ 1431 log_verbose("TSCC_toU: cloning..\n"); 1432 newCtx = TSCC_clone(ctx); 1433 1434 if(newCtx == NULL) { 1435 log_err("TSCC_toU: internal clone failed on %p\n", ctx); 1436 } 1437 1438 /* now, SET it */ 1439 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1440 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1441 1442 if(U_FAILURE(subErr)) { 1443 *err = subErr; 1444 } 1445 } 1446 1447 if(reason == UCNV_CLOSE) { 1448 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial); 1449 ctx->wasClosed = TRUE; 1450 } 1451 } 1452 1453 static void TSCC_init(TSCCContext *q) 1454 { 1455 q->magic = 0xC0FFEE; 1456 q->serial = TSCC_nextSerial(); 1457 q->wasClosed = 0; 1458 } 1459 1460 static void TSCC_print_log(TSCCContext *q, const char *name) 1461 { 1462 if(q==NULL) { 1463 log_verbose("TSCContext: %s is NULL!!\n", name); 1464 } else { 1465 if(q->magic != 0xC0FFEE) { 1466 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n", 1467 q,q->serial, q->magic); 1468 } 1469 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n", 1470 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open"); 1471 } 1472 } 1473 1474 static void TestConvertSafeCloneCallback() 1475 { 1476 UErrorCode err = U_ZERO_ERROR; 1477 TSCCContext from1, to1; 1478 TSCCContext *from2, *from3, *to2, *to3; 1479 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; 1480 char hunk[8192]; 1481 int32_t hunkSize = 8192; 1482 UConverterFromUCallback junkFrom; 1483 UConverterToUCallback junkTo; 1484 UConverter *conv1, *conv2 = NULL; 1485 1486 conv1 = ucnv_open("iso-8859-3", &err); 1487 1488 if(U_FAILURE(err)) { 1489 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); 1490 return; 1491 } 1492 1493 log_verbose("Opened conv1=%p\n", conv1); 1494 1495 TSCC_init(&from1); 1496 TSCC_init(&to1); 1497 1498 TSCC_print_log(&from1, "from1"); 1499 TSCC_print_log(&to1, "to1"); 1500 1501 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err); 1502 log_verbose("Set from1 on conv1\n"); 1503 TSCC_print_log(&from1, "from1"); 1504 1505 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err); 1506 log_verbose("Set to1 on conv1\n"); 1507 TSCC_print_log(&to1, "to1"); 1508 1509 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err); 1510 if(U_FAILURE(err)) { 1511 log_err("safeClone failed: %s\n", u_errorName(err)); 1512 return; 1513 } 1514 log_verbose("Cloned to conv2=%p.\n", conv2); 1515 1516 /********** from *********************/ 1517 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); 1518 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); 1519 1520 TSCC_print_log(from2, "from2"); 1521 TSCC_print_log(from3, "from3(==from1)"); 1522 1523 if(from2 == NULL) { 1524 log_err("FAIL! from2 is null \n"); 1525 return; 1526 } 1527 1528 if(from3 == NULL) { 1529 log_err("FAIL! from3 is null \n"); 1530 return; 1531 } 1532 1533 if(from3 != (&from1) ) { 1534 log_err("FAIL! conv1's FROM context changed!\n"); 1535 } 1536 1537 if(from2 == (&from1) ) { 1538 log_err("FAIL! conv1's FROM context is the same as conv2's!\n"); 1539 } 1540 1541 if(from1.wasClosed) { 1542 log_err("FAIL! from1 is closed \n"); 1543 } 1544 1545 if(from2->wasClosed) { 1546 log_err("FAIL! from2 was closed\n"); 1547 } 1548 1549 /********** to *********************/ 1550 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); 1551 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); 1552 1553 TSCC_print_log(to2, "to2"); 1554 TSCC_print_log(to3, "to3(==to1)"); 1555 1556 if(to2 == NULL) { 1557 log_err("FAIL! to2 is null \n"); 1558 return; 1559 } 1560 1561 if(to3 == NULL) { 1562 log_err("FAIL! to3 is null \n"); 1563 return; 1564 } 1565 1566 if(to3 != (&to1) ) { 1567 log_err("FAIL! conv1's TO context changed!\n"); 1568 } 1569 1570 if(to2 == (&to1) ) { 1571 log_err("FAIL! conv1's TO context is the same as conv2's!\n"); 1572 } 1573 1574 if(to1.wasClosed) { 1575 log_err("FAIL! to1 is closed \n"); 1576 } 1577 1578 if(to2->wasClosed) { 1579 log_err("FAIL! to2 was closed\n"); 1580 } 1581 1582 /*************************************/ 1583 1584 ucnv_close(conv1); 1585 log_verbose("ucnv_closed (conv1)\n"); 1586 TSCC_print_log(&from1, "from1"); 1587 TSCC_print_log(from2, "from2"); 1588 TSCC_print_log(&to1, "to1"); 1589 TSCC_print_log(to2, "to2"); 1590 1591 if(from1.wasClosed == FALSE) { 1592 log_err("FAIL! from1 is NOT closed \n"); 1593 } 1594 1595 if(from2->wasClosed) { 1596 log_err("FAIL! from2 was closed\n"); 1597 } 1598 1599 if(to1.wasClosed == FALSE) { 1600 log_err("FAIL! to1 is NOT closed \n"); 1601 } 1602 1603 if(to2->wasClosed) { 1604 log_err("FAIL! to2 was closed\n"); 1605 } 1606 1607 ucnv_close(conv2); 1608 log_verbose("ucnv_closed (conv2)\n"); 1609 1610 TSCC_print_log(&from1, "from1"); 1611 TSCC_print_log(from2, "from2"); 1612 1613 if(from1.wasClosed == FALSE) { 1614 log_err("FAIL! from1 is NOT closed \n"); 1615 } 1616 1617 if(from2->wasClosed == FALSE) { 1618 log_err("FAIL! from2 was NOT closed\n"); 1619 } 1620 1621 TSCC_print_log(&to1, "to1"); 1622 TSCC_print_log(to2, "to2"); 1623 1624 if(to1.wasClosed == FALSE) { 1625 log_err("FAIL! to1 is NOT closed \n"); 1626 } 1627 1628 if(to2->wasClosed == FALSE) { 1629 log_err("FAIL! to2 was NOT closed\n"); 1630 } 1631 1632 if(to2 != (&to1)) { 1633 free(to2); /* to1 is stack based */ 1634 } 1635 if(from2 != (&from1)) { 1636 free(from2); /* from1 is stack based */ 1637 } 1638 } 1639 #endif 1640 1641 static UBool 1642 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { 1643 while(length>0) { 1644 if(*p!=b) { 1645 return TRUE; 1646 } 1647 ++p; 1648 --length; 1649 } 1650 return FALSE; 1651 } 1652 1653 static void TestConvertSafeClone() 1654 { 1655 /* one 'regular' & all the 'private stateful' converters */ 1656 static const char *const names[] = { 1657 #if !UCONFIG_NO_LEGACY_CONVERSION 1658 "ibm-1047", 1659 "ISO_2022,locale=zh,version=1", 1660 #endif 1661 "SCSU", 1662 #if !UCONFIG_NO_LEGACY_CONVERSION 1663 "HZ", 1664 "lmbcs", 1665 "ISCII,version=0", 1666 "ISO_2022,locale=kr,version=1", 1667 "ISO_2022,locale=jp,version=2", 1668 #endif 1669 "BOCU-1", 1670 "UTF-7", 1671 #if !UCONFIG_NO_LEGACY_CONVERSION 1672 "IMAP-mailbox-name", 1673 "ibm-1047-s390" 1674 #else 1675 "IMAP=mailbox-name" 1676 #endif 1677 }; 1678 1679 /* store the actual sizes of each converter */ 1680 int32_t actualSizes[LENGTHOF(names)]; 1681 1682 static const int32_t bufferSizes[] = { 1683 U_CNV_SAFECLONE_BUFFERSIZE, 1684 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ 1685 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ 1686 }; 1687 1688 char charBuffer[21]; /* Leave at an odd number for alignment testing */ 1689 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; 1690 int32_t bufferSize, maxBufferSize; 1691 const char *maxName; 1692 UConverter * cnv, *cnv2; 1693 UErrorCode err; 1694 1695 char *pCharBuffer; 1696 const char *pConstCharBuffer; 1697 const char *charBufferLimit = charBuffer + sizeof(charBuffer)/sizeof(*charBuffer); 1698 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ 1699 UChar uniCharBuffer[20]; 1700 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; 1701 const char *pCharSource = charSourceBuffer; 1702 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); 1703 UChar *pUCharTarget = uniCharBuffer; 1704 UChar *pUCharTargetLimit = uniCharBuffer + sizeof(uniCharBuffer)/sizeof(*uniCharBuffer); 1705 const UChar * pUniBuffer; 1706 const UChar *uniBufferLimit = uniBuffer + sizeof(uniBuffer)/sizeof(*uniBuffer); 1707 int32_t idx, j; 1708 1709 err = U_ZERO_ERROR; 1710 cnv = ucnv_open(names[0], &err); 1711 if(U_SUCCESS(err)) { 1712 /* Check the various error & informational states: */ 1713 1714 /* Null status - just returns NULL */ 1715 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1716 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, 0)) 1717 { 1718 log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); 1719 } 1720 /* error status - should return 0 & keep error the same */ 1721 err = U_MEMORY_ALLOCATION_ERROR; 1722 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) 1723 { 1724 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); 1725 } 1726 err = U_ZERO_ERROR; 1727 1728 /* Null buffer size pointer - just returns NULL & set error to U_ILLEGAL_ARGUMENT_ERROR*/ 1729 if (0 != ucnv_safeClone(cnv, buffer[0], 0, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1730 { 1731 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); 1732 } 1733 err = U_ZERO_ERROR; 1734 1735 /* buffer size pointer is 0 - fill in pbufferSize with a size */ 1736 bufferSize = 0; 1737 if (0 != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) 1738 { 1739 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); 1740 } 1741 /* Verify our define is large enough */ 1742 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) 1743 { 1744 log_err("FAIL: Pre-calculated buffer size is too small\n"); 1745 } 1746 /* Verify we can use this run-time calculated size */ 1747 if (0 == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) 1748 { 1749 log_err("FAIL: Converter can't be cloned with run-time size\n"); 1750 } 1751 if (cnv2) { 1752 ucnv_close(cnv2); 1753 } 1754 1755 /* size one byte too small - should allocate & let us know */ 1756 --bufferSize; 1757 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1758 { 1759 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); 1760 } 1761 if (cnv2) { 1762 ucnv_close(cnv2); 1763 } 1764 1765 err = U_ZERO_ERROR; 1766 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1767 1768 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ 1769 if (0 == (cnv2 = ucnv_safeClone(cnv, 0, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1770 { 1771 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); 1772 } 1773 if (cnv2) { 1774 ucnv_close(cnv2); 1775 } 1776 1777 err = U_ZERO_ERROR; 1778 1779 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ 1780 if (0 != ucnv_safeClone(0, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1781 { 1782 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n"); 1783 } 1784 1785 ucnv_close(cnv); 1786 } 1787 1788 maxBufferSize = 0; 1789 maxName = ""; 1790 1791 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ 1792 1793 for(j = 0; j < LENGTHOF(bufferSizes); ++j) { 1794 for (idx = 0; idx < LENGTHOF(names); idx++) 1795 { 1796 err = U_ZERO_ERROR; 1797 cnv = ucnv_open(names[idx], &err); 1798 if(U_FAILURE(err)) { 1799 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err)); 1800 continue; 1801 } 1802 1803 if(j == 0) { 1804 /* preflight to get maxBufferSize */ 1805 actualSizes[idx] = 0; 1806 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err); 1807 if(actualSizes[idx] > maxBufferSize) { 1808 maxBufferSize = actualSizes[idx]; 1809 maxName = names[idx]; 1810 } 1811 } 1812 1813 memset(buffer, 0xaa, sizeof(buffer)); 1814 1815 bufferSize = bufferSizes[j]; 1816 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); 1817 1818 /* close the original immediately to make sure that the clone works by itself */ 1819 ucnv_close(cnv); 1820 1821 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && 1822 err == U_SAFECLONE_ALLOCATED_WARNING 1823 ) { 1824 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]); 1825 } 1826 1827 /* check if the clone function overwrote any bytes that it is not supposed to touch */ 1828 if(bufferSize <= bufferSizes[j]) { 1829 /* used the stack buffer */ 1830 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || 1831 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) 1832 ) { 1833 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", 1834 names[idx], bufferSize, bufferSizes[j]); 1835 } 1836 } else { 1837 /* heap-allocated the clone */ 1838 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { 1839 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", 1840 names[idx], bufferSize, bufferSizes[j]); 1841 } 1842 } 1843 1844 pCharBuffer = charBuffer; 1845 pUniBuffer = uniBuffer; 1846 1847 ucnv_fromUnicode(cnv2, 1848 &pCharBuffer, 1849 charBufferLimit, 1850 &pUniBuffer, 1851 uniBufferLimit, 1852 NULL, 1853 TRUE, 1854 &err); 1855 if(U_FAILURE(err)){ 1856 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); 1857 } 1858 ucnv_toUnicode(cnv2, 1859 &pUCharTarget, 1860 pUCharTargetLimit, 1861 &pCharSource, 1862 pCharSourceLimit, 1863 NULL, 1864 TRUE, 1865 &err 1866 ); 1867 1868 if(U_FAILURE(err)){ 1869 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); 1870 } 1871 1872 pConstCharBuffer = charBuffer; 1873 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) 1874 { 1875 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); 1876 } 1877 ucnv_close(cnv2); 1878 } 1879 } 1880 1881 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1882 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1883 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { 1884 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1885 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1886 } 1887 } 1888 1889 static void TestCCSID() { 1890 #if !UCONFIG_NO_LEGACY_CONVERSION 1891 UConverter *cnv; 1892 UErrorCode errorCode; 1893 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; 1894 int32_t i, ccsid; 1895 1896 for(i=0; i<(int32_t)(sizeof(ccsids)/sizeof(int32_t)); ++i) { 1897 ccsid=ccsids[i]; 1898 1899 errorCode=U_ZERO_ERROR; 1900 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode); 1901 if(U_FAILURE(errorCode)) { 1902 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode)); 1903 continue; 1904 } 1905 1906 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) { 1907 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode)); 1908 } 1909 1910 /* skip gb18030(ccsid 1392) */ 1911 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) { 1912 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode)); 1913 } 1914 1915 ucnv_close(cnv); 1916 } 1917 #endif 1918 } 1919 1920 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */ 1921 1922 /* CHUNK_SIZE defined in common\ucnv.c: */ 1923 #define CHUNK_SIZE 1024 1924 1925 static void bug1(void); 1926 static void bug2(void); 1927 static void bug3(void); 1928 1929 static void 1930 TestJ932(void) 1931 { 1932 bug1(); /* Unicode intermediate buffer straddle bug */ 1933 bug2(); /* pre-flighting size incorrect caused by simple overflow */ 1934 bug3(); /* pre-flighting size incorrect caused by expansion overflow */ 1935 } 1936 1937 /* 1938 * jitterbug 932: test chunking boundary conditions in 1939 1940 int32_t ucnv_convert(const char *toConverterName, 1941 const char *fromConverterName, 1942 char *target, 1943 int32_t targetSize, 1944 const char *source, 1945 int32_t sourceSize, 1946 UErrorCode * err) 1947 1948 * See discussions on the icu mailing list in 1949 * 2001-April with the subject "converter 'flush' question". 1950 * 1951 * Bug report and test code provided by Edward J. Batutis. 1952 */ 1953 static void bug1() 1954 { 1955 #if !UCONFIG_NO_LEGACY_CONVERSION 1956 char char_in[CHUNK_SIZE+32]; 1957 char char_out[CHUNK_SIZE*2]; 1958 1959 /* GB 18030 equivalent of U+10000 is 90308130 */ 1960 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 }; 1961 1962 UErrorCode err = U_ZERO_ERROR; 1963 int32_t i, test_seq_len = sizeof(test_seq); 1964 1965 /* 1966 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward 1967 * until the straddle bug appears. I didn't want to hard-code everything so this test could 1968 * be expanded - however this is the only type of straddle bug I can think of at the moment - 1969 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no 1970 * other Unicode sequences cause a bug since combining sequences are not supported by the 1971 * converters. 1972 */ 1973 1974 for (i = test_seq_len; i >= 0; i--) { 1975 /* put character sequence into input buffer */ 1976 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */ 1977 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len); 1978 1979 /* do the conversion */ 1980 ucnv_convert("us-ascii", /* out */ 1981 "gb18030", /* in */ 1982 char_out, 1983 sizeof(char_out), 1984 char_in, 1985 sizeof(char_in), 1986 &err); 1987 1988 /* bug1: */ 1989 if (err == U_TRUNCATED_CHAR_FOUND) { 1990 /* this happens when surrogate pair straddles the intermediate buffer in 1991 * T_UConverter_fromCodepageToCodepage */ 1992 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n"); 1993 } 1994 } 1995 #endif 1996 } 1997 1998 /* bug2: pre-flighting loop bug: simple overflow causes bug */ 1999 static void bug2() 2000 { 2001 /* US-ASCII "1234567890" */ 2002 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 }; 2003 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 }; 2004 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, 2005 0x00, 0x00, 0x00, 0x31, 2006 0x00, 0x00, 0x00, 0x32, 2007 0x00, 0x00, 0x00, 0x33, 2008 0x00, 0x00, 0x00, 0x34, 2009 0x00, 0x00, 0x00, 0x35, 2010 0x00, 0x00, 0x00, 0x36, 2011 0x00, 0x00, 0x00, 0x37, 2012 0x00, 0x00, 0x00, 0x38, 2013 0x00, 0x00, (char)0xf0, 0x00}; 2014 static char target[5]; 2015 2016 UErrorCode err = U_ZERO_ERROR; 2017 int32_t size; 2018 2019 /* do the conversion */ 2020 size = ucnv_convert("iso-8859-1", /* out */ 2021 "us-ascii", /* in */ 2022 target, 2023 sizeof(target), 2024 source, 2025 sizeof(source), 2026 &err); 2027 2028 if ( size != 10 ) { 2029 /* bug2: size is 5, should be 10 */ 2030 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size); 2031 } 2032 2033 err = U_ZERO_ERROR; 2034 /* do the conversion */ 2035 size = ucnv_convert("UTF-32BE", /* out */ 2036 "UTF-8", /* in */ 2037 target, 2038 sizeof(target), 2039 sourceUTF8, 2040 sizeof(sourceUTF8), 2041 &err); 2042 2043 if ( size != 32 ) { 2044 /* bug2: size is 5, should be 32 */ 2045 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size); 2046 } 2047 2048 err = U_ZERO_ERROR; 2049 /* do the conversion */ 2050 size = ucnv_convert("UTF-8", /* out */ 2051 "UTF-32BE", /* in */ 2052 target, 2053 sizeof(target), 2054 sourceUTF32, 2055 sizeof(sourceUTF32), 2056 &err); 2057 2058 if ( size != 12 ) { 2059 /* bug2: size is 5, should be 12 */ 2060 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); 2061 } 2062 } 2063 2064 /* 2065 * bug3: when the characters expand going from source to target codepage 2066 * you get bug3 in addition to bug2 2067 */ 2068 static void bug3() 2069 { 2070 #if !UCONFIG_NO_LEGACY_CONVERSION 2071 char char_in[CHUNK_SIZE*4]; 2072 char target[5]; 2073 UErrorCode err = U_ZERO_ERROR; 2074 int32_t size; 2075 2076 /* 2077 * first get the buggy size from bug2 then 2078 * compare it to buggy size with an expansion 2079 */ 2080 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ 2081 2082 /* do the conversion */ 2083 size = ucnv_convert("lmbcs", /* out */ 2084 "us-ascii", /* in */ 2085 target, 2086 sizeof(target), 2087 char_in, 2088 sizeof(char_in), 2089 &err); 2090 2091 if ( size != sizeof(char_in) ) { 2092 /* 2093 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer 2094 * in the converter?), should be CHUNK_SIZE*4 2095 * 2096 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize... 2097 */ 2098 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size); 2099 } 2100 2101 /* 2102 * now do the conversion with expansion 2103 * ascii 0x08 expands to 0x0F 0x28 in lmbcs 2104 */ 2105 memset(char_in, 8, sizeof(char_in)); 2106 err = U_ZERO_ERROR; 2107 2108 /* do the conversion */ 2109 size = ucnv_convert("lmbcs", /* out */ 2110 "us-ascii", /* in */ 2111 target, 2112 sizeof(target), 2113 char_in, 2114 sizeof(char_in), 2115 &err); 2116 2117 /* expect 2X expansion */ 2118 if ( size != sizeof(char_in) * 2 ) { 2119 /* 2120 * bug3: 2121 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05: 2122 */ 2123 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); 2124 } 2125 #endif 2126 } 2127 2128 static void 2129 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, 2130 const char *src, int32_t srcLength, 2131 const char *expectTarget, int32_t expectTargetLength, 2132 int32_t chunkSize, 2133 const char *testName, 2134 UErrorCode expectCode) { 2135 UChar pivotBuffer[CHUNK_SIZE]; 2136 UChar *pivotSource, *pivotTarget; 2137 const UChar *pivotLimit; 2138 2139 char targetBuffer[CHUNK_SIZE]; 2140 char *target; 2141 const char *srcLimit, *finalSrcLimit, *targetLimit; 2142 2143 int32_t targetLength; 2144 2145 UBool flush; 2146 2147 UErrorCode errorCode; 2148 2149 /* setup */ 2150 if(chunkSize>CHUNK_SIZE) { 2151 chunkSize=CHUNK_SIZE; 2152 } 2153 2154 pivotSource=pivotTarget=pivotBuffer; 2155 pivotLimit=pivotBuffer+chunkSize; 2156 2157 finalSrcLimit=src+srcLength; 2158 target=targetBuffer; 2159 targetLimit=targetBuffer+chunkSize; 2160 2161 ucnv_resetToUnicode(srcCnv); 2162 ucnv_resetFromUnicode(targetCnv); 2163 2164 errorCode=U_ZERO_ERROR; 2165 flush=FALSE; 2166 2167 /* convert, streaming-style (both converters and pivot keep state) */ 2168 for(;;) { 2169 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */ 2170 if(src+chunkSize<=finalSrcLimit) { 2171 srcLimit=src+chunkSize; 2172 } else { 2173 srcLimit=finalSrcLimit; 2174 } 2175 ucnv_convertEx(targetCnv, srcCnv, 2176 &target, targetLimit, 2177 &src, srcLimit, 2178 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, 2179 FALSE, flush, &errorCode); 2180 targetLength=(int32_t)(target-targetBuffer); 2181 if(target>targetLimit) { 2182 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", 2183 testName, chunkSize, target, targetLimit); 2184 break; /* TODO: major problem! */ 2185 } 2186 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 2187 /* continue converting another chunk */ 2188 errorCode=U_ZERO_ERROR; 2189 if(targetLength+chunkSize<=sizeof(targetBuffer)) { 2190 targetLimit=target+chunkSize; 2191 } else { 2192 targetLimit=targetBuffer+sizeof(targetBuffer); 2193 } 2194 } else if(U_FAILURE(errorCode)) { 2195 /* failure */ 2196 break; 2197 } else if(flush) { 2198 /* all done */ 2199 break; 2200 } else if(src==finalSrcLimit && pivotSource==pivotTarget) { 2201 /* all consumed, now flush without input (separate from conversion for testing) */ 2202 flush=TRUE; 2203 } 2204 } 2205 2206 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) { 2207 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n", 2208 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode)); 2209 } else if(targetLength!=expectTargetLength) { 2210 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n", 2211 testName, chunkSize, targetLength, expectTargetLength); 2212 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) { 2213 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n", 2214 testName, chunkSize); 2215 } 2216 } 2217 2218 static void 2219 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, 2220 const char *src, int32_t srcLength, 2221 const char *expectTarget, int32_t expectTargetLength, 2222 const char *testName, 2223 UErrorCode expectCode) { 2224 convertExStreaming(srcCnv, targetCnv, 2225 src, srcLength, 2226 expectTarget, expectTargetLength, 2227 1, testName, expectCode); 2228 convertExStreaming(srcCnv, targetCnv, 2229 src, srcLength, 2230 expectTarget, expectTargetLength, 2231 3, testName, expectCode); 2232 convertExStreaming(srcCnv, targetCnv, 2233 src, srcLength, 2234 expectTarget, expectTargetLength, 2235 7, testName, expectCode); 2236 } 2237 2238 static void TestConvertEx() { 2239 #if !UCONFIG_NO_LEGACY_CONVERSION 2240 static const uint8_t 2241 utf8[]={ 2242 /* 4e00 30a1 ff61 0410 */ 2243 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2244 }, 2245 shiftJIS[]={ 2246 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2247 }, 2248 errorTarget[]={ 2249 /* 2250 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2251 * SUB, SUB, 0x40, SUB, SUB, 0x40 2252 */ 2253 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 2254 }; 2255 2256 char srcBuffer[100], targetBuffer[100]; 2257 2258 const char *src; 2259 char *target; 2260 2261 UChar pivotBuffer[100]; 2262 UChar *pivotSource, *pivotTarget; 2263 2264 UConverter *cnv1, *cnv2; 2265 UErrorCode errorCode; 2266 2267 errorCode=U_ZERO_ERROR; 2268 cnv1=ucnv_open("UTF-8", &errorCode); 2269 if(U_FAILURE(errorCode)) { 2270 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode)); 2271 return; 2272 } 2273 2274 cnv2=ucnv_open("Shift-JIS", &errorCode); 2275 if(U_FAILURE(errorCode)) { 2276 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2277 ucnv_close(cnv1); 2278 return; 2279 } 2280 2281 /* test ucnv_convertEx() with streaming conversion style */ 2282 convertExMultiStreaming(cnv1, cnv2, 2283 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS), 2284 "UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2285 2286 convertExMultiStreaming(cnv2, cnv1, 2287 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), 2288 "Shift-JIS -> UTF-8", U_ZERO_ERROR); 2289 2290 /* U_ZERO_ERROR because by default the SUB callbacks are set */ 2291 convertExMultiStreaming(cnv1, cnv2, 2292 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget), 2293 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2294 2295 /* test some simple conversions */ 2296 2297 /* NUL-terminated source and target */ 2298 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2299 memcpy(srcBuffer, utf8, sizeof(utf8)); 2300 srcBuffer[sizeof(utf8)]=0; 2301 src=srcBuffer; 2302 target=targetBuffer; 2303 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2304 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2305 if( errorCode!=U_ZERO_ERROR || 2306 target-targetBuffer!=sizeof(shiftJIS) || 2307 *target!=0 || 2308 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2309 ) { 2310 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n", 2311 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2312 } 2313 2314 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */ 2315 errorCode=U_AMBIGUOUS_ALIAS_WARNING; 2316 memset(targetBuffer, 0xff, sizeof(targetBuffer)); 2317 src=srcBuffer; 2318 target=targetBuffer; 2319 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL, 2320 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2321 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2322 target-targetBuffer!=sizeof(shiftJIS) || 2323 *target!=(char)0xff || 2324 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2325 ) { 2326 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n", 2327 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2328 } 2329 2330 /* bad arguments */ 2331 errorCode=U_MESSAGE_PARSE_ERROR; 2332 src=srcBuffer; 2333 target=targetBuffer; 2334 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2335 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2336 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2337 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2338 } 2339 2340 /* pivotLimit==pivotStart */ 2341 errorCode=U_ZERO_ERROR; 2342 pivotSource=pivotTarget=pivotBuffer; 2343 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2344 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode); 2345 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2346 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode)); 2347 } 2348 2349 /* *pivotSource==NULL */ 2350 errorCode=U_ZERO_ERROR; 2351 pivotSource=NULL; 2352 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2353 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2354 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2355 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode)); 2356 } 2357 2358 /* *source==NULL */ 2359 errorCode=U_ZERO_ERROR; 2360 src=NULL; 2361 pivotSource=pivotBuffer; 2362 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2363 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2364 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2365 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); 2366 } 2367 2368 /* streaming conversion without a pivot buffer */ 2369 errorCode=U_ZERO_ERROR; 2370 src=srcBuffer; 2371 pivotSource=pivotBuffer; 2372 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2373 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); 2374 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2375 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); 2376 } 2377 2378 ucnv_close(cnv1); 2379 ucnv_close(cnv2); 2380 #endif 2381 } 2382 2383 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */ 2384 static const char *const badUTF8[]={ 2385 /* trail byte */ 2386 "\x80", 2387 2388 /* truncated multi-byte sequences */ 2389 "\xd0", 2390 "\xe0", 2391 "\xe1", 2392 "\xed", 2393 "\xee", 2394 "\xf0", 2395 "\xf1", 2396 "\xf4", 2397 "\xf8", 2398 "\xfc", 2399 2400 "\xe0\x80", 2401 "\xe0\xa0", 2402 "\xe1\x80", 2403 "\xed\x80", 2404 "\xed\xa0", 2405 "\xee\x80", 2406 "\xf0\x80", 2407 "\xf0\x90", 2408 "\xf1\x80", 2409 "\xf4\x80", 2410 "\xf4\x90", 2411 "\xf8\x80", 2412 "\xfc\x80", 2413 2414 "\xf0\x80\x80", 2415 "\xf0\x90\x80", 2416 "\xf1\x80\x80", 2417 "\xf4\x80\x80", 2418 "\xf4\x90\x80", 2419 "\xf8\x80\x80", 2420 "\xfc\x80\x80", 2421 2422 "\xf8\x80\x80\x80", 2423 "\xfc\x80\x80\x80", 2424 2425 "\xfc\x80\x80\x80\x80", 2426 2427 /* complete sequences but non-shortest forms or out of range etc. */ 2428 "\xc0\x80", 2429 "\xe0\x80\x80", 2430 "\xed\xa0\x80", 2431 "\xf0\x80\x80\x80", 2432 "\xf4\x90\x80\x80", 2433 "\xf8\x80\x80\x80\x80", 2434 "\xfc\x80\x80\x80\x80\x80", 2435 "\xfe", 2436 "\xff" 2437 }; 2438 2439 #define ARG_CHAR_ARR_SIZE 8 2440 2441 /* get some character that can be converted and convert it */ 2442 static UBool getTestChar(UConverter *cnv, const char *converterName, 2443 char charUTF8[4], int32_t *pCharUTF8Length, 2444 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length, 2445 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) { 2446 UChar utf16[U16_MAX_LENGTH]; 2447 int32_t utf16Length; 2448 2449 const UChar *utf16Source; 2450 char *target; 2451 2452 USet *set; 2453 UChar32 c; 2454 UErrorCode errorCode; 2455 2456 errorCode=U_ZERO_ERROR; 2457 set=uset_open(1, 0); 2458 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode); 2459 c=uset_charAt(set, uset_size(set)/2); 2460 uset_close(set); 2461 2462 utf16Length=0; 2463 U16_APPEND_UNSAFE(utf16, utf16Length, c); 2464 *pCharUTF8Length=0; 2465 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c); 2466 2467 utf16Source=utf16; 2468 target=char0; 2469 ucnv_fromUnicode(cnv, 2470 &target, char0+ARG_CHAR_ARR_SIZE, 2471 &utf16Source, utf16+utf16Length, 2472 NULL, FALSE, &errorCode); 2473 *pChar0Length=(int32_t)(target-char0); 2474 2475 utf16Source=utf16; 2476 target=char1; 2477 ucnv_fromUnicode(cnv, 2478 &target, char1+ARG_CHAR_ARR_SIZE, 2479 &utf16Source, utf16+utf16Length, 2480 NULL, FALSE, &errorCode); 2481 *pChar1Length=(int32_t)(target-char1); 2482 2483 if(U_FAILURE(errorCode)) { 2484 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); 2485 return FALSE; 2486 } 2487 return TRUE; 2488 } 2489 2490 static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2491 char charUTF8[4], int32_t charUTF8Length, 2492 char char0[8], int32_t char0Length, 2493 char char1[8], int32_t char1Length) { 2494 char utf8[16]; 2495 int32_t utf8Length; 2496 2497 char output[16]; 2498 int32_t outputLength; 2499 2500 char invalidChars[8]; 2501 int8_t invalidLength; 2502 2503 const char *source; 2504 char *target; 2505 2506 UChar pivotBuffer[8]; 2507 UChar *pivotSource, *pivotTarget; 2508 2509 UErrorCode errorCode; 2510 int32_t i; 2511 2512 /* test truncated sequences */ 2513 errorCode=U_ZERO_ERROR; 2514 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2515 2516 memcpy(utf8, charUTF8, charUTF8Length); 2517 2518 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2519 /* truncated sequence? */ 2520 int32_t length=strlen(badUTF8[i]); 2521 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) { 2522 continue; 2523 } 2524 2525 /* assemble a string with the test character and the truncated sequence */ 2526 memcpy(utf8+charUTF8Length, badUTF8[i], length); 2527 utf8Length=charUTF8Length+length; 2528 2529 /* convert and check the invalidChars */ 2530 source=utf8; 2531 target=output; 2532 pivotSource=pivotTarget=pivotBuffer; 2533 errorCode=U_ZERO_ERROR; 2534 ucnv_convertEx(cnv, utf8Cnv, 2535 &target, output+sizeof(output), 2536 &source, utf8+utf8Length, 2537 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2538 TRUE, TRUE, /* reset & flush */ 2539 &errorCode); 2540 outputLength=(int32_t)(target-output); 2541 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { 2542 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i); 2543 continue; 2544 } 2545 2546 errorCode=U_ZERO_ERROR; 2547 invalidLength=(int8_t)sizeof(invalidChars); 2548 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode); 2549 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) { 2550 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i); 2551 } 2552 } 2553 } 2554 2555 static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2556 char charUTF8[4], int32_t charUTF8Length, 2557 char char0[8], int32_t char0Length, 2558 char char1[8], int32_t char1Length) { 2559 char utf8[600], expect[600]; 2560 int32_t utf8Length, expectLength; 2561 2562 char testName[32]; 2563 2564 UErrorCode errorCode; 2565 int32_t i; 2566 2567 errorCode=U_ZERO_ERROR; 2568 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode); 2569 2570 /* 2571 * assemble an input string with the test character between each 2572 * bad sequence, 2573 * and an expected string with repeated test character output 2574 */ 2575 memcpy(utf8, charUTF8, charUTF8Length); 2576 utf8Length=charUTF8Length; 2577 2578 memcpy(expect, char0, char0Length); 2579 expectLength=char0Length; 2580 2581 for(i=0; i<LENGTHOF(badUTF8); ++i) { 2582 int32_t length=strlen(badUTF8[i]); 2583 memcpy(utf8+utf8Length, badUTF8[i], length); 2584 utf8Length+=length; 2585 2586 memcpy(utf8+utf8Length, charUTF8, charUTF8Length); 2587 utf8Length+=charUTF8Length; 2588 2589 memcpy(expect+expectLength, char1, char1Length); 2590 expectLength+=char1Length; 2591 } 2592 2593 /* expect that each bad UTF-8 sequence is detected and skipped */ 2594 strcpy(testName, "from bad UTF-8 to "); 2595 strcat(testName, converterName); 2596 2597 convertExMultiStreaming(utf8Cnv, cnv, 2598 utf8, utf8Length, 2599 expect, expectLength, 2600 testName, 2601 U_ZERO_ERROR); 2602 } 2603 2604 /* Test illegal UTF-8 input. */ 2605 static void TestConvertExFromUTF8() { 2606 static const char *const converterNames[]={ 2607 #if !UCONFIG_NO_LEGACY_CONVERSION 2608 "windows-1252", 2609 "shift-jis", 2610 #endif 2611 "us-ascii", 2612 "iso-8859-1", 2613 "utf-8" 2614 }; 2615 2616 UConverter *utf8Cnv, *cnv; 2617 UErrorCode errorCode; 2618 int32_t i; 2619 2620 /* fromUnicode versions of some character, from initial state and later */ 2621 char charUTF8[4], char0[8], char1[8]; 2622 int32_t charUTF8Length, char0Length, char1Length; 2623 2624 errorCode=U_ZERO_ERROR; 2625 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2626 if(U_FAILURE(errorCode)) { 2627 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2628 return; 2629 } 2630 2631 for(i=0; i<LENGTHOF(converterNames); ++i) { 2632 errorCode=U_ZERO_ERROR; 2633 cnv=ucnv_open(converterNames[i], &errorCode); 2634 if(U_FAILURE(errorCode)) { 2635 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode)); 2636 continue; 2637 } 2638 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) { 2639 continue; 2640 } 2641 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2642 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2643 ucnv_close(cnv); 2644 } 2645 ucnv_close(utf8Cnv); 2646 } 2647 2648 static void TestConvertExFromUTF8_C5F0() { 2649 static const char *const converterNames[]={ 2650 #if !UCONFIG_NO_LEGACY_CONVERSION 2651 "windows-1251", 2652 "shift-jis", 2653 #endif 2654 "us-ascii", 2655 "iso-8859-1", 2656 "utf-8" 2657 }; 2658 2659 UConverter *utf8Cnv, *cnv; 2660 UErrorCode errorCode; 2661 int32_t i; 2662 2663 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 }; 2664 /* Expect "��" (2x U+FFFD as decimal NCRs) */ 2665 static const char twoNCRs[16]={ 2666 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B, 2667 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B 2668 }; 2669 static const char twoFFFD[6]={ 2670 (char)0xef, (char)0xbf, (char)0xbd, 2671 (char)0xef, (char)0xbf, (char)0xbd 2672 }; 2673 const char *expected; 2674 int32_t expectedLength; 2675 char dest[20]; /* longer than longest expectedLength */ 2676 2677 const char *src; 2678 char *target; 2679 2680 UChar pivotBuffer[128]; 2681 UChar *pivotSource, *pivotTarget; 2682 2683 errorCode=U_ZERO_ERROR; 2684 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2685 if(U_FAILURE(errorCode)) { 2686 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2687 return; 2688 } 2689 2690 for(i=0; i<LENGTHOF(converterNames); ++i) { 2691 errorCode=U_ZERO_ERROR; 2692 cnv=ucnv_open(converterNames[i], &errorCode); 2693 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 2694 NULL, NULL, &errorCode); 2695 if(U_FAILURE(errorCode)) { 2696 log_data_err("unable to open %s converter - %s\n", 2697 converterNames[i], u_errorName(errorCode)); 2698 continue; 2699 } 2700 src=bad_utf8; 2701 target=dest; 2702 uprv_memset(dest, 9, sizeof(dest)); 2703 if(i==LENGTHOF(converterNames)-1) { 2704 /* conversion to UTF-8 yields two U+FFFD directly */ 2705 expected=twoFFFD; 2706 expectedLength=6; 2707 } else { 2708 /* conversion to a non-Unicode charset yields two NCRs */ 2709 expected=twoNCRs; 2710 expectedLength=16; 2711 } 2712 pivotBuffer[0]=0; 2713 pivotBuffer[1]=1; 2714 pivotBuffer[2]=2; 2715 pivotSource=pivotTarget=pivotBuffer; 2716 ucnv_convertEx( 2717 cnv, utf8Cnv, 2718 &target, dest+expectedLength, 2719 &src, bad_utf8+sizeof(bad_utf8), 2720 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+LENGTHOF(pivotBuffer), 2721 TRUE, TRUE, &errorCode); 2722 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 || 2723 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) || 2724 dest[expectedLength]!=9 2725 ) { 2726 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]); 2727 } 2728 ucnv_close(cnv); 2729 } 2730 ucnv_close(utf8Cnv); 2731 } 2732 2733 static void 2734 TestConvertAlgorithmic() { 2735 #if !UCONFIG_NO_LEGACY_CONVERSION 2736 static const uint8_t 2737 utf8[]={ 2738 /* 4e00 30a1 ff61 0410 */ 2739 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2740 }, 2741 shiftJIS[]={ 2742 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2743 }, 2744 /*errorTarget[]={*/ 2745 /* 2746 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2747 * SUB, SUB, 0x40, SUB, SUB, 0x40 2748 */ 2749 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ 2750 /*},*/ 2751 utf16[]={ 2752 0xfe, 0xff /* BOM only, no text */ 2753 }, 2754 utf32[]={ 2755 0xff, 0xfe, 0, 0 /* BOM only, no text */ 2756 }; 2757 2758 char target[100], utf8NUL[100], shiftJISNUL[100]; 2759 2760 UConverter *cnv; 2761 UErrorCode errorCode; 2762 2763 int32_t length; 2764 2765 errorCode=U_ZERO_ERROR; 2766 cnv=ucnv_open("Shift-JIS", &errorCode); 2767 if(U_FAILURE(errorCode)) { 2768 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2769 ucnv_close(cnv); 2770 return; 2771 } 2772 2773 memcpy(utf8NUL, utf8, sizeof(utf8)); 2774 utf8NUL[sizeof(utf8)]=0; 2775 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); 2776 shiftJISNUL[sizeof(shiftJIS)]=0; 2777 2778 /* 2779 * The to/from algorithmic convenience functions share a common implementation, 2780 * so we need not test all permutations of them. 2781 */ 2782 2783 /* length in, not terminated out */ 2784 errorCode=U_ZERO_ERROR; 2785 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode); 2786 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2787 length!=sizeof(shiftJIS) || 2788 memcmp(target, shiftJIS, length)!=0 2789 ) { 2790 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n", 2791 u_errorName(errorCode), length, sizeof(shiftJIS)); 2792 } 2793 2794 /* terminated in and out */ 2795 memset(target, 0x55, sizeof(target)); 2796 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2797 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode); 2798 if( errorCode!=U_ZERO_ERROR || 2799 length!=sizeof(utf8) || 2800 memcmp(target, utf8, length)!=0 2801 ) { 2802 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n", 2803 u_errorName(errorCode), length, sizeof(shiftJIS)); 2804 } 2805 2806 /* empty string, some target buffer */ 2807 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2808 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode); 2809 if( errorCode!=U_ZERO_ERROR || 2810 length!=0 2811 ) { 2812 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n", 2813 u_errorName(errorCode), length); 2814 } 2815 2816 /* pseudo-empty string, no target buffer */ 2817 errorCode=U_ZERO_ERROR; 2818 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2819 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2820 length!=0 2821 ) { 2822 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2823 u_errorName(errorCode), length); 2824 } 2825 2826 errorCode=U_ZERO_ERROR; 2827 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); 2828 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2829 length!=0 2830 ) { 2831 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2832 u_errorName(errorCode), length); 2833 } 2834 2835 /* bad arguments */ 2836 errorCode=U_MESSAGE_PARSE_ERROR; 2837 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2838 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2839 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2840 } 2841 2842 /* source==NULL */ 2843 errorCode=U_ZERO_ERROR; 2844 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode); 2845 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2846 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode)); 2847 } 2848 2849 /* illegal alg. type */ 2850 errorCode=U_ZERO_ERROR; 2851 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode); 2852 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2853 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode)); 2854 } 2855 ucnv_close(cnv); 2856 #endif 2857 } 2858 2859 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 2860 static void TestLMBCSMaxChar(void) { 2861 static const struct { 2862 int8_t maxSize; 2863 const char *name; 2864 } converter[] = { 2865 /* some non-LMBCS converters - perfect test setup here */ 2866 { 1, "US-ASCII"}, 2867 { 1, "ISO-8859-1"}, 2868 2869 { 2, "UTF-16"}, 2870 { 2, "UTF-16BE"}, 2871 { 3, "UTF-8"}, 2872 { 3, "CESU-8"}, 2873 { 3, "SCSU"}, 2874 { 4, "UTF-32"}, 2875 { 4, "UTF-7"}, 2876 { 4, "IMAP-mailbox-name"}, 2877 { 4, "BOCU-1"}, 2878 2879 { 1, "windows-1256"}, 2880 { 2, "Shift-JIS"}, 2881 { 2, "ibm-16684"}, 2882 { 3, "ibm-930"}, 2883 { 3, "ibm-1390"}, 2884 { 4, "*test3"}, 2885 { 16,"*test4"}, 2886 2887 { 4, "ISCII"}, 2888 { 4, "HZ"}, 2889 2890 { 3, "ISO-2022"}, 2891 { 3, "ISO-2022-KR"}, 2892 { 6, "ISO-2022-JP"}, 2893 { 8, "ISO-2022-CN"}, 2894 2895 /* LMBCS */ 2896 { 3, "LMBCS-1"}, 2897 { 3, "LMBCS-2"}, 2898 { 3, "LMBCS-3"}, 2899 { 3, "LMBCS-4"}, 2900 { 3, "LMBCS-5"}, 2901 { 3, "LMBCS-6"}, 2902 { 3, "LMBCS-8"}, 2903 { 3, "LMBCS-11"}, 2904 { 3, "LMBCS-16"}, 2905 { 3, "LMBCS-17"}, 2906 { 3, "LMBCS-18"}, 2907 { 3, "LMBCS-19"} 2908 }; 2909 int32_t idx; 2910 2911 for (idx = 0; idx < LENGTHOF(converter); idx++) { 2912 UErrorCode status = U_ZERO_ERROR; 2913 UConverter *cnv = cnv_open(converter[idx].name, &status); 2914 if (U_FAILURE(status)) { 2915 continue; 2916 } 2917 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { 2918 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", 2919 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); 2920 } 2921 ucnv_close(cnv); 2922 } 2923 2924 /* mostly test that the macro compiles */ 2925 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { 2926 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); 2927 } 2928 } 2929 #endif 2930 2931 static void TestJ1968(void) { 2932 UErrorCode err = U_ZERO_ERROR; 2933 UConverter *cnv; 2934 char myConvName[] = "My really really really really really really really really really really really" 2935 " really really really really really really really really really really really" 2936 " really really really really really really really really long converter name"; 2937 UChar myConvNameU[sizeof(myConvName)]; 2938 2939 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName)); 2940 2941 err = U_ZERO_ERROR; 2942 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0; 2943 cnv = ucnv_openU(myConvNameU, &err); 2944 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2945 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2946 } 2947 2948 err = U_ZERO_ERROR; 2949 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2950 cnv = ucnv_openU(myConvNameU, &err); 2951 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2952 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2953 } 2954 2955 err = U_ZERO_ERROR; 2956 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 2957 cnv = ucnv_openU(myConvNameU, &err); 2958 if (cnv || err != U_FILE_ACCESS_ERROR) { 2959 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2960 } 2961 2962 2963 2964 2965 err = U_ZERO_ERROR; 2966 cnv = ucnv_open(myConvName, &err); 2967 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2968 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2969 } 2970 2971 err = U_ZERO_ERROR; 2972 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ','; 2973 cnv = ucnv_open(myConvName, &err); 2974 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2975 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2976 } 2977 2978 err = U_ZERO_ERROR; 2979 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2980 cnv = ucnv_open(myConvName, &err); 2981 if (cnv || err != U_FILE_ACCESS_ERROR) { 2982 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2983 } 2984 2985 err = U_ZERO_ERROR; 2986 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2987 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7); 2988 cnv = ucnv_open(myConvName, &err); 2989 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2990 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2991 } 2992 2993 /* The comma isn't really a part of the converter name. */ 2994 err = U_ZERO_ERROR; 2995 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2996 cnv = ucnv_open(myConvName, &err); 2997 if (cnv || err != U_FILE_ACCESS_ERROR) { 2998 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2999 } 3000 3001 err = U_ZERO_ERROR; 3002 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' '; 3003 cnv = ucnv_open(myConvName, &err); 3004 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3005 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3006 } 3007 3008 err = U_ZERO_ERROR; 3009 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 3010 cnv = ucnv_open(myConvName, &err); 3011 if (cnv || err != U_FILE_ACCESS_ERROR) { 3012 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3013 } 3014 3015 } 3016 3017 #if !UCONFIG_NO_LEGACY_CONVERSION 3018 static void 3019 testSwap(const char *name, UBool swap) { 3020 /* 3021 * Test Unicode text. 3022 * Contains characters that are the highest for some of the 3023 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the 3024 * tables copies the entire tables. 3025 */ 3026 static const UChar text[]={ 3027 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a 3028 }; 3029 3030 UChar uNormal[32], uSwapped[32]; 3031 char normal[32], swapped[32]; 3032 const UChar *pcu; 3033 UChar *pu; 3034 char *pc; 3035 int32_t i, normalLength, swappedLength; 3036 UChar u; 3037 char c; 3038 3039 const char *swappedName; 3040 UConverter *cnv, *swapCnv; 3041 UErrorCode errorCode; 3042 3043 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */ 3044 3045 /* open both the normal and the LF/NL-swapping converters */ 3046 strcpy(swapped, name); 3047 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING); 3048 3049 errorCode=U_ZERO_ERROR; 3050 swapCnv=ucnv_open(swapped, &errorCode); 3051 cnv=ucnv_open(name, &errorCode); 3052 if(U_FAILURE(errorCode)) { 3053 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode)); 3054 goto cleanup; 3055 } 3056 3057 /* the name must contain the swap option if and only if we expect the converter to swap */ 3058 swappedName=ucnv_getName(swapCnv, &errorCode); 3059 if(U_FAILURE(errorCode)) { 3060 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode)); 3061 goto cleanup; 3062 } 3063 3064 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING); 3065 if(swap != (pc!=NULL)) { 3066 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap); 3067 goto cleanup; 3068 } 3069 3070 /* convert to EBCDIC */ 3071 pcu=text; 3072 pc=normal; 3073 ucnv_fromUnicode(cnv, &pc, normal+LENGTHOF(normal), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3074 normalLength=(int32_t)(pc-normal); 3075 3076 pcu=text; 3077 pc=swapped; 3078 ucnv_fromUnicode(swapCnv, &pc, swapped+LENGTHOF(swapped), &pcu, text+LENGTHOF(text), NULL, TRUE, &errorCode); 3079 swappedLength=(int32_t)(pc-swapped); 3080 3081 if(U_FAILURE(errorCode)) { 3082 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode)); 3083 goto cleanup; 3084 } 3085 3086 /* compare EBCDIC output */ 3087 if(normalLength!=swappedLength) { 3088 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3089 goto cleanup; 3090 } 3091 for(i=0; i<normalLength; ++i) { 3092 /* swap EBCDIC LF/NL for comparison */ 3093 c=normal[i]; 3094 if(swap) { 3095 if(c==0x15) { 3096 c=0x25; 3097 } else if(c==0x25) { 3098 c=0x15; 3099 } 3100 } 3101 3102 if(c!=swapped[i]) { 3103 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]); 3104 goto cleanup; 3105 } 3106 } 3107 3108 /* convert back to Unicode (may not roundtrip) */ 3109 pc=normal; 3110 pu=uNormal; 3111 ucnv_toUnicode(cnv, &pu, uNormal+LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); 3112 normalLength=(int32_t)(pu-uNormal); 3113 3114 pc=normal; 3115 pu=uSwapped; 3116 ucnv_toUnicode(swapCnv, &pu, uSwapped+LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); 3117 swappedLength=(int32_t)(pu-uSwapped); 3118 3119 if(U_FAILURE(errorCode)) { 3120 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode)); 3121 goto cleanup; 3122 } 3123 3124 /* compare EBCDIC output */ 3125 if(normalLength!=swappedLength) { 3126 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3127 goto cleanup; 3128 } 3129 for(i=0; i<normalLength; ++i) { 3130 /* swap EBCDIC LF/NL for comparison */ 3131 u=uNormal[i]; 3132 if(swap) { 3133 if(u==0xa) { 3134 u=0x85; 3135 } else if(u==0x85) { 3136 u=0xa; 3137 } 3138 } 3139 3140 if(u!=uSwapped[i]) { 3141 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]); 3142 goto cleanup; 3143 } 3144 } 3145 3146 /* clean up */ 3147 cleanup: 3148 ucnv_close(cnv); 3149 ucnv_close(swapCnv); 3150 } 3151 3152 static void 3153 TestEBCDICSwapLFNL() { 3154 static const struct { 3155 const char *name; 3156 UBool swap; 3157 } tests[]={ 3158 { "ibm-37", TRUE }, 3159 { "ibm-1047", TRUE }, 3160 { "ibm-1140", TRUE }, 3161 { "ibm-930", TRUE }, 3162 { "iso-8859-3", FALSE } 3163 }; 3164 3165 int i; 3166 3167 for(i=0; i<LENGTHOF(tests); ++i) { 3168 testSwap(tests[i].name, tests[i].swap); 3169 } 3170 } 3171 #else 3172 static void 3173 TestEBCDICSwapLFNL() { 3174 /* test nothing... */ 3175 } 3176 #endif 3177 3178 static const UVersionInfo ICU_34 = {3,4,0,0}; 3179 3180 static void TestFromUCountPending(){ 3181 #if !UCONFIG_NO_LEGACY_CONVERSION 3182 UErrorCode status = U_ZERO_ERROR; 3183 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */ 3184 static const struct { 3185 UChar input[6]; 3186 int32_t len; 3187 int32_t exp; 3188 }fromUnicodeTests[] = { 3189 /*m:n conversion*/ 3190 {{0xdbc4},1,1}, 3191 {{ 0xdbc4, 0xde34, 0xd84d},3,1}, 3192 {{ 0xdbc4, 0xde34, 0xd900},3,3}, 3193 }; 3194 int i; 3195 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3196 if(U_FAILURE(status)){ 3197 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3198 return; 3199 } 3200 for(i=0; i<LENGTHOF(fromUnicodeTests); ++i) { 3201 char tgt[10]; 3202 char* target = tgt; 3203 char* targetLimit = target + 10; 3204 const UChar* source = fromUnicodeTests[i].input; 3205 const UChar* sourceLimit = source + fromUnicodeTests[i].len; 3206 int32_t len = 0; 3207 ucnv_reset(cnv); 3208 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3209 len = ucnv_fromUCountPending(cnv, &status); 3210 if(U_FAILURE(status)){ 3211 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3212 status = U_ZERO_ERROR; 3213 continue; 3214 } 3215 if(len != fromUnicodeTests[i].exp){ 3216 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n"); 3217 } 3218 } 3219 status = U_ZERO_ERROR; 3220 { 3221 /* 3222 * The converter has to read the tail before it knows that 3223 * only head alone matches. 3224 * At the end, the output for head will overflow the target, 3225 * middle will be pending, and tail will not have been consumed. 3226 */ 3227 /* 3228 \U00101234 -> x (<U101234> \x07 |0) 3229 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0) 3230 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0) 3231 \U00060007 -> unassigned 3232 */ 3233 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ 3234 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ 3235 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ 3236 char tgt[10]; 3237 char* target = tgt; 3238 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ 3239 const UChar* source = head; 3240 const UChar* sourceLimit = source + u_strlen(head); 3241 int32_t len = 0; 3242 ucnv_reset(cnv); 3243 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3244 len = ucnv_fromUCountPending(cnv, &status); 3245 if(U_FAILURE(status)){ 3246 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3247 status = U_ZERO_ERROR; 3248 } 3249 if(len!=4){ 3250 log_err("ucnv_fromUInputHeld did not return correct length for head\n"); 3251 } 3252 source = middle; 3253 sourceLimit = source + u_strlen(middle); 3254 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3255 len = ucnv_fromUCountPending(cnv, &status); 3256 if(U_FAILURE(status)){ 3257 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3258 status = U_ZERO_ERROR; 3259 } 3260 if(len!=5){ 3261 log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); 3262 } 3263 source = tail; 3264 sourceLimit = source + u_strlen(tail); 3265 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3266 if(status != U_BUFFER_OVERFLOW_ERROR){ 3267 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3268 } 3269 status = U_ZERO_ERROR; 3270 len = ucnv_fromUCountPending(cnv, &status); 3271 /* middle[1] is pending, tail has not been consumed */ 3272 if(U_FAILURE(status)){ 3273 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); 3274 } 3275 if(len!=1){ 3276 log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); 3277 } 3278 } 3279 ucnv_close(cnv); 3280 #endif 3281 } 3282 3283 static void 3284 TestToUCountPending(){ 3285 #if !UCONFIG_NO_LEGACY_CONVERSION 3286 UErrorCode status = U_ZERO_ERROR; 3287 static const struct { 3288 char input[6]; 3289 int32_t len; 3290 int32_t exp; 3291 }toUnicodeTests[] = { 3292 /*m:n conversion*/ 3293 {{0x05, 0x01, 0x02},3,3}, 3294 {{0x01, 0x02},2,2}, 3295 {{0x07, 0x00, 0x01, 0x02},4,4}, 3296 }; 3297 3298 int i; 3299 UConverterToUCallback *oldToUAction= NULL; 3300 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3301 if(U_FAILURE(status)){ 3302 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3303 return; 3304 } 3305 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3306 for(i=0; i<LENGTHOF(toUnicodeTests); ++i) { 3307 UChar tgt[20]; 3308 UChar* target = tgt; 3309 UChar* targetLimit = target + 20; 3310 const char* source = toUnicodeTests[i].input; 3311 const char* sourceLimit = source + toUnicodeTests[i].len; 3312 int32_t len = 0; 3313 ucnv_reset(cnv); 3314 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3315 len = ucnv_toUCountPending(cnv,&status); 3316 if(U_FAILURE(status)){ 3317 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3318 status = U_ZERO_ERROR; 3319 continue; 3320 } 3321 if(len != toUnicodeTests[i].exp){ 3322 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n"); 3323 } 3324 } 3325 status = U_ZERO_ERROR; 3326 ucnv_close(cnv); 3327 3328 { 3329 /* 3330 * The converter has to read the tail before it knows that 3331 * only head alone matches. 3332 * At the end, the output for head will overflow the target, 3333 * mid will be pending, and tail will not have been consumed. 3334 */ 3335 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00}; 3336 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 }; 3337 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 }; 3338 /* 3339 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0) 3340 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0) 3341 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3) 3342 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") 3343 */ 3344 UChar tgt[10]; 3345 UChar* target = tgt; 3346 UChar* targetLimit = target + 1; /* expect overflow from converting */ 3347 const char* source = head; 3348 const char* sourceLimit = source + strlen(head); 3349 int32_t len = 0; 3350 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); 3351 if(U_FAILURE(status)){ 3352 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3353 return; 3354 } 3355 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3356 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3357 len = ucnv_toUCountPending(cnv,&status); 3358 if(U_FAILURE(status)){ 3359 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3360 } 3361 if(len != 4){ 3362 log_err("Did not get the expected len for head.\n"); 3363 } 3364 source=mid; 3365 sourceLimit = source+strlen(mid); 3366 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3367 len = ucnv_toUCountPending(cnv,&status); 3368 if(U_FAILURE(status)){ 3369 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3370 } 3371 if(len != 8){ 3372 log_err("Did not get the expected len for mid.\n"); 3373 } 3374 3375 source=tail; 3376 sourceLimit = source+strlen(tail); 3377 targetLimit = target; 3378 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3379 if(status != U_BUFFER_OVERFLOW_ERROR){ 3380 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3381 } 3382 status = U_ZERO_ERROR; 3383 len = ucnv_toUCountPending(cnv,&status); 3384 /* mid[4] is pending, tail has not been consumed */ 3385 if(U_FAILURE(status)){ 3386 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status)); 3387 } 3388 if(len != 4){ 3389 log_err("Did not get the expected len for tail.\n"); 3390 } 3391 ucnv_close(cnv); 3392 } 3393 #endif 3394 } 3395 3396 static void TestOneDefaultNameChange(const char *name, const char *expected) { 3397 UErrorCode status = U_ZERO_ERROR; 3398 UConverter *cnv; 3399 ucnv_setDefaultName(name); 3400 if(strcmp(ucnv_getDefaultName(), expected)==0) 3401 log_verbose("setDefaultName of %s works.\n", name); 3402 else 3403 log_err("setDefaultName of %s failed\n", name); 3404 cnv=ucnv_open(NULL, &status); 3405 if (U_FAILURE(status) || cnv == NULL) { 3406 log_err("opening the default converter of %s failed\n", name); 3407 return; 3408 } 3409 if(strcmp(ucnv_getName(cnv, &status), expected)==0) 3410 log_verbose("ucnv_getName of %s works.\n", name); 3411 else 3412 log_err("ucnv_getName of %s failed\n", name); 3413 ucnv_close(cnv); 3414 } 3415 3416 static void TestDefaultName(void) { 3417 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ 3418 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; 3419 strcpy(defaultName, ucnv_getDefaultName()); 3420 3421 log_verbose("getDefaultName returned %s\n", defaultName); 3422 3423 /*change the default name by setting it */ 3424 TestOneDefaultNameChange("UTF-8", "UTF-8"); 3425 #if U_CHARSET_IS_UTF8 3426 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); 3427 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); 3428 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); 3429 #else 3430 # if !UCONFIG_NO_LEGACY_CONVERSION 3431 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); 3432 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); 3433 # endif 3434 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); 3435 #endif 3436 3437 /*set the default name back*/ 3438 ucnv_setDefaultName(defaultName); 3439 } 3440 3441 /* Test that ucnv_compareNames() matches names according to spec. ----------- */ 3442 3443 static int 3444 sign(int n) { 3445 if(n==0) { 3446 return 0; 3447 } else if(n<0) { 3448 return -1; 3449 } else /* n>0 */ { 3450 return 1; 3451 } 3452 } 3453 3454 static void 3455 compareNames(const char **names) { 3456 const char *relation, *name1, *name2; 3457 int rel, result; 3458 3459 relation=*names++; 3460 if(*relation=='=') { 3461 rel = 0; 3462 } else if(*relation=='<') { 3463 rel = -1; 3464 } else { 3465 rel = 1; 3466 } 3467 3468 name1=*names++; 3469 if(name1==NULL) { 3470 return; 3471 } 3472 while((name2=*names++)!=NULL) { 3473 result=ucnv_compareNames(name1, name2); 3474 if(sign(result)!=rel) { 3475 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); 3476 } 3477 name1=name2; 3478 } 3479 } 3480 3481 static void 3482 TestCompareNames() { 3483 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; 3484 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; 3485 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; 3486 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; 3487 3488 compareNames(equalUTF8); 3489 compareNames(equalIBM); 3490 compareNames(lessMac); 3491 compareNames(lessUTF080); 3492 } 3493 3494 static void 3495 TestSubstString() { 3496 static const UChar surrogate[1]={ 0xd900 }; 3497 char buffer[16]; 3498 3499 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3500 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3501 UConverter *cnv; 3502 UErrorCode errorCode; 3503 int32_t length; 3504 int8_t len8; 3505 3506 /* UTF-16/32: test that the BOM is output before the sub character */ 3507 errorCode=U_ZERO_ERROR; 3508 cnv=ucnv_open("UTF-16", &errorCode); 3509 if(U_FAILURE(errorCode)) { 3510 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); 3511 return; 3512 } 3513 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3514 ucnv_close(cnv); 3515 if(U_FAILURE(errorCode) || 3516 length!=4 || 3517 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3518 ) { 3519 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); 3520 } 3521 3522 errorCode=U_ZERO_ERROR; 3523 cnv=ucnv_open("UTF-32", &errorCode); 3524 if(U_FAILURE(errorCode)) { 3525 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); 3526 return; 3527 } 3528 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3529 ucnv_close(cnv); 3530 if(U_FAILURE(errorCode) || 3531 length!=8 || 3532 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3533 ) { 3534 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); 3535 } 3536 3537 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ 3538 errorCode=U_ZERO_ERROR; 3539 cnv=ucnv_open("ISO-8859-1", &errorCode); 3540 if(U_FAILURE(errorCode)) { 3541 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); 3542 return; 3543 } 3544 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3545 if(U_FAILURE(errorCode)) { 3546 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); 3547 } else { 3548 len8 = sizeof(buffer); 3549 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3550 /* Stateless converter, we expect the string converted to charset bytes. */ 3551 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { 3552 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); 3553 } 3554 } 3555 ucnv_close(cnv); 3556 3557 #if !UCONFIG_NO_LEGACY_CONVERSION 3558 errorCode=U_ZERO_ERROR; 3559 cnv=ucnv_open("HZ", &errorCode); 3560 if(U_FAILURE(errorCode)) { 3561 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); 3562 return; 3563 } 3564 ucnv_setSubstString(cnv, sub, LENGTHOF(sub), &errorCode); 3565 if(U_FAILURE(errorCode)) { 3566 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); 3567 } else { 3568 len8 = sizeof(buffer); 3569 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3570 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ 3571 if(U_FAILURE(errorCode) || len8!=0) { 3572 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); 3573 } 3574 } 3575 ucnv_close(cnv); 3576 #endif 3577 /* 3578 * Further testing of ucnv_setSubstString() is done via intltest convert. 3579 * We do not test edge cases of illegal arguments and similar because the 3580 * function implementation uses all of its parameters in calls to other 3581 * functions with UErrorCode parameters. 3582 */ 3583 } 3584 3585 static void 3586 InvalidArguments() { 3587 UConverter *cnv; 3588 UErrorCode errorCode; 3589 char charBuffer[2] = {1, 1}; 3590 char ucharAsCharBuffer[2] = {2, 2}; 3591 char *charsPtr = charBuffer; 3592 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; 3593 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); 3594 3595 errorCode=U_ZERO_ERROR; 3596 cnv=ucnv_open("UTF-8", &errorCode); 3597 if(U_FAILURE(errorCode)) { 3598 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); 3599 return; 3600 } 3601 3602 errorCode=U_ZERO_ERROR; 3603 /* This one should fail because an incomplete UChar is being passed in */ 3604 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); 3605 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3606 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3607 } 3608 3609 errorCode=U_ZERO_ERROR; 3610 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3611 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); 3612 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3613 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3614 } 3615 3616 errorCode=U_ZERO_ERROR; 3617 /* This one should fail because an incomplete UChar is being passed in */ 3618 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3619 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3620 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3621 } 3622 3623 errorCode=U_ZERO_ERROR; 3624 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3625 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3626 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3627 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3628 } 3629 3630 if (charBuffer[0] != 1 || charBuffer[1] != 1 3631 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) 3632 { 3633 log_err("Data was incorrectly written to buffers\n"); 3634 } 3635 3636 ucnv_close(cnv); 3637 } 3638 3639 static void TestGetName() { 3640 static const char *const names[] = { 3641 "Unicode", "UTF-16", 3642 "UnicodeBigUnmarked", "UTF-16BE", 3643 "UnicodeBig", "UTF-16BE,version=1", 3644 "UnicodeLittleUnmarked", "UTF-16LE", 3645 "UnicodeLittle", "UTF-16LE,version=1", 3646 "x-UTF-16LE-BOM", "UTF-16LE,version=1" 3647 }; 3648 int32_t i; 3649 for(i = 0; i < LENGTHOF(names); i += 2) { 3650 UErrorCode errorCode = U_ZERO_ERROR; 3651 UConverter *cnv = ucnv_open(names[i], &errorCode); 3652 if(U_SUCCESS(errorCode)) { 3653 const char *name = ucnv_getName(cnv, &errorCode); 3654 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { 3655 log_err("ucnv_getName(%s) = %s != %s -- %s\n", 3656 names[i], name, names[i+1], u_errorName(errorCode)); 3657 } 3658 ucnv_close(cnv); 3659 } 3660 } 3661 } 3662 3663 static void TestUTFBOM() { 3664 static const UChar a16[] = { 0x61 }; 3665 static const char *const names[] = { 3666 "UTF-16", 3667 "UTF-16,version=1", 3668 "UTF-16BE", 3669 "UnicodeBig", 3670 "UTF-16LE", 3671 "UnicodeLittle" 3672 }; 3673 static const uint8_t expected[][5] = { 3674 #if U_IS_BIG_ENDIAN 3675 { 4, 0xfe, 0xff, 0, 0x61 }, 3676 { 4, 0xfe, 0xff, 0, 0x61 }, 3677 #else 3678 { 4, 0xff, 0xfe, 0x61, 0 }, 3679 { 4, 0xff, 0xfe, 0x61, 0 }, 3680 #endif 3681 3682 { 2, 0, 0x61 }, 3683 { 4, 0xfe, 0xff, 0, 0x61 }, 3684 3685 { 2, 0x61, 0 }, 3686 { 4, 0xff, 0xfe, 0x61, 0 } 3687 }; 3688 3689 char bytes[10]; 3690 int32_t i; 3691 3692 for(i = 0; i < LENGTHOF(names); ++i) { 3693 UErrorCode errorCode = U_ZERO_ERROR; 3694 UConverter *cnv = ucnv_open(names[i], &errorCode); 3695 int32_t length = 0; 3696 const uint8_t *exp = expected[i]; 3697 if (U_FAILURE(errorCode)) { 3698 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); 3699 continue; 3700 } 3701 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); 3702 3703 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { 3704 log_err("unexpected %s BOM writing behavior -- %s\n", 3705 names[i], u_errorName(errorCode)); 3706 } 3707 ucnv_close(cnv); 3708 } 3709 } 3710