1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /***************************************************************************** 9 * 10 * File ccapitst.c 11 * 12 * Modification History: 13 * Name Description 14 * Madhu Katragadda Ported for C API 15 ****************************************************************************** 16 */ 17 #include <stdio.h> 18 #include <stdlib.h> 19 #include <string.h> 20 #include <ctype.h> 21 #include "unicode/uloc.h" 22 #include "unicode/ucnv.h" 23 #include "unicode/ucnv_err.h" 24 #include "unicode/putil.h" 25 #include "unicode/uset.h" 26 #include "unicode/ustring.h" 27 #include "ucnv_bld.h" /* for sizeof(UConverter) */ 28 #include "cmemory.h" /* for UAlignedMemory */ 29 #include "cintltst.h" 30 #include "ccapitst.h" 31 #include "cstring.h" 32 33 #define NUM_CODEPAGE 1 34 #define MAX_FILE_LEN 1024*20 35 #define UCS_FILE_NAME_SIZE 512 36 37 /*returns an action other than the one provided*/ 38 #if !UCONFIG_NO_LEGACY_CONVERSION 39 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA); 40 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA); 41 #endif 42 43 static UConverter * 44 cnv_open(const char *name, UErrorCode *pErrorCode) { 45 if(name!=NULL && name[0]=='*') { 46 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode); 47 } else { 48 return ucnv_open(name, pErrorCode); 49 } 50 } 51 52 53 static void ListNames(void); 54 static void TestFlushCache(void); 55 static void TestDuplicateAlias(void); 56 static void TestCCSID(void); 57 static void TestJ932(void); 58 static void TestJ1968(void); 59 #if !UCONFIG_NO_FILE_IO && 
!UCONFIG_NO_LEGACY_CONVERSION 60 static void TestLMBCSMaxChar(void); 61 #endif 62 63 #if !UCONFIG_NO_LEGACY_CONVERSION 64 static void TestConvertSafeCloneCallback(void); 65 #endif 66 67 static void TestEBCDICSwapLFNL(void); 68 static void TestConvertEx(void); 69 static void TestConvertExFromUTF8(void); 70 static void TestConvertExFromUTF8_C5F0(void); 71 static void TestConvertAlgorithmic(void); 72 void TestDefaultConverterError(void); /* defined in cctest.c */ 73 void TestDefaultConverterSet(void); /* defined in cctest.c */ 74 static void TestToUCountPending(void); 75 static void TestFromUCountPending(void); 76 static void TestDefaultName(void); 77 static void TestCompareNames(void); 78 static void TestSubstString(void); 79 static void InvalidArguments(void); 80 static void TestGetName(void); 81 static void TestUTFBOM(void); 82 83 void addTestConvert(TestNode** root); 84 85 void addTestConvert(TestNode** root) 86 { 87 addTest(root, &ListNames, "tsconv/ccapitst/ListNames"); 88 addTest(root, &TestConvert, "tsconv/ccapitst/TestConvert"); 89 addTest(root, &TestFlushCache, "tsconv/ccapitst/TestFlushCache"); 90 addTest(root, &TestAlias, "tsconv/ccapitst/TestAlias"); 91 addTest(root, &TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias"); 92 addTest(root, &TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone"); 93 #if !UCONFIG_NO_LEGACY_CONVERSION 94 addTest(root, &TestConvertSafeCloneCallback,"tsconv/ccapitst/TestConvertSafeCloneCallback"); 95 #endif 96 addTest(root, &TestCCSID, "tsconv/ccapitst/TestCCSID"); 97 addTest(root, &TestJ932, "tsconv/ccapitst/TestJ932"); 98 addTest(root, &TestJ1968, "tsconv/ccapitst/TestJ1968"); 99 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 100 addTest(root, &TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar"); 101 #endif 102 addTest(root, &TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL"); 103 addTest(root, &TestConvertEx, "tsconv/ccapitst/TestConvertEx"); 104 addTest(root, &TestConvertExFromUTF8, 
"tsconv/ccapitst/TestConvertExFromUTF8"); 105 addTest(root, &TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0"); 106 addTest(root, &TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic"); 107 addTest(root, &TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError"); 108 addTest(root, &TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet"); 109 #if !UCONFIG_NO_FILE_IO 110 addTest(root, &TestToUCountPending, "tsconv/ccapitst/TestToUCountPending"); 111 addTest(root, &TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending"); 112 #endif 113 addTest(root, &TestDefaultName, "tsconv/ccapitst/TestDefaultName"); 114 addTest(root, &TestCompareNames, "tsconv/ccapitst/TestCompareNames"); 115 addTest(root, &TestSubstString, "tsconv/ccapitst/TestSubstString"); 116 addTest(root, &InvalidArguments, "tsconv/ccapitst/InvalidArguments"); 117 addTest(root, &TestGetName, "tsconv/ccapitst/TestGetName"); 118 addTest(root, &TestUTFBOM, "tsconv/ccapitst/TestUTFBOM"); 119 } 120 121 static void ListNames(void) { 122 UErrorCode err = U_ZERO_ERROR; 123 int32_t testLong1 = 0; 124 const char* available_conv; 125 UEnumeration *allNamesEnum = NULL; 126 int32_t allNamesCount = 0; 127 uint16_t count; 128 129 log_verbose("Testing ucnv_openAllNames()..."); 130 allNamesEnum = ucnv_openAllNames(&err); 131 if(U_FAILURE(err)) { 132 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err)); 133 } 134 else { 135 const char *string = NULL; 136 int32_t len = 0; 137 int32_t count1 = 0; 138 int32_t count2 = 0; 139 allNamesCount = uenum_count(allNamesEnum, &err); 140 while ((string = uenum_next(allNamesEnum, &len, &err))) { 141 count1++; 142 log_verbose("read \"%s\", length %i\n", string, len); 143 } 144 if (U_FAILURE(err)) { 145 log_err("FAILURE! uenum_next(allNamesEnum...) 
set an error: %s\n", u_errorName(err)); 146 err = U_ZERO_ERROR; 147 } 148 uenum_reset(allNamesEnum, &err); 149 while ((string = uenum_next(allNamesEnum, &len, &err))) { 150 count2++; 151 ucnv_close(ucnv_open(string, &err)); 152 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable"); 153 err = U_ZERO_ERROR; 154 } 155 if (count1 != count2) { 156 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n"); 157 } 158 } 159 uenum_close(allNamesEnum); 160 err = U_ZERO_ERROR; 161 162 /*Tests ucnv_getAvailableName(), getAvialableCount()*/ 163 164 log_verbose("Testing ucnv_countAvailable()..."); 165 166 testLong1=ucnv_countAvailable(); 167 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount); 168 169 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */ 170 171 available_conv = ucnv_getAvailableName(testLong1); 172 /*test ucnv_getAvailableName with err condition*/ 173 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 "); 174 available_conv = ucnv_getAvailableName(-1); 175 if(available_conv != NULL){ 176 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n"); 177 } 178 179 /* Test ucnv_countAliases() etc. */ 180 count = ucnv_countAliases("utf-8", &err); 181 if(U_FAILURE(err)) { 182 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err)); 183 } else if(count <= 0) { 184 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count); 185 } else { 186 /* try to get the aliases individually */ 187 const char *alias; 188 alias = ucnv_getAlias("utf-8", 0, &err); 189 if(U_FAILURE(err)) { 190 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err)); 191 } else if(strcmp("UTF-8", alias) != 0) { 192 log_err("FAILURE! 
ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias); 193 } else { 194 uint16_t aliasNum; 195 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 196 alias = ucnv_getAlias("utf-8", aliasNum, &err); 197 if(U_FAILURE(err)) { 198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 199 } else if(strlen(alias) > 20) { 200 /* sanity check */ 201 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias); 202 } else { 203 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias); 204 } 205 } 206 if(U_SUCCESS(err)) { 207 /* try to fill an array with all aliases */ 208 const char **aliases; 209 aliases=(const char **)malloc(count * sizeof(const char *)); 210 if(aliases != 0) { 211 ucnv_getAliases("utf-8", aliases, &err); 212 if(U_FAILURE(err)) { 213 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err)); 214 } else { 215 for(aliasNum = 0; aliasNum < count; ++aliasNum) { 216 /* compare the pointers with the ones returned individually */ 217 alias = ucnv_getAlias("utf-8", aliasNum, &err); 218 if(U_FAILURE(err)) { 219 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err)); 220 } else if(aliases[aliasNum] != alias) { 221 log_err("FAILURE! 
ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum); 222 } 223 } 224 } 225 free((char **)aliases); 226 } 227 } 228 } 229 } 230 } 231 232 233 static void TestConvert() 234 { 235 #if !UCONFIG_NO_LEGACY_CONVERSION 236 char myptr[4]; 237 char save[4]; 238 int32_t testLong1 = 0; 239 uint16_t rest = 0; 240 int32_t len = 0; 241 int32_t x = 0; 242 FILE* ucs_file_in = NULL; 243 UChar BOM = 0x0000; 244 UChar myUChar = 0x0000; 245 char* mytarget; /* [MAX_FILE_LEN] */ 246 char* mytarget_1; 247 char* mytarget_use; 248 UChar* consumedUni = NULL; 249 char* consumed = NULL; 250 char* output_cp_buffer; /* [MAX_FILE_LEN] */ 251 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */ 252 UChar* ucs_file_buffer_use; 253 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */ 254 UChar* my_ucs_file_buffer_1; 255 int8_t ii = 0; 256 uint16_t codepage_index = 0; 257 int32_t cp = 0; 258 UErrorCode err = U_ZERO_ERROR; 259 char ucs_file_name[UCS_FILE_NAME_SIZE]; 260 UConverterFromUCallback MIA1, MIA1_2; 261 UConverterToUCallback MIA2, MIA2_2; 262 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2; 263 UConverter* someConverters[5]; 264 UConverter* myConverter = 0; 265 UChar* displayname = 0; 266 267 const char* locale; 268 269 UChar* uchar1 = 0; 270 UChar* uchar2 = 0; 271 UChar* uchar3 = 0; 272 int32_t targetcapacity2; 273 int32_t targetcapacity; 274 int32_t targetsize; 275 int32_t disnamelen; 276 277 const UChar* tmp_ucs_buf; 278 const UChar* tmp_consumedUni=NULL; 279 const char* tmp_mytarget_use; 280 const char* tmp_consumed; 281 282 /****************************************************************** 283 Checking Unicode -> ksc 284 ******************************************************************/ 285 286 const char* CodePagesToTest[NUM_CODEPAGE] = 287 { 288 "ibm-949_P110-1999" 289 290 291 }; 292 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] = 293 { 294 949 295 }; 296 297 298 const int8_t CodePagesMinChars[NUM_CODEPAGE] = 299 { 300 1 301 302 }; 303 304 
const int8_t CodePagesMaxChars[NUM_CODEPAGE] = 305 { 306 2 307 308 }; 309 310 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] = 311 { 312 0xAFFE 313 }; 314 315 const char* CodePagesTestFiles[NUM_CODEPAGE] = 316 { 317 "uni-text.bin" 318 }; 319 320 321 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] = 322 { 323 UCNV_IBM 324 325 }; 326 327 const char* CodePagesLocale[NUM_CODEPAGE] = 328 { 329 "ko_KR" 330 }; 331 332 UConverterFromUCallback oldFromUAction = NULL; 333 UConverterToUCallback oldToUAction = NULL; 334 const void* oldFromUContext = NULL; 335 const void* oldToUContext = NULL; 336 337 /* Allocate memory */ 338 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0])); 339 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0])); 340 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0])); 341 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0])); 342 343 ucs_file_buffer_use = ucs_file_buffer; 344 mytarget_1=mytarget; 345 mytarget_use = mytarget; 346 my_ucs_file_buffer_1=my_ucs_file_buffer; 347 348 /* flush the converter cache to get a consistent state before the flushing is tested */ 349 ucnv_flushCache(); 350 351 /*Testing ucnv_openU()*/ 352 { 353 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/ 354 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */ 355 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */ 356 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"}; 357 UChar illegalName[100]; 358 UConverter *converter=NULL; 359 err=U_ZERO_ERROR; 360 converter=ucnv_openU(converterName, &err); 361 if(U_FAILURE(err)){ 362 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err)); 363 } 364 ucnv_close(converter); 365 err=U_ZERO_ERROR; 366 converter=ucnv_openU(NULL, &err); 367 if(U_FAILURE(err)){ 368 log_err("FAILURE! ucnv_openU(NULL, err) failed. 
%s\n", myErrorName(err)); 369 } 370 ucnv_close(converter); 371 /*testing with error value*/ 372 err=U_ILLEGAL_ARGUMENT_ERROR; 373 converter=ucnv_openU(converterName, &err); 374 if(!(converter == NULL)){ 375 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n"); 376 } 377 ucnv_close(converter); 378 err=U_ZERO_ERROR; 379 u_uastrcpy(illegalName, ""); 380 u_uastrcpy(illegalName, illegalNameChars); 381 ucnv_openU(illegalName, &err); 382 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){ 383 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n"); 384 } 385 386 err=U_ZERO_ERROR; 387 ucnv_openU(firstSortedName, &err); 388 if(err!=U_FILE_ACCESS_ERROR){ 389 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n"); 390 } 391 392 err=U_ZERO_ERROR; 393 ucnv_openU(lastSortedName, &err); 394 if(err!=U_FILE_ACCESS_ERROR){ 395 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n"); 396 } 397 398 err=U_ZERO_ERROR; 399 } 400 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n"); 401 { 402 UConverter *cnv=NULL; 403 err=U_ZERO_ERROR; 404 cnv=ucnv_open("ibm-949,Madhu", &err); 405 if(U_FAILURE(err)){ 406 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. 
%s\n", myErrorName(err)); 407 } 408 ucnv_close(cnv); 409 410 } 411 /*Testing ucnv_convert()*/ 412 { 413 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0; 414 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00}; 415 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00}; 416 char *target=0; 417 sourceLimit=UPRV_LENGTHOF(source); 418 err=U_ZERO_ERROR; 419 targetLimit=0; 420 421 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err); 422 if(err == U_BUFFER_OVERFLOW_ERROR){ 423 err=U_ZERO_ERROR; 424 targetLimit=targetCapacity+1; 425 target=(char*)malloc(sizeof(char) * targetLimit); 426 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 427 } 428 if(U_FAILURE(err)){ 429 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err)); 430 } 431 else { 432 for(i=0; i<targetCapacity; i++){ 433 if(target[i] != expectedTarget[i]){ 434 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]); 435 } 436 } 437 438 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err); 439 if(U_FAILURE(err) || i!=7){ 440 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n", 441 u_errorName(err), i); 442 } 443 444 /*Test error conditions*/ 445 err=U_ZERO_ERROR; 446 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err); 447 if(i !=0){ 448 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n"); 449 } 450 451 err=U_ILLEGAL_ARGUMENT_ERROR; 452 sourceLimit=UPRV_LENGTHOF(source); 453 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 454 if(i !=0 ){ 455 log_err("FAILURE! 
ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n"); 456 } 457 458 err=U_ZERO_ERROR; 459 sourceLimit=UPRV_LENGTHOF(source); 460 targetLimit=0; 461 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err); 462 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){ 463 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n"); 464 } 465 err=U_ZERO_ERROR; 466 free(target); 467 } 468 } 469 470 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/ 471 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n"); 472 err=U_ILLEGAL_ARGUMENT_ERROR; 473 if(ucnv_open(NULL, &err) != NULL){ 474 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 475 } 476 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){ 477 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n"); 478 } 479 err=U_ZERO_ERROR; 480 481 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */ 482 log_verbose("\n---Testing ucnv_open default...\n"); 483 someConverters[0] = ucnv_open(NULL,&err); 484 someConverters[1] = ucnv_open(NULL,&err); 485 someConverters[2] = ucnv_open("utf8", &err); 486 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err); 487 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */ 488 if (U_FAILURE(err)){ log_data_err("FAILURE! 
%s\n", myErrorName(err));} 489 490 /* Testing ucnv_getName()*/ 491 /*default code page */ 492 ucnv_getName(someConverters[0], &err); 493 if(U_FAILURE(err)) { 494 log_data_err("getName[0] failed\n"); 495 } else { 496 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err)); 497 } 498 ucnv_getName(someConverters[1], &err); 499 if(U_FAILURE(err)) { 500 log_data_err("getName[1] failed\n"); 501 } else { 502 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err)); 503 } 504 505 ucnv_close(someConverters[0]); 506 ucnv_close(someConverters[1]); 507 ucnv_close(someConverters[2]); 508 ucnv_close(someConverters[3]); 509 510 511 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index) 512 { 513 int32_t i = 0; 514 515 err = U_ZERO_ERROR; 516 #ifdef U_TOPSRCDIR 517 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING); 518 #else 519 strcpy(ucs_file_name, loadTestData(&err)); 520 521 if(U_FAILURE(err)){ 522 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err)); 523 return; 524 } 525 526 { 527 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR); 528 529 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){ 530 *(index+1)=0; 531 } 532 } 533 534 strcat(ucs_file_name,".."U_FILE_SEP_STRING); 535 #endif 536 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]); 537 538 ucs_file_in = fopen(ucs_file_name,"rb"); 539 if (!ucs_file_in) 540 { 541 log_data_err("Couldn't open the Unicode file [%s]... 
Exiting...\n", ucs_file_name); 542 return; 543 } 544 545 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/ 546 547 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */ 548 /* ucnv_flushCache(); */ 549 myConverter =ucnv_open( "ibm-949", &err); 550 if (!myConverter || U_FAILURE(err)) 551 { 552 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err)); 553 fclose(ucs_file_in); 554 break; 555 } 556 557 /*testing for ucnv_getName() */ 558 log_verbose("Testing ucnv_getName()...\n"); 559 ucnv_getName(myConverter, &err); 560 if(U_FAILURE(err)) 561 log_err("Error in getName\n"); 562 else 563 { 564 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err)); 565 } 566 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index])) 567 log_err("getName failed\n"); 568 else 569 log_verbose("getName ok\n"); 570 /*Test getName with error condition*/ 571 { 572 const char* name=0; 573 err=U_ILLEGAL_ARGUMENT_ERROR; 574 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR"); 575 name=ucnv_getName(myConverter, &err); 576 if(name != NULL){ 577 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail"); 578 } 579 err=U_ZERO_ERROR; 580 } 581 582 583 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/ 584 585 log_verbose("Testing ucnv_getMaxCharSize()...\n"); 586 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index]) 587 log_verbose("Max byte per character OK\n"); 588 else 589 log_err("Max byte per character failed\n"); 590 591 log_verbose("\n---Testing ucnv_getMinCharSize()...\n"); 592 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index]) 593 log_verbose("Min byte per character OK\n"); 594 else 595 log_err("Min byte per character failed\n"); 596 597 598 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/ 599 log_verbose("\n---Testing ucnv_getSubstChars...\n"); 600 ii=4; 601 
ucnv_getSubstChars(myConverter, myptr, &ii, &err); 602 if (ii <= 0) { 603 log_err("ucnv_getSubstChars returned a negative number %d\n", ii); 604 } 605 606 for(x=0;x<ii;x++) 607 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]); 608 if (rest==CodePagesSubstitutionChars[codepage_index]) 609 log_verbose("Substitution character ok\n"); 610 else 611 log_err("Substitution character failed.\n"); 612 613 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n"); 614 ucnv_setSubstChars(myConverter, myptr, ii, &err); 615 if (U_FAILURE(err)) 616 { 617 log_err("FAILURE! %s\n", myErrorName(err)); 618 } 619 ucnv_getSubstChars(myConverter,save, &ii, &err); 620 if (U_FAILURE(err)) 621 { 622 log_err("FAILURE! %s\n", myErrorName(err)); 623 } 624 625 if (strncmp(save, myptr, ii)) 626 log_err("Saved substitution character failed\n"); 627 else 628 log_verbose("Saved substitution character ok\n"); 629 630 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/ 631 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n"); 632 ii=1; 633 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 634 if(err != U_INDEX_OUTOFBOUNDS_ERROR){ 635 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err)); 636 } 637 err=U_ZERO_ERROR; 638 ii=4; 639 ucnv_getSubstChars(myConverter, myptr, &ii, &err); 640 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n"); 641 ucnv_setSubstChars(myConverter, myptr, 0, &err); 642 if(err != U_ILLEGAL_ARGUMENT_ERROR){ 643 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err)); 644 } 645 log_verbose("\n---Testing ucnv_setSubstChars.. 
with err != U_ZERO_ERROR \n"); 646 strcpy(myptr, "abc"); 647 ucnv_setSubstChars(myConverter, myptr, ii, &err); 648 err=U_ZERO_ERROR; 649 ucnv_getSubstChars(myConverter, save, &ii, &err); 650 if(strncmp(save, myptr, ii) == 0){ 651 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n"); 652 } 653 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n"); 654 err=U_ZERO_ERROR; 655 strcpy(myptr, "abc"); 656 ucnv_setSubstChars(myConverter, myptr, ii, &err); 657 err=U_ILLEGAL_ARGUMENT_ERROR; 658 ucnv_getSubstChars(myConverter, save, &ii, &err); 659 if(strncmp(save, myptr, ii) == 0){ 660 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n"); 661 } 662 err=U_ZERO_ERROR; 663 /*------*/ 664 665 #ifdef U_ENABLE_GENERIC_ISO_2022 666 /*resetState ucnv_reset()*/ 667 log_verbose("\n---Testing ucnv_reset()..\n"); 668 ucnv_reset(myConverter); 669 { 670 UChar32 c; 671 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80}; 672 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in); 673 UConverter *cnv=ucnv_open("ISO_2022", &err); 674 if(U_FAILURE(err)) { 675 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err)); 676 } 677 c=ucnv_getNextUChar(cnv, &source, limit, &err); 678 if((U_FAILURE(err) || c != (UChar32)0x0031)) { 679 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err)); 680 } 681 ucnv_reset(cnv); 682 ucnv_close(cnv); 683 684 } 685 #endif 686 687 /*getDisplayName*/ 688 log_verbose("\n---Testing ucnv_getDisplayName()...\n"); 689 locale=CodePagesLocale[codepage_index]; 690 len=0; 691 displayname=NULL; 692 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err); 693 if(err==U_BUFFER_OVERFLOW_ERROR) { 694 err=U_ZERO_ERROR; 695 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar)); 696 
ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err); 697 if(U_FAILURE(err)) { 698 log_err("getDisplayName failed. The error is %s\n", myErrorName(err)); 699 } 700 else { 701 log_verbose(" getDisplayName o.k.\n"); 702 } 703 free(displayname); 704 displayname=NULL; 705 } 706 else { 707 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err)); 708 } 709 /*test ucnv_getDiaplayName with error condition*/ 710 err= U_ILLEGAL_ARGUMENT_ERROR; 711 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err); 712 if( len !=0 ){ 713 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n"); 714 } 715 /*test ucnv_getDiaplayName with error condition*/ 716 err=U_ZERO_ERROR; 717 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err); 718 if( len !=0 || U_SUCCESS(err)){ 719 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n"); 720 } 721 err=U_ZERO_ERROR; 722 723 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/ 724 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context); 725 726 log_verbose("\n---Testing ucnv_setFromUCallBack...\n"); 727 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 728 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context) 729 { 730 log_err("FAILURE! %s\n", myErrorName(err)); 731 } 732 733 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 734 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM) 735 log_err("get From UCallBack failed\n"); 736 else 737 log_verbose("get From UCallBack ok\n"); 738 739 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n"); 740 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err); 741 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM) 742 { 743 log_err("FAILURE! 
%s\n", myErrorName(err)); 744 } 745 746 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 747 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context) 748 log_err("get From UCallBack action failed\n"); 749 else 750 log_verbose("get From UCallBack action ok\n"); 751 752 /*testing ucnv_setToUCallBack with error conditions*/ 753 err=U_ILLEGAL_ARGUMENT_ERROR; 754 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n"); 755 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err); 756 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2); 757 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){ 758 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 759 } 760 err=U_ZERO_ERROR; 761 762 763 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/ 764 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context); 765 766 log_verbose("\n---Testing setTo UCallBack...\n"); 767 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err); 768 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context) 769 { 770 log_err("FAILURE! %s\n", myErrorName(err)); 771 } 772 773 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 774 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM) 775 log_err("To UCallBack failed\n"); 776 else 777 log_verbose("To UCallBack ok\n"); 778 779 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n"); 780 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err); 781 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM) 782 { log_err("FAILURE! 
%s\n", myErrorName(err)); } 783 784 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 785 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context) 786 log_err("To UCallBack failed\n"); 787 else 788 log_verbose("To UCallBack ok\n"); 789 790 /*testing ucnv_setToUCallBack with error conditions*/ 791 err=U_ILLEGAL_ARGUMENT_ERROR; 792 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n"); 793 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err); 794 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2); 795 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){ 796 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n"); 797 } 798 err=U_ZERO_ERROR; 799 800 801 /*getcodepageid testing ucnv_getCCSID() */ 802 log_verbose("\n----Testing getCCSID....\n"); 803 cp = ucnv_getCCSID(myConverter,&err); 804 if (U_FAILURE(err)) 805 { 806 log_err("FAILURE!..... %s\n", myErrorName(err)); 807 } 808 if (cp != CodePageNumberToTest[codepage_index]) 809 log_err("Codepage number test failed\n"); 810 else 811 log_verbose("Codepage number test OK\n"); 812 813 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/ 814 err=U_ILLEGAL_ARGUMENT_ERROR; 815 if( ucnv_getCCSID(myConverter,&err) != -1){ 816 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n"); 817 } 818 err=U_ZERO_ERROR; 819 820 /*getCodepagePlatform testing ucnv_getPlatform()*/ 821 log_verbose("\n---Testing getCodepagePlatform ..\n"); 822 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err)) 823 log_err("Platform codepage test failed\n"); 824 else 825 log_verbose("Platform codepage test ok\n"); 826 827 if (U_FAILURE(err)) 828 { 829 log_err("FAILURE! 
%s\n", myErrorName(err)); 830 } 831 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/ 832 err= U_ILLEGAL_ARGUMENT_ERROR; 833 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){ 834 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n"); 835 } 836 err=U_ZERO_ERROR; 837 838 839 /*Reads the BOM*/ 840 { 841 // Note: gcc produces a compile warning if the return value from fread() is ignored. 842 size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in); 843 (void)numRead; 844 } 845 if (BOM!=0xFEFF && BOM!=0xFFFE) 846 { 847 log_err("File Missing BOM...Bailing!\n"); 848 fclose(ucs_file_in); 849 break; 850 } 851 852 853 /*Reads in the file*/ 854 while(!feof(ucs_file_in)&&(i+=fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in))) 855 { 856 myUChar = ucs_file_buffer[i-1]; 857 858 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/ 859 } 860 861 myUChar = ucs_file_buffer[i-1]; 862 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/ 863 864 865 /*testing ucnv_fromUChars() and ucnv_toUChars() */ 866 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/ 867 868 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1)); 869 u_uastrcpy(uchar1,""); 870 u_strncpy(uchar1,ucs_file_buffer,i); 871 uchar1[i] = 0; 872 873 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1)); 874 u_uastrcpy(uchar3,""); 875 u_strncpy(uchar3,ucs_file_buffer,i); 876 uchar3[i] = 0; 877 878 /*Calls the Conversion Routine */ 879 testLong1 = MAX_FILE_LEN; 880 log_verbose("\n---Testing ucnv_fromUChars()\n"); 881 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 882 if (U_FAILURE(err)) 883 { 884 log_err("\nFAILURE...%s\n", myErrorName(err)); 885 } 886 else 887 log_verbose(" ucnv_fromUChars() o.k.\n"); 888 889 /*test the conversion routine */ 890 log_verbose("\n---Testing ucnv_toUChars()\n"); 891 /*call it first time 
for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */ 892 targetcapacity2=0; 893 targetsize = ucnv_toUChars(myConverter, 894 NULL, 895 targetcapacity2, 896 output_cp_buffer, 897 strlen(output_cp_buffer), 898 &err); 899 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/ 900 901 if(err==U_BUFFER_OVERFLOW_ERROR) 902 { 903 err=U_ZERO_ERROR; 904 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar)); 905 targetsize = ucnv_toUChars(myConverter, 906 uchar2, 907 targetsize+1, 908 output_cp_buffer, 909 strlen(output_cp_buffer), 910 &err); 911 912 if(U_FAILURE(err)) 913 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err)); 914 else 915 log_verbose(" ucnv_toUChars() o.k.\n"); 916 917 if(u_strcmp(uchar1,uchar2)!=0) 918 log_err("equality test failed with conversion routine\n"); 919 } 920 else 921 { 922 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n"); 923 } 924 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/ 925 err=U_ILLEGAL_ARGUMENT_ERROR; 926 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n"); 927 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err); 928 if (targetcapacity !=0) { 929 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 930 } 931 err=U_ZERO_ERROR; 932 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n"); 933 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err); 934 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) { 935 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n"); 936 } 937 err=U_ZERO_ERROR; 938 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n"); 939 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err); 940 if (targetcapacity !=0) { 941 log_err("\nFAILURE: 
ucnv_fromUChars with sourceLength 0 is expected to return 0\n"); 942 } 943 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n"); 944 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err); 945 if (err != U_BUFFER_OVERFLOW_ERROR) { 946 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n"); 947 } 948 /*toUChars with error conditions*/ 949 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, strlen(output_cp_buffer), &err); 950 if(targetsize != 0){ 951 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n"); 952 } 953 err=U_ZERO_ERROR; 954 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, strlen(output_cp_buffer), &err); 955 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){ 956 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n"); 957 } 958 err=U_ZERO_ERROR; 959 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err); 960 if (targetsize !=0) { 961 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n"); 962 } 963 targetcapacity2=0; 964 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, strlen(output_cp_buffer), &err); 965 if (err != U_STRING_NOT_TERMINATED_WARNING) { 966 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n", 967 u_errorName(err)); 968 } 969 err=U_ZERO_ERROR; 970 /*-----*/ 971 972 973 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */ 974 /*Clean up re-usable vars*/ 975 log_verbose("Testing ucnv_fromUnicode().....\n"); 976 tmp_ucs_buf=ucs_file_buffer_use; 977 ucnv_fromUnicode(myConverter, &mytarget_1, 978 mytarget + MAX_FILE_LEN, 979 &tmp_ucs_buf, 980 ucs_file_buffer_use+i, 981 NULL, 982 TRUE, 983 &err); 984 consumedUni = (UChar*)tmp_consumedUni; 985 (void)consumedUni; 
/* Suppress set but not used warning. */ 986 987 if (U_FAILURE(err)) 988 { 989 log_err("FAILURE! %s\n", myErrorName(err)); 990 } 991 else 992 log_verbose("ucnv_fromUnicode() o.k.\n"); 993 994 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */ 995 log_verbose("Testing ucnv_toUnicode().....\n"); 996 tmp_mytarget_use=mytarget_use; 997 tmp_consumed = consumed; 998 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1, 999 my_ucs_file_buffer + MAX_FILE_LEN, 1000 &tmp_mytarget_use, 1001 mytarget_use + (mytarget_1 - mytarget), 1002 NULL, 1003 FALSE, 1004 &err); 1005 consumed = (char*)tmp_consumed; 1006 if (U_FAILURE(err)) 1007 { 1008 log_err("FAILURE! %s\n", myErrorName(err)); 1009 } 1010 else 1011 log_verbose("ucnv_toUnicode() o.k.\n"); 1012 1013 1014 log_verbose("\n---Testing RoundTrip ...\n"); 1015 1016 1017 u_strncpy(uchar3, my_ucs_file_buffer,i); 1018 uchar3[i] = 0; 1019 1020 if(u_strcmp(uchar1,uchar3)==0) 1021 log_verbose("Equality test o.k.\n"); 1022 else 1023 log_err("Equality test failed\n"); 1024 1025 /*sanity compare */ 1026 if(uchar2 == NULL) 1027 { 1028 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__); 1029 } 1030 else 1031 { 1032 if(u_strcmp(uchar2, uchar3)==0) 1033 log_verbose("Equality test o.k.\n"); 1034 else 1035 log_err("Equality test failed\n"); 1036 } 1037 1038 fclose(ucs_file_in); 1039 ucnv_close(myConverter); 1040 if (uchar1 != 0) free(uchar1); 1041 if (uchar2 != 0) free(uchar2); 1042 if (uchar3 != 0) free(uchar3); 1043 } 1044 1045 free((void*)mytarget); 1046 free((void*)output_cp_buffer); 1047 free((void*)ucs_file_buffer); 1048 free((void*)my_ucs_file_buffer); 1049 #endif 1050 } 1051 1052 #if !UCONFIG_NO_LEGACY_CONVERSION 1053 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA) 1054 { 1055 return (MIA==(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP)?(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE:(UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP; 1056 } 1057 1058 
static UConverterToUCallback otherCharAction(UConverterToUCallback MIA) 1059 { 1060 return (MIA==(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP)?(UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE:(UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP; 1061 } 1062 #endif 1063 1064 static void TestFlushCache(void) { 1065 #if !UCONFIG_NO_LEGACY_CONVERSION 1066 UErrorCode err = U_ZERO_ERROR; 1067 UConverter* someConverters[5]; 1068 int flushCount = 0; 1069 1070 /* flush the converter cache to get a consistent state before the flushing is tested */ 1071 ucnv_flushCache(); 1072 1073 /*Testing ucnv_open()*/ 1074 /* Note: These converters have been chosen because they do NOT 1075 encode the Latin characters (U+0041, ...), and therefore are 1076 highly unlikely to be chosen as system default codepages */ 1077 1078 someConverters[0] = ucnv_open("ibm-1047", &err); 1079 if (U_FAILURE(err)) { 1080 log_data_err("FAILURE! %s\n", myErrorName(err)); 1081 } 1082 1083 someConverters[1] = ucnv_open("ibm-1047", &err); 1084 if (U_FAILURE(err)) { 1085 log_data_err("FAILURE! %s\n", myErrorName(err)); 1086 } 1087 1088 someConverters[2] = ucnv_open("ibm-1047", &err); 1089 if (U_FAILURE(err)) { 1090 log_data_err("FAILURE! %s\n", myErrorName(err)); 1091 } 1092 1093 someConverters[3] = ucnv_open("gb18030", &err); 1094 if (U_FAILURE(err)) { 1095 log_data_err("FAILURE! %s\n", myErrorName(err)); 1096 } 1097 1098 someConverters[4] = ucnv_open("ibm-954", &err); 1099 if (U_FAILURE(err)) { 1100 log_data_err("FAILURE! 
%s\n", myErrorName(err)); 1101 } 1102 1103 1104 /* Testing ucnv_flushCache() */ 1105 log_verbose("\n---Testing ucnv_flushCache...\n"); 1106 if ((flushCount=ucnv_flushCache())==0) 1107 log_verbose("Flush cache ok\n"); 1108 else 1109 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1110 1111 /*testing ucnv_close() and ucnv_flushCache() */ 1112 ucnv_close(someConverters[0]); 1113 ucnv_close(someConverters[1]); 1114 1115 if ((flushCount=ucnv_flushCache())==0) 1116 log_verbose("Flush cache ok\n"); 1117 else 1118 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount); 1119 1120 ucnv_close(someConverters[2]); 1121 ucnv_close(someConverters[3]); 1122 1123 if ((flushCount=ucnv_flushCache())==2) 1124 log_verbose("Flush cache ok\n"); /*because first, second and third are same */ 1125 else 1126 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n", 1127 __LINE__, 1128 flushCount); 1129 1130 ucnv_close(someConverters[4]); 1131 if ( (flushCount=ucnv_flushCache())==1) 1132 log_verbose("Flush cache ok\n"); 1133 else 1134 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount); 1135 #endif 1136 } 1137 1138 /** 1139 * Test the converter alias API, specifically the fuzzy matching of 1140 * alias names and the alias table integrity. Make sure each 1141 * converter has at least one alias (itself), and that its listed 1142 * aliases map back to itself. Check some hard-coded UTF-8 and 1143 * ISO_2022 aliases to make sure they work. 1144 */ 1145 static void TestAlias() { 1146 int32_t i, ncnv; 1147 UErrorCode status = U_ZERO_ERROR; 1148 1149 /* Predetermined aliases that we expect to map back to ISO_2022 1150 * and UTF-8. UPDATE THIS DATA AS NECESSARY. 
*/ 1151 const char* ISO_2022_NAMES[] = 1152 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2", 1153 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"}; 1154 int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES); 1155 const char *UTF8_NAMES[] = 1156 { "UTF-8", "utf-8", "utf8", "ibm-1208", 1157 "utf_8", "ibm1208", "cp1208" }; 1158 int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES); 1159 1160 struct { 1161 const char *name; 1162 const char *alias; 1163 } CONVERTERS_NAMES[] = { 1164 { "UTF-32BE", "UTF32_BigEndian" }, 1165 { "UTF-32LE", "UTF32_LittleEndian" }, 1166 { "UTF-32", "ISO-10646-UCS-4" }, 1167 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" }, 1168 { "UTF-32", "ucs-4" } 1169 }; 1170 int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES); 1171 1172 /* When there are bugs in gencnval or in ucnv_io, converters can 1173 appear to have no aliases. */ 1174 ncnv = ucnv_countAvailable(); 1175 log_verbose("%d converters\n", ncnv); 1176 for (i=0; i<ncnv; ++i) { 1177 const char *name = ucnv_getAvailableName(i); 1178 const char *alias0; 1179 uint16_t na = ucnv_countAliases(name, &status); 1180 uint16_t j; 1181 UConverter *cnv; 1182 1183 if (na == 0) { 1184 log_err("FAIL: Converter \"%s\" (i=%d)" 1185 " has no aliases; expect at least one\n", 1186 name, i); 1187 continue; 1188 } 1189 cnv = ucnv_open(name, &status); 1190 if (U_FAILURE(status)) { 1191 log_data_err("FAIL: Converter \"%s\" (i=%d)" 1192 " can't be opened.\n", 1193 name, i); 1194 } 1195 else { 1196 if (strcmp(ucnv_getName(cnv, &status), name) != 0 1197 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) { 1198 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. 
" 1199 "They should be the same\n", 1200 name, ucnv_getName(cnv, &status)); 1201 } 1202 } 1203 ucnv_close(cnv); 1204 1205 status = U_ZERO_ERROR; 1206 alias0 = ucnv_getAlias(name, 0, &status); 1207 for (j=1; j<na; ++j) { 1208 const char *alias; 1209 /* Make sure each alias maps back to the the same list of 1210 aliases. Assume that if alias 0 is the same, the whole 1211 list is the same (this should always be true). */ 1212 const char *mapBack; 1213 1214 status = U_ZERO_ERROR; 1215 alias = ucnv_getAlias(name, j, &status); 1216 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1217 log_err("FAIL: Converter \"%s\"is ambiguous\n", name); 1218 } 1219 1220 if (alias == NULL) { 1221 log_err("FAIL: Converter \"%s\" -> " 1222 "alias[%d]=NULL\n", 1223 name, j); 1224 continue; 1225 } 1226 1227 mapBack = ucnv_getAlias(alias, 0, &status); 1228 1229 if (mapBack == NULL) { 1230 log_err("FAIL: Converter \"%s\" -> " 1231 "alias[%d]=\"%s\" -> " 1232 "alias[0]=NULL, exp. \"%s\"\n", 1233 name, j, alias, alias0); 1234 continue; 1235 } 1236 1237 if (0 != strcmp(alias0, mapBack)) { 1238 int32_t idx; 1239 UBool foundAlias = FALSE; 1240 if (status == U_AMBIGUOUS_ALIAS_WARNING) { 1241 /* Make sure that we only get this mismapping when there is 1242 an ambiguous alias, and the other converter has this alias too. */ 1243 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) { 1244 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) { 1245 foundAlias = TRUE; 1246 break; 1247 } 1248 } 1249 } 1250 /* else not ambiguous, and this is a real problem. foundAlias = FALSE */ 1251 1252 if (!foundAlias) { 1253 log_err("FAIL: Converter \"%s\" -> " 1254 "alias[%d]=\"%s\" -> " 1255 "alias[0]=\"%s\", exp. \"%s\"\n", 1256 name, j, alias, mapBack, alias0); 1257 } 1258 } 1259 } 1260 } 1261 1262 1263 /* Check a list of predetermined aliases that we expect to map 1264 * back to ISO_2022 and UTF-8. 
*/ 1265 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) { 1266 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status); 1267 if(!mapBack) { 1268 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]); 1269 continue; 1270 } 1271 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) { 1272 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n", 1273 ISO_2022_NAMES[i], mapBack); 1274 } 1275 } 1276 1277 1278 for (i=1; i<UTF8_NAMES_LENGTH; ++i) { 1279 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status); 1280 if(!mapBack) { 1281 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]); 1282 continue; 1283 } 1284 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) { 1285 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n", 1286 UTF8_NAMES[i], mapBack); 1287 } 1288 } 1289 1290 /* 1291 * Check a list of predetermined aliases that we expect to map 1292 * back to predermined converter names. 1293 */ 1294 1295 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) { 1296 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status); 1297 if(!mapBack) { 1298 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name); 1299 continue; 1300 } 1301 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) { 1302 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n", 1303 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name); 1304 } 1305 } 1306 1307 } 1308 1309 static void TestDuplicateAlias(void) { 1310 const char *alias; 1311 UErrorCode status = U_ZERO_ERROR; 1312 1313 status = U_ZERO_ERROR; 1314 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status); 1315 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1316 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. 
Got %s\n", alias); 1317 } 1318 status = U_ZERO_ERROR; 1319 alias = ucnv_getStandardName("ibm-943", "IANA", &status); 1320 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) { 1321 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias); 1322 } 1323 status = U_ZERO_ERROR; 1324 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status); 1325 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) { 1326 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias); 1327 } 1328 } 1329 1330 1331 /* Test safe clone callback */ 1332 1333 static uint32_t TSCC_nextSerial() 1334 { 1335 static uint32_t n = 1; 1336 1337 return (n++); 1338 } 1339 1340 typedef struct 1341 { 1342 uint32_t magic; /* 0xC0FFEE to identify that the object is OK */ 1343 uint32_t serial; /* minted from nextSerial, above */ 1344 UBool wasClosed; /* close happened on the object */ 1345 } TSCCContext; 1346 1347 static TSCCContext *TSCC_clone(TSCCContext *ctx) 1348 { 1349 TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext)); 1350 1351 newCtx->serial = TSCC_nextSerial(); 1352 newCtx->wasClosed = 0; 1353 newCtx->magic = 0xC0FFEE; 1354 1355 log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial); 1356 1357 return newCtx; 1358 } 1359 1360 #if !UCONFIG_NO_LEGACY_CONVERSION 1361 static void TSCC_fromU(const void *context, 1362 UConverterFromUnicodeArgs *fromUArgs, 1363 const UChar* codeUnits, 1364 int32_t length, 1365 UChar32 codePoint, 1366 UConverterCallbackReason reason, 1367 UErrorCode * err) 1368 { 1369 TSCCContext *ctx = (TSCCContext*)context; 1370 UConverterFromUCallback junkFrom; 1371 1372 log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter); 1373 1374 if(ctx->magic != 0xC0FFEE) { 1375 log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1376 return; 
1377 } 1378 1379 if(reason == UCNV_CLONE) { 1380 UErrorCode subErr = U_ZERO_ERROR; 1381 TSCCContext *newCtx; 1382 TSCCContext *junkCtx; 1383 TSCCContext **pjunkCtx = &junkCtx; 1384 1385 /* "recreate" it */ 1386 log_verbose("TSCC_fromU: cloning..\n"); 1387 newCtx = TSCC_clone(ctx); 1388 1389 if(newCtx == NULL) { 1390 log_err("TSCC_fromU: internal clone failed on %p\n", ctx); 1391 } 1392 1393 /* now, SET it */ 1394 ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1395 ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1396 1397 if(U_FAILURE(subErr)) { 1398 *err = subErr; 1399 } 1400 } 1401 1402 if(reason == UCNV_CLOSE) { 1403 log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial); 1404 ctx->wasClosed = TRUE; 1405 } 1406 } 1407 1408 static void TSCC_toU(const void *context, 1409 UConverterToUnicodeArgs *toUArgs, 1410 const char* codeUnits, 1411 int32_t length, 1412 UConverterCallbackReason reason, 1413 UErrorCode * err) 1414 { 1415 TSCCContext *ctx = (TSCCContext*)context; 1416 UConverterToUCallback junkFrom; 1417 1418 log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter); 1419 1420 if(ctx->magic != 0xC0FFEE) { 1421 log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic); 1422 return; 1423 } 1424 1425 if(reason == UCNV_CLONE) { 1426 UErrorCode subErr = U_ZERO_ERROR; 1427 TSCCContext *newCtx; 1428 TSCCContext *junkCtx; 1429 TSCCContext **pjunkCtx = &junkCtx; 1430 1431 /* "recreate" it */ 1432 log_verbose("TSCC_toU: cloning..\n"); 1433 newCtx = TSCC_clone(ctx); 1434 1435 if(newCtx == NULL) { 1436 log_err("TSCC_toU: internal clone failed on %p\n", ctx); 1437 } 1438 1439 /* now, SET it */ 1440 ucnv_getToUCallBack(toUArgs->converter, &junkFrom, (const void**)pjunkCtx); 1441 ucnv_setToUCallBack(toUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr); 1442 1443 if(U_FAILURE(subErr)) { 1444 
*err = subErr; 1445 } 1446 } 1447 1448 if(reason == UCNV_CLOSE) { 1449 log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial); 1450 ctx->wasClosed = TRUE; 1451 } 1452 } 1453 1454 static void TSCC_init(TSCCContext *q) 1455 { 1456 q->magic = 0xC0FFEE; 1457 q->serial = TSCC_nextSerial(); 1458 q->wasClosed = 0; 1459 } 1460 1461 static void TSCC_print_log(TSCCContext *q, const char *name) 1462 { 1463 if(q==NULL) { 1464 log_verbose("TSCContext: %s is NULL!!\n", name); 1465 } else { 1466 if(q->magic != 0xC0FFEE) { 1467 log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n", 1468 q,q->serial, q->magic); 1469 } 1470 log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n", 1471 q, q->serial, name, q->magic, q->wasClosed?"CLOSED":"open"); 1472 } 1473 } 1474 1475 static void TestConvertSafeCloneCallback() 1476 { 1477 UErrorCode err = U_ZERO_ERROR; 1478 TSCCContext from1, to1; 1479 TSCCContext *from2, *from3, *to2, *to3; 1480 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3; 1481 char hunk[8192]; 1482 int32_t hunkSize = 8192; 1483 UConverterFromUCallback junkFrom; 1484 UConverterToUCallback junkTo; 1485 UConverter *conv1, *conv2 = NULL; 1486 1487 conv1 = ucnv_open("iso-8859-3", &err); 1488 1489 if(U_FAILURE(err)) { 1490 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err)); 1491 return; 1492 } 1493 1494 log_verbose("Opened conv1=%p\n", conv1); 1495 1496 TSCC_init(&from1); 1497 TSCC_init(&to1); 1498 1499 TSCC_print_log(&from1, "from1"); 1500 TSCC_print_log(&to1, "to1"); 1501 1502 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err); 1503 log_verbose("Set from1 on conv1\n"); 1504 TSCC_print_log(&from1, "from1"); 1505 1506 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err); 1507 log_verbose("Set to1 on conv1\n"); 1508 TSCC_print_log(&to1, "to1"); 1509 1510 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err); 1511 if(U_FAILURE(err)) { 1512 log_err("safeClone failed: %s\n", u_errorName(err)); 1513 
return; 1514 } 1515 log_verbose("Cloned to conv2=%p.\n", conv2); 1516 1517 /********** from *********************/ 1518 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2); 1519 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3); 1520 1521 TSCC_print_log(from2, "from2"); 1522 TSCC_print_log(from3, "from3(==from1)"); 1523 1524 if(from2 == NULL) { 1525 log_err("FAIL! from2 is null \n"); 1526 return; 1527 } 1528 1529 if(from3 == NULL) { 1530 log_err("FAIL! from3 is null \n"); 1531 return; 1532 } 1533 1534 if(from3 != (&from1) ) { 1535 log_err("FAIL! conv1's FROM context changed!\n"); 1536 } 1537 1538 if(from2 == (&from1) ) { 1539 log_err("FAIL! conv1's FROM context is the same as conv2's!\n"); 1540 } 1541 1542 if(from1.wasClosed) { 1543 log_err("FAIL! from1 is closed \n"); 1544 } 1545 1546 if(from2->wasClosed) { 1547 log_err("FAIL! from2 was closed\n"); 1548 } 1549 1550 /********** to *********************/ 1551 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2); 1552 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3); 1553 1554 TSCC_print_log(to2, "to2"); 1555 TSCC_print_log(to3, "to3(==to1)"); 1556 1557 if(to2 == NULL) { 1558 log_err("FAIL! to2 is null \n"); 1559 return; 1560 } 1561 1562 if(to3 == NULL) { 1563 log_err("FAIL! to3 is null \n"); 1564 return; 1565 } 1566 1567 if(to3 != (&to1) ) { 1568 log_err("FAIL! conv1's TO context changed!\n"); 1569 } 1570 1571 if(to2 == (&to1) ) { 1572 log_err("FAIL! conv1's TO context is the same as conv2's!\n"); 1573 } 1574 1575 if(to1.wasClosed) { 1576 log_err("FAIL! to1 is closed \n"); 1577 } 1578 1579 if(to2->wasClosed) { 1580 log_err("FAIL! to2 was closed\n"); 1581 } 1582 1583 /*************************************/ 1584 1585 ucnv_close(conv1); 1586 log_verbose("ucnv_closed (conv1)\n"); 1587 TSCC_print_log(&from1, "from1"); 1588 TSCC_print_log(from2, "from2"); 1589 TSCC_print_log(&to1, "to1"); 1590 TSCC_print_log(to2, "to2"); 1591 1592 if(from1.wasClosed == FALSE) { 1593 log_err("FAIL! 
from1 is NOT closed \n"); 1594 } 1595 1596 if(from2->wasClosed) { 1597 log_err("FAIL! from2 was closed\n"); 1598 } 1599 1600 if(to1.wasClosed == FALSE) { 1601 log_err("FAIL! to1 is NOT closed \n"); 1602 } 1603 1604 if(to2->wasClosed) { 1605 log_err("FAIL! to2 was closed\n"); 1606 } 1607 1608 ucnv_close(conv2); 1609 log_verbose("ucnv_closed (conv2)\n"); 1610 1611 TSCC_print_log(&from1, "from1"); 1612 TSCC_print_log(from2, "from2"); 1613 1614 if(from1.wasClosed == FALSE) { 1615 log_err("FAIL! from1 is NOT closed \n"); 1616 } 1617 1618 if(from2->wasClosed == FALSE) { 1619 log_err("FAIL! from2 was NOT closed\n"); 1620 } 1621 1622 TSCC_print_log(&to1, "to1"); 1623 TSCC_print_log(to2, "to2"); 1624 1625 if(to1.wasClosed == FALSE) { 1626 log_err("FAIL! to1 is NOT closed \n"); 1627 } 1628 1629 if(to2->wasClosed == FALSE) { 1630 log_err("FAIL! to2 was NOT closed\n"); 1631 } 1632 1633 if(to2 != (&to1)) { 1634 free(to2); /* to1 is stack based */ 1635 } 1636 if(from2 != (&from1)) { 1637 free(from2); /* from1 is stack based */ 1638 } 1639 } 1640 #endif 1641 1642 static UBool 1643 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) { 1644 while(length>0) { 1645 if(*p!=b) { 1646 return TRUE; 1647 } 1648 ++p; 1649 --length; 1650 } 1651 return FALSE; 1652 } 1653 1654 static void TestConvertSafeClone() 1655 { 1656 /* one 'regular' & all the 'private stateful' converters */ 1657 static const char *const names[] = { 1658 #if !UCONFIG_NO_LEGACY_CONVERSION 1659 "ibm-1047", 1660 "ISO_2022,locale=zh,version=1", 1661 #endif 1662 "SCSU", 1663 #if !UCONFIG_NO_LEGACY_CONVERSION 1664 "HZ", 1665 "lmbcs", 1666 "ISCII,version=0", 1667 "ISO_2022,locale=kr,version=1", 1668 "ISO_2022,locale=jp,version=2", 1669 #endif 1670 "BOCU-1", 1671 "UTF-7", 1672 #if !UCONFIG_NO_LEGACY_CONVERSION 1673 "IMAP-mailbox-name", 1674 "ibm-1047-s390" 1675 #else 1676 "IMAP=mailbox-name" 1677 #endif 1678 }; 1679 1680 /* store the actual sizes of each converter */ 1681 int32_t actualSizes[UPRV_LENGTHOF(names)]; 1682 
1683 static const int32_t bufferSizes[] = { 1684 U_CNV_SAFECLONE_BUFFERSIZE, 1685 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */ 1686 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */ 1687 }; 1688 1689 char charBuffer[21]; /* Leave at an odd number for alignment testing */ 1690 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE]; 1691 int32_t bufferSize, maxBufferSize; 1692 const char *maxName; 1693 UConverter * cnv, *cnv2; 1694 UErrorCode err; 1695 1696 char *pCharBuffer; 1697 const char *pConstCharBuffer; 1698 const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer); 1699 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */ 1700 UChar uniCharBuffer[20]; 1701 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 }; 1702 const char *pCharSource = charSourceBuffer; 1703 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer); 1704 UChar *pUCharTarget = uniCharBuffer; 1705 UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer); 1706 const UChar * pUniBuffer; 1707 const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer); 1708 int32_t idx, j; 1709 1710 err = U_ZERO_ERROR; 1711 cnv = ucnv_open(names[0], &err); 1712 if(U_SUCCESS(err)) { 1713 /* Check the various error & informational states: */ 1714 1715 /* Null status - just returns NULL */ 1716 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1717 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL)) 1718 { 1719 log_err("FAIL: Cloned converter failed to deal correctly with null status\n"); 1720 } 1721 /* error status - should return 0 & keep error the same */ 1722 err = U_MEMORY_ALLOCATION_ERROR; 1723 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR) 1724 { 1725 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n"); 1726 } 1727 err = U_ZERO_ERROR; 1728 1729 /* Null buffer size pointer is ok */ 1730 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) 
|| U_FAILURE(err)) 1731 { 1732 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n"); 1733 } 1734 ucnv_close(cnv2); 1735 err = U_ZERO_ERROR; 1736 1737 /* buffer size pointer is 0 - fill in pbufferSize with a size */ 1738 bufferSize = 0; 1739 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0) 1740 { 1741 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n"); 1742 } 1743 /* Verify our define is large enough */ 1744 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize) 1745 { 1746 log_err("FAIL: Pre-calculated buffer size is too small\n"); 1747 } 1748 /* Verify we can use this run-time calculated size */ 1749 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err)) 1750 { 1751 log_err("FAIL: Converter can't be cloned with run-time size\n"); 1752 } 1753 if (cnv2) { 1754 ucnv_close(cnv2); 1755 } 1756 1757 /* size one byte too small - should allocate & let us know */ 1758 --bufferSize; 1759 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1760 { 1761 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n"); 1762 } 1763 if (cnv2) { 1764 ucnv_close(cnv2); 1765 } 1766 1767 err = U_ZERO_ERROR; 1768 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE; 1769 1770 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */ 1771 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING) 1772 { 1773 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n"); 1774 } 1775 if (cnv2) { 1776 ucnv_close(cnv2); 1777 } 1778 1779 err = U_ZERO_ERROR; 1780 1781 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ 1782 if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR) 1783 { 1784 log_err("FAIL: Cloned converter failed to deal 
correctly with null converter pointer\n"); 1785 } 1786 1787 ucnv_close(cnv); 1788 } 1789 1790 maxBufferSize = 0; 1791 maxName = ""; 1792 1793 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/ 1794 1795 for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) { 1796 for (idx = 0; idx < UPRV_LENGTHOF(names); idx++) 1797 { 1798 err = U_ZERO_ERROR; 1799 cnv = ucnv_open(names[idx], &err); 1800 if(U_FAILURE(err)) { 1801 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err)); 1802 continue; 1803 } 1804 1805 if(j == 0) { 1806 /* preflight to get maxBufferSize */ 1807 actualSizes[idx] = 0; 1808 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err); 1809 if(actualSizes[idx] > maxBufferSize) { 1810 maxBufferSize = actualSizes[idx]; 1811 maxName = names[idx]; 1812 } 1813 } 1814 1815 memset(buffer, 0xaa, sizeof(buffer)); 1816 1817 bufferSize = bufferSizes[j]; 1818 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err); 1819 1820 /* close the original immediately to make sure that the clone works by itself */ 1821 ucnv_close(cnv); 1822 1823 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)sizeof(UAlignedMemory)) && 1824 err == U_SAFECLONE_ALLOCATED_WARNING 1825 ) { 1826 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]); 1827 } 1828 1829 /* check if the clone function overwrote any bytes that it is not supposed to touch */ 1830 if(bufferSize <= bufferSizes[j]) { 1831 /* used the stack buffer */ 1832 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) || 1833 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa) 1834 ) { 1835 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n", 1836 names[idx], bufferSize, bufferSizes[j]); 1837 } 1838 } else { 1839 /* heap-allocated the clone */ 1840 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) { 1841 
log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n", 1842 names[idx], bufferSize, bufferSizes[j]); 1843 } 1844 } 1845 1846 pCharBuffer = charBuffer; 1847 pUniBuffer = uniBuffer; 1848 1849 ucnv_fromUnicode(cnv2, 1850 &pCharBuffer, 1851 charBufferLimit, 1852 &pUniBuffer, 1853 uniBufferLimit, 1854 NULL, 1855 TRUE, 1856 &err); 1857 if(U_FAILURE(err)){ 1858 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err)); 1859 } 1860 ucnv_toUnicode(cnv2, 1861 &pUCharTarget, 1862 pUCharTargetLimit, 1863 &pCharSource, 1864 pCharSourceLimit, 1865 NULL, 1866 TRUE, 1867 &err 1868 ); 1869 1870 if(U_FAILURE(err)){ 1871 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err)); 1872 } 1873 1874 pConstCharBuffer = charBuffer; 1875 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err)) 1876 { 1877 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err)); 1878 } 1879 ucnv_close(cnv2); 1880 } 1881 } 1882 1883 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1884 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1885 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) { 1886 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n", 1887 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE); 1888 } 1889 } 1890 1891 static void TestCCSID() { 1892 #if !UCONFIG_NO_LEGACY_CONVERSION 1893 UConverter *cnv; 1894 UErrorCode errorCode; 1895 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 }; 1896 int32_t i, ccsid; 1897 1898 for(i=0; i<UPRV_LENGTHOF(ccsids); ++i) { 1899 ccsid=ccsids[i]; 1900 1901 errorCode=U_ZERO_ERROR; 1902 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode); 1903 if(U_FAILURE(errorCode)) { 1904 log_data_err("error: ucnv_openCCSID(%ld) failed 
(%s)\n", ccsid, u_errorName(errorCode)); 1905 continue; 1906 } 1907 1908 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) { 1909 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode)); 1910 } 1911 1912 /* skip gb18030(ccsid 1392) */ 1913 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) { 1914 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode)); 1915 } 1916 1917 ucnv_close(cnv); 1918 } 1919 #endif 1920 } 1921 1922 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */ 1923 1924 /* CHUNK_SIZE defined in common\ucnv.c: */ 1925 #define CHUNK_SIZE 1024 1926 1927 static void bug1(void); 1928 static void bug2(void); 1929 static void bug3(void); 1930 1931 static void 1932 TestJ932(void) 1933 { 1934 bug1(); /* Unicode intermediate buffer straddle bug */ 1935 bug2(); /* pre-flighting size incorrect caused by simple overflow */ 1936 bug3(); /* pre-flighting size incorrect caused by expansion overflow */ 1937 } 1938 1939 /* 1940 * jitterbug 932: test chunking boundary conditions in 1941 1942 int32_t ucnv_convert(const char *toConverterName, 1943 const char *fromConverterName, 1944 char *target, 1945 int32_t targetSize, 1946 const char *source, 1947 int32_t sourceSize, 1948 UErrorCode * err) 1949 1950 * See discussions on the icu mailing list in 1951 * 2001-April with the subject "converter 'flush' question". 1952 * 1953 * Bug report and test code provided by Edward J. Batutis. 
1954 */ 1955 static void bug1() 1956 { 1957 #if !UCONFIG_NO_LEGACY_CONVERSION 1958 char char_in[CHUNK_SIZE+32]; 1959 char char_out[CHUNK_SIZE*2]; 1960 1961 /* GB 18030 equivalent of U+10000 is 90308130 */ 1962 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 }; 1963 1964 UErrorCode err = U_ZERO_ERROR; 1965 int32_t i, test_seq_len = sizeof(test_seq); 1966 1967 /* 1968 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward 1969 * until the straddle bug appears. I didn't want to hard-code everything so this test could 1970 * be expanded - however this is the only type of straddle bug I can think of at the moment - 1971 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no 1972 * other Unicode sequences cause a bug since combining sequences are not supported by the 1973 * converters. 1974 */ 1975 1976 for (i = test_seq_len; i >= 0; i--) { 1977 /* put character sequence into input buffer */ 1978 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */ 1979 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len); 1980 1981 /* do the conversion */ 1982 ucnv_convert("us-ascii", /* out */ 1983 "gb18030", /* in */ 1984 char_out, 1985 sizeof(char_out), 1986 char_in, 1987 sizeof(char_in), 1988 &err); 1989 1990 /* bug1: */ 1991 if (err == U_TRUNCATED_CHAR_FOUND) { 1992 /* this happens when surrogate pair straddles the intermediate buffer in 1993 * T_UConverter_fromCodepageToCodepage */ 1994 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n"); 1995 } 1996 } 1997 #endif 1998 } 1999 2000 /* bug2: pre-flighting loop bug: simple overflow causes bug */ 2001 static void bug2() 2002 { 2003 /* US-ASCII "1234567890" */ 2004 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 }; 2005 #if !UCONFIG_ONLY_HTML_CONVERSION 2006 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 }; 
2007 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30, 2008 0x00, 0x00, 0x00, 0x31, 2009 0x00, 0x00, 0x00, 0x32, 2010 0x00, 0x00, 0x00, 0x33, 2011 0x00, 0x00, 0x00, 0x34, 2012 0x00, 0x00, 0x00, 0x35, 2013 0x00, 0x00, 0x00, 0x36, 2014 0x00, 0x00, 0x00, 0x37, 2015 0x00, 0x00, 0x00, 0x38, 2016 0x00, 0x00, (char)0xf0, 0x00}; 2017 #endif 2018 2019 static char target[5]; 2020 2021 UErrorCode err = U_ZERO_ERROR; 2022 int32_t size; 2023 2024 /* do the conversion */ 2025 size = ucnv_convert("iso-8859-1", /* out */ 2026 "us-ascii", /* in */ 2027 target, 2028 sizeof(target), 2029 source, 2030 sizeof(source), 2031 &err); 2032 2033 if ( size != 10 ) { 2034 /* bug2: size is 5, should be 10 */ 2035 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size); 2036 } 2037 2038 #if !UCONFIG_ONLY_HTML_CONVERSION 2039 err = U_ZERO_ERROR; 2040 /* do the conversion */ 2041 size = ucnv_convert("UTF-32BE", /* out */ 2042 "UTF-8", /* in */ 2043 target, 2044 sizeof(target), 2045 sourceUTF8, 2046 sizeof(sourceUTF8), 2047 &err); 2048 2049 if ( size != 32 ) { 2050 /* bug2: size is 5, should be 32 */ 2051 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size); 2052 } 2053 2054 err = U_ZERO_ERROR; 2055 /* do the conversion */ 2056 size = ucnv_convert("UTF-8", /* out */ 2057 "UTF-32BE", /* in */ 2058 target, 2059 sizeof(target), 2060 sourceUTF32, 2061 sizeof(sourceUTF32), 2062 &err); 2063 2064 if ( size != 12 ) { 2065 /* bug2: size is 5, should be 12 */ 2066 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size); 2067 } 2068 #endif 2069 } 2070 2071 /* 2072 * bug3: when the characters expand going from source to target codepage 2073 * you get bug3 in addition to bug2 2074 */ 2075 static void bug3() 2076 { 2077 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 2078 char char_in[CHUNK_SIZE*4]; 2079 char target[5]; 2080 UErrorCode err = U_ZERO_ERROR; 2081 
int32_t size; 2082 2083 /* 2084 * first get the buggy size from bug2 then 2085 * compare it to buggy size with an expansion 2086 */ 2087 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */ 2088 2089 /* do the conversion */ 2090 size = ucnv_convert("lmbcs", /* out */ 2091 "us-ascii", /* in */ 2092 target, 2093 sizeof(target), 2094 char_in, 2095 sizeof(char_in), 2096 &err); 2097 2098 if ( size != sizeof(char_in) ) { 2099 /* 2100 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer 2101 * in the converter?), should be CHUNK_SIZE*4 2102 * 2103 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize... 2104 */ 2105 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size); 2106 } 2107 2108 /* 2109 * now do the conversion with expansion 2110 * ascii 0x08 expands to 0x0F 0x28 in lmbcs 2111 */ 2112 memset(char_in, 8, sizeof(char_in)); 2113 err = U_ZERO_ERROR; 2114 2115 /* do the conversion */ 2116 size = ucnv_convert("lmbcs", /* out */ 2117 "us-ascii", /* in */ 2118 target, 2119 sizeof(target), 2120 char_in, 2121 sizeof(char_in), 2122 &err); 2123 2124 /* expect 2X expansion */ 2125 if ( size != sizeof(char_in) * 2 ) { 2126 /* 2127 * bug3: 2128 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05: 2129 */ 2130 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size); 2131 } 2132 #endif 2133 } 2134 2135 static void 2136 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv, 2137 const char *src, int32_t srcLength, 2138 const char *expectTarget, int32_t expectTargetLength, 2139 int32_t chunkSize, 2140 const char *testName, 2141 UErrorCode expectCode) { 2142 UChar pivotBuffer[CHUNK_SIZE]; 2143 UChar *pivotSource, *pivotTarget; 2144 const UChar *pivotLimit; 2145 2146 char targetBuffer[CHUNK_SIZE]; 2147 char *target; 2148 const char *srcLimit, *finalSrcLimit, *targetLimit; 2149 2150 
int32_t targetLength; 2151 2152 UBool flush; 2153 2154 UErrorCode errorCode; 2155 2156 /* setup */ 2157 if(chunkSize>CHUNK_SIZE) { 2158 chunkSize=CHUNK_SIZE; 2159 } 2160 2161 pivotSource=pivotTarget=pivotBuffer; 2162 pivotLimit=pivotBuffer+chunkSize; 2163 2164 finalSrcLimit=src+srcLength; 2165 target=targetBuffer; 2166 targetLimit=targetBuffer+chunkSize; 2167 2168 ucnv_resetToUnicode(srcCnv); 2169 ucnv_resetFromUnicode(targetCnv); 2170 2171 errorCode=U_ZERO_ERROR; 2172 flush=FALSE; 2173 2174 /* convert, streaming-style (both converters and pivot keep state) */ 2175 for(;;) { 2176 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */ 2177 if(src+chunkSize<=finalSrcLimit) { 2178 srcLimit=src+chunkSize; 2179 } else { 2180 srcLimit=finalSrcLimit; 2181 } 2182 ucnv_convertEx(targetCnv, srcCnv, 2183 &target, targetLimit, 2184 &src, srcLimit, 2185 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit, 2186 FALSE, flush, &errorCode); 2187 targetLength=(int32_t)(target-targetBuffer); 2188 if(target>targetLimit) { 2189 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n", 2190 testName, chunkSize, target, targetLimit); 2191 break; /* TODO: major problem! 
*/ 2192 } 2193 if(errorCode==U_BUFFER_OVERFLOW_ERROR) { 2194 /* continue converting another chunk */ 2195 errorCode=U_ZERO_ERROR; 2196 if(targetLength+chunkSize<=sizeof(targetBuffer)) { 2197 targetLimit=target+chunkSize; 2198 } else { 2199 targetLimit=targetBuffer+sizeof(targetBuffer); 2200 } 2201 } else if(U_FAILURE(errorCode)) { 2202 /* failure */ 2203 break; 2204 } else if(flush) { 2205 /* all done */ 2206 break; 2207 } else if(src==finalSrcLimit && pivotSource==pivotTarget) { 2208 /* all consumed, now flush without input (separate from conversion for testing) */ 2209 flush=TRUE; 2210 } 2211 } 2212 2213 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) { 2214 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n", 2215 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode)); 2216 } else if(targetLength!=expectTargetLength) { 2217 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n", 2218 testName, chunkSize, targetLength, expectTargetLength); 2219 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) { 2220 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n", 2221 testName, chunkSize); 2222 } 2223 } 2224 2225 static void 2226 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv, 2227 const char *src, int32_t srcLength, 2228 const char *expectTarget, int32_t expectTargetLength, 2229 const char *testName, 2230 UErrorCode expectCode) { 2231 convertExStreaming(srcCnv, targetCnv, 2232 src, srcLength, 2233 expectTarget, expectTargetLength, 2234 1, testName, expectCode); 2235 convertExStreaming(srcCnv, targetCnv, 2236 src, srcLength, 2237 expectTarget, expectTargetLength, 2238 3, testName, expectCode); 2239 convertExStreaming(srcCnv, targetCnv, 2240 src, srcLength, 2241 expectTarget, expectTargetLength, 2242 7, testName, expectCode); 2243 } 2244 2245 static void TestConvertEx() { 2246 #if !UCONFIG_NO_LEGACY_CONVERSION 2247 
static const uint8_t 2248 utf8[]={ 2249 /* 4e00 30a1 ff61 0410 */ 2250 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2251 }, 2252 shiftJIS[]={ 2253 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2254 }, 2255 errorTarget[]={ 2256 /* 2257 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2258 * SUB, SUB, 0x40, SUB, SUB, 0x40 2259 */ 2260 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40 2261 }; 2262 2263 char srcBuffer[100], targetBuffer[100]; 2264 2265 const char *src; 2266 char *target; 2267 2268 UChar pivotBuffer[100]; 2269 UChar *pivotSource, *pivotTarget; 2270 2271 UConverter *cnv1, *cnv2; 2272 UErrorCode errorCode; 2273 2274 errorCode=U_ZERO_ERROR; 2275 cnv1=ucnv_open("UTF-8", &errorCode); 2276 if(U_FAILURE(errorCode)) { 2277 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode)); 2278 return; 2279 } 2280 2281 cnv2=ucnv_open("Shift-JIS", &errorCode); 2282 if(U_FAILURE(errorCode)) { 2283 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2284 ucnv_close(cnv1); 2285 return; 2286 } 2287 2288 /* test ucnv_convertEx() with streaming conversion style */ 2289 convertExMultiStreaming(cnv1, cnv2, 2290 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS), 2291 "UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2292 2293 convertExMultiStreaming(cnv2, cnv1, 2294 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), 2295 "Shift-JIS -> UTF-8", U_ZERO_ERROR); 2296 2297 /* U_ZERO_ERROR because by default the SUB callbacks are set */ 2298 convertExMultiStreaming(cnv1, cnv2, 2299 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget), 2300 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR); 2301 2302 /* test some simple conversions */ 2303 2304 /* NUL-terminated source and target */ 2305 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2306 memcpy(srcBuffer, utf8, sizeof(utf8)); 2307 srcBuffer[sizeof(utf8)]=0; 2308 src=srcBuffer; 
2309 target=targetBuffer; 2310 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2311 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2312 if( errorCode!=U_ZERO_ERROR || 2313 target-targetBuffer!=sizeof(shiftJIS) || 2314 *target!=0 || 2315 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2316 ) { 2317 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n", 2318 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2319 } 2320 2321 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */ 2322 errorCode=U_AMBIGUOUS_ALIAS_WARNING; 2323 memset(targetBuffer, 0xff, sizeof(targetBuffer)); 2324 src=srcBuffer; 2325 target=targetBuffer; 2326 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL, 2327 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2328 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2329 target-targetBuffer!=sizeof(shiftJIS) || 2330 *target!=(char)0xff || 2331 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0 2332 ) { 2333 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n", 2334 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS)); 2335 } 2336 2337 /* bad arguments */ 2338 errorCode=U_MESSAGE_PARSE_ERROR; 2339 src=srcBuffer; 2340 target=targetBuffer; 2341 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2342 NULL, NULL, NULL, NULL, TRUE, TRUE, &errorCode); 2343 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2344 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2345 } 2346 2347 /* pivotLimit==pivotStart */ 2348 errorCode=U_ZERO_ERROR; 2349 pivotSource=pivotTarget=pivotBuffer; 2350 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2351 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, TRUE, TRUE, &errorCode); 2352 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 
2353 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode)); 2354 } 2355 2356 /* *pivotSource==NULL */ 2357 errorCode=U_ZERO_ERROR; 2358 pivotSource=NULL; 2359 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2360 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2361 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2362 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode)); 2363 } 2364 2365 /* *source==NULL */ 2366 errorCode=U_ZERO_ERROR; 2367 src=NULL; 2368 pivotSource=pivotBuffer; 2369 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2370 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, TRUE, &errorCode); 2371 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2372 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode)); 2373 } 2374 2375 /* streaming conversion without a pivot buffer */ 2376 errorCode=U_ZERO_ERROR; 2377 src=srcBuffer; 2378 pivotSource=pivotBuffer; 2379 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL, 2380 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, TRUE, FALSE, &errorCode); 2381 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2382 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode)); 2383 } 2384 2385 ucnv_close(cnv1); 2386 ucnv_close(cnv2); 2387 #endif 2388 } 2389 2390 /* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). 
/*
 * Test illegal UTF-8 input: data for TestConvertExFromUTF8().
 *
 * Each entry is a NUL-terminated byte string that is not well-formed UTF-8.
 * The entries are grouped as: a lone trail byte, sequences that stop before
 * their lead byte's trail bytes are complete (one, two, three, four and five
 * bytes long), and finally sequences of full length that are still illegal.
 * Users of the table take each entry's length with strlen(), so no entry
 * contains an embedded NUL byte.
 */
static const char *const badUTF8[]={
    /* trail byte */
    "\x80",

    /* truncated multi-byte sequences */
    /* lead byte only */
    "\xd0",
    "\xe0",
    "\xe1",
    "\xed",
    "\xee",
    "\xf0",
    "\xf1",
    "\xf4",
    "\xf8",
    "\xfc",

    /* lead byte plus one more byte */
    "\xe0\x80",
    "\xe0\xa0",
    "\xe1\x80",
    "\xed\x80",
    "\xed\xa0",
    "\xee\x80",
    "\xf0\x80",
    "\xf0\x90",
    "\xf1\x80",
    "\xf4\x80",
    "\xf4\x90",
    "\xf8\x80",
    "\xfc\x80",

    /* lead byte plus two more bytes */
    "\xf0\x80\x80",
    "\xf0\x90\x80",
    "\xf1\x80\x80",
    "\xf4\x80\x80",
    "\xf4\x90\x80",
    "\xf8\x80\x80",
    "\xfc\x80\x80",

    /* lead byte plus three more bytes */
    "\xf8\x80\x80\x80",
    "\xfc\x80\x80\x80",

    /* lead byte plus four more bytes */
    "\xfc\x80\x80\x80\x80",

    /* complete sequences but non-shortest forms or out of range etc. */
    "\xc0\x80",
    "\xe0\x80\x80",
    "\xed\xa0\x80",
    "\xf0\x80\x80\x80",
    "\xf4\x90\x80\x80",
    "\xf8\x80\x80\x80\x80",
    "\xfc\x80\x80\x80\x80\x80",
    "\xfe",
    "\xff"
};
2478 &utf16Source, utf16+utf16Length, 2479 NULL, FALSE, &errorCode); 2480 *pChar0Length=(int32_t)(target-char0); 2481 2482 utf16Source=utf16; 2483 target=char1; 2484 ucnv_fromUnicode(cnv, 2485 &target, char1+ARG_CHAR_ARR_SIZE, 2486 &utf16Source, utf16+utf16Length, 2487 NULL, FALSE, &errorCode); 2488 *pChar1Length=(int32_t)(target-char1); 2489 2490 if(U_FAILURE(errorCode)) { 2491 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode)); 2492 return FALSE; 2493 } 2494 return TRUE; 2495 } 2496 2497 static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2498 char charUTF8[4], int32_t charUTF8Length, 2499 char char0[8], int32_t char0Length, 2500 char char1[8], int32_t char1Length) { 2501 char utf8[16]; 2502 int32_t utf8Length; 2503 2504 char output[16]; 2505 int32_t outputLength; 2506 2507 char invalidChars[8]; 2508 int8_t invalidLength; 2509 2510 const char *source; 2511 char *target; 2512 2513 UChar pivotBuffer[8]; 2514 UChar *pivotSource, *pivotTarget; 2515 2516 UErrorCode errorCode; 2517 int32_t i; 2518 2519 /* test truncated sequences */ 2520 errorCode=U_ZERO_ERROR; 2521 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode); 2522 2523 memcpy(utf8, charUTF8, charUTF8Length); 2524 2525 for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) { 2526 /* truncated sequence? 
*/ 2527 int32_t length=strlen(badUTF8[i]); 2528 if(length>=(1+U8_COUNT_TRAIL_BYTES(badUTF8[i][0]))) { 2529 continue; 2530 } 2531 2532 /* assemble a string with the test character and the truncated sequence */ 2533 memcpy(utf8+charUTF8Length, badUTF8[i], length); 2534 utf8Length=charUTF8Length+length; 2535 2536 /* convert and check the invalidChars */ 2537 source=utf8; 2538 target=output; 2539 pivotSource=pivotTarget=pivotBuffer; 2540 errorCode=U_ZERO_ERROR; 2541 ucnv_convertEx(cnv, utf8Cnv, 2542 &target, output+sizeof(output), 2543 &source, utf8+utf8Length, 2544 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer), 2545 TRUE, TRUE, /* reset & flush */ 2546 &errorCode); 2547 outputLength=(int32_t)(target-output); 2548 (void)outputLength; /* Suppress set but not used warning. */ 2549 if(errorCode!=U_TRUNCATED_CHAR_FOUND || pivotSource!=pivotBuffer) { 2550 log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i); 2551 continue; 2552 } 2553 2554 errorCode=U_ZERO_ERROR; 2555 invalidLength=(int8_t)sizeof(invalidChars); 2556 ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode); 2557 if(invalidLength!=length || 0!=memcmp(invalidChars, badUTF8[i], length)) { 2558 log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i); 2559 } 2560 } 2561 } 2562 2563 static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName, 2564 char charUTF8[4], int32_t charUTF8Length, 2565 char char0[8], int32_t char0Length, 2566 char char1[8], int32_t char1Length) { 2567 char utf8[600], expect[600]; 2568 int32_t utf8Length, expectLength; 2569 2570 char testName[32]; 2571 2572 UErrorCode errorCode; 2573 int32_t i; 2574 2575 errorCode=U_ZERO_ERROR; 2576 ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode); 2577 2578 /* 2579 * assemble an input string with the test character between each 2580 * bad sequence, 2581 * and an expected 
string with repeated test character output 2582 */ 2583 memcpy(utf8, charUTF8, charUTF8Length); 2584 utf8Length=charUTF8Length; 2585 2586 memcpy(expect, char0, char0Length); 2587 expectLength=char0Length; 2588 2589 for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) { 2590 int32_t length=strlen(badUTF8[i]); 2591 memcpy(utf8+utf8Length, badUTF8[i], length); 2592 utf8Length+=length; 2593 2594 memcpy(utf8+utf8Length, charUTF8, charUTF8Length); 2595 utf8Length+=charUTF8Length; 2596 2597 memcpy(expect+expectLength, char1, char1Length); 2598 expectLength+=char1Length; 2599 } 2600 2601 /* expect that each bad UTF-8 sequence is detected and skipped */ 2602 strcpy(testName, "from bad UTF-8 to "); 2603 strcat(testName, converterName); 2604 2605 convertExMultiStreaming(utf8Cnv, cnv, 2606 utf8, utf8Length, 2607 expect, expectLength, 2608 testName, 2609 U_ZERO_ERROR); 2610 } 2611 2612 /* Test illegal UTF-8 input. */ 2613 static void TestConvertExFromUTF8() { 2614 static const char *const converterNames[]={ 2615 #if !UCONFIG_NO_LEGACY_CONVERSION 2616 "windows-1252", 2617 "shift-jis", 2618 #endif 2619 "us-ascii", 2620 "iso-8859-1", 2621 "utf-8" 2622 }; 2623 2624 UConverter *utf8Cnv, *cnv; 2625 UErrorCode errorCode; 2626 int32_t i; 2627 2628 /* fromUnicode versions of some character, from initial state and later */ 2629 char charUTF8[4], char0[8], char1[8]; 2630 int32_t charUTF8Length, char0Length, char1Length; 2631 2632 errorCode=U_ZERO_ERROR; 2633 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2634 if(U_FAILURE(errorCode)) { 2635 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2636 return; 2637 } 2638 2639 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) { 2640 errorCode=U_ZERO_ERROR; 2641 cnv=ucnv_open(converterNames[i], &errorCode); 2642 if(U_FAILURE(errorCode)) { 2643 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode)); 2644 continue; 2645 } 2646 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, 
&char0Length, char1, &char1Length)) { 2647 continue; 2648 } 2649 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2650 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length); 2651 ucnv_close(cnv); 2652 } 2653 ucnv_close(utf8Cnv); 2654 } 2655 2656 static void TestConvertExFromUTF8_C5F0() { 2657 static const char *const converterNames[]={ 2658 #if !UCONFIG_NO_LEGACY_CONVERSION 2659 "windows-1251", 2660 "shift-jis", 2661 #endif 2662 "us-ascii", 2663 "iso-8859-1", 2664 "utf-8" 2665 }; 2666 2667 UConverter *utf8Cnv, *cnv; 2668 UErrorCode errorCode; 2669 int32_t i; 2670 2671 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 }; 2672 /* Expect "��" (2x U+FFFD as decimal NCRs) */ 2673 static const char twoNCRs[16]={ 2674 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B, 2675 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B 2676 }; 2677 static const char twoFFFD[6]={ 2678 (char)0xef, (char)0xbf, (char)0xbd, 2679 (char)0xef, (char)0xbf, (char)0xbd 2680 }; 2681 const char *expected; 2682 int32_t expectedLength; 2683 char dest[20]; /* longer than longest expectedLength */ 2684 2685 const char *src; 2686 char *target; 2687 2688 UChar pivotBuffer[128]; 2689 UChar *pivotSource, *pivotTarget; 2690 2691 errorCode=U_ZERO_ERROR; 2692 utf8Cnv=ucnv_open("UTF-8", &errorCode); 2693 if(U_FAILURE(errorCode)) { 2694 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode)); 2695 return; 2696 } 2697 2698 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) { 2699 errorCode=U_ZERO_ERROR; 2700 cnv=ucnv_open(converterNames[i], &errorCode); 2701 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC, 2702 NULL, NULL, &errorCode); 2703 if(U_FAILURE(errorCode)) { 2704 log_data_err("unable to open %s converter - %s\n", 2705 converterNames[i], u_errorName(errorCode)); 2706 continue; 2707 } 2708 src=bad_utf8; 2709 target=dest; 2710 
uprv_memset(dest, 9, sizeof(dest)); 2711 if(i==UPRV_LENGTHOF(converterNames)-1) { 2712 /* conversion to UTF-8 yields two U+FFFD directly */ 2713 expected=twoFFFD; 2714 expectedLength=6; 2715 } else { 2716 /* conversion to a non-Unicode charset yields two NCRs */ 2717 expected=twoNCRs; 2718 expectedLength=16; 2719 } 2720 pivotBuffer[0]=0; 2721 pivotBuffer[1]=1; 2722 pivotBuffer[2]=2; 2723 pivotSource=pivotTarget=pivotBuffer; 2724 ucnv_convertEx( 2725 cnv, utf8Cnv, 2726 &target, dest+expectedLength, 2727 &src, bad_utf8+sizeof(bad_utf8), 2728 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer), 2729 TRUE, TRUE, &errorCode); 2730 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 || 2731 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) || 2732 dest[expectedLength]!=9 2733 ) { 2734 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]); 2735 } 2736 ucnv_close(cnv); 2737 } 2738 ucnv_close(utf8Cnv); 2739 } 2740 2741 static void 2742 TestConvertAlgorithmic() { 2743 #if !UCONFIG_NO_LEGACY_CONVERSION 2744 static const uint8_t 2745 utf8[]={ 2746 /* 4e00 30a1 ff61 0410 */ 2747 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90 2748 }, 2749 shiftJIS[]={ 2750 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40 2751 }, 2752 /*errorTarget[]={*/ 2753 /* 2754 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS: 2755 * SUB, SUB, 0x40, SUB, SUB, 0x40 2756 */ 2757 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/ 2758 /*},*/ 2759 utf16[]={ 2760 0xfe, 0xff /* BOM only, no text */ 2761 }; 2762 #if !UCONFIG_ONLY_HTML_CONVERSION 2763 static const uint8_t utf32[]={ 2764 0xff, 0xfe, 0, 0 /* BOM only, no text */ 2765 }; 2766 #endif 2767 2768 char target[100], utf8NUL[100], shiftJISNUL[100]; 2769 2770 UConverter *cnv; 2771 UErrorCode errorCode; 2772 2773 int32_t length; 2774 2775 errorCode=U_ZERO_ERROR; 2776 cnv=ucnv_open("Shift-JIS", &errorCode); 2777 
if(U_FAILURE(errorCode)) { 2778 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode)); 2779 ucnv_close(cnv); 2780 return; 2781 } 2782 2783 memcpy(utf8NUL, utf8, sizeof(utf8)); 2784 utf8NUL[sizeof(utf8)]=0; 2785 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS)); 2786 shiftJISNUL[sizeof(shiftJIS)]=0; 2787 2788 /* 2789 * The to/from algorithmic convenience functions share a common implementation, 2790 * so we need not test all permutations of them. 2791 */ 2792 2793 /* length in, not terminated out */ 2794 errorCode=U_ZERO_ERROR; 2795 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode); 2796 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2797 length!=sizeof(shiftJIS) || 2798 memcmp(target, shiftJIS, length)!=0 2799 ) { 2800 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n", 2801 u_errorName(errorCode), length, sizeof(shiftJIS)); 2802 } 2803 2804 /* terminated in and out */ 2805 memset(target, 0x55, sizeof(target)); 2806 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2807 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode); 2808 if( errorCode!=U_ZERO_ERROR || 2809 length!=sizeof(utf8) || 2810 memcmp(target, utf8, length)!=0 2811 ) { 2812 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n", 2813 u_errorName(errorCode), length, sizeof(shiftJIS)); 2814 } 2815 2816 /* empty string, some target buffer */ 2817 errorCode=U_STRING_NOT_TERMINATED_WARNING; 2818 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode); 2819 if( errorCode!=U_ZERO_ERROR || 2820 length!=0 2821 ) { 2822 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n", 2823 u_errorName(errorCode), length); 2824 } 2825 2826 /* pseudo-empty string, no target buffer */ 
2827 errorCode=U_ZERO_ERROR; 2828 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2829 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2830 length!=0 2831 ) { 2832 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2833 u_errorName(errorCode), length); 2834 } 2835 2836 #if !UCONFIG_ONLY_HTML_CONVERSION 2837 errorCode=U_ZERO_ERROR; 2838 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode); 2839 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || 2840 length!=0 2841 ) { 2842 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n", 2843 u_errorName(errorCode), length); 2844 } 2845 #endif 2846 2847 /* bad arguments */ 2848 errorCode=U_MESSAGE_PARSE_ERROR; 2849 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode); 2850 if(errorCode!=U_MESSAGE_PARSE_ERROR) { 2851 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode)); 2852 } 2853 2854 /* source==NULL */ 2855 errorCode=U_ZERO_ERROR; 2856 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode); 2857 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2858 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode)); 2859 } 2860 2861 /* illegal alg. type */ 2862 errorCode=U_ZERO_ERROR; 2863 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode); 2864 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) { 2865 log_err("ucnv_fromAlgorithmic(illegal alg. 
type) sets %s\n", u_errorName(errorCode)); 2866 } 2867 ucnv_close(cnv); 2868 #endif 2869 } 2870 2871 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION 2872 static void TestLMBCSMaxChar(void) { 2873 static const struct { 2874 int8_t maxSize; 2875 const char *name; 2876 } converter[] = { 2877 /* some non-LMBCS converters - perfect test setup here */ 2878 { 1, "US-ASCII"}, 2879 { 1, "ISO-8859-1"}, 2880 2881 { 2, "UTF-16"}, 2882 { 2, "UTF-16BE"}, 2883 { 3, "UTF-8"}, 2884 { 3, "CESU-8"}, 2885 { 3, "SCSU"}, 2886 { 4, "UTF-32"}, 2887 { 4, "UTF-7"}, 2888 { 4, "IMAP-mailbox-name"}, 2889 { 4, "BOCU-1"}, 2890 2891 { 1, "windows-1256"}, 2892 { 2, "Shift-JIS"}, 2893 { 2, "ibm-16684"}, 2894 { 3, "ibm-930"}, 2895 { 3, "ibm-1390"}, 2896 { 4, "*test3"}, 2897 { 16,"*test4"}, 2898 2899 { 4, "ISCII"}, 2900 { 4, "HZ"}, 2901 2902 { 3, "ISO-2022"}, 2903 { 8, "ISO-2022-KR"}, 2904 { 6, "ISO-2022-JP"}, 2905 { 8, "ISO-2022-CN"}, 2906 2907 /* LMBCS */ 2908 { 3, "LMBCS-1"}, 2909 { 3, "LMBCS-2"}, 2910 { 3, "LMBCS-3"}, 2911 { 3, "LMBCS-4"}, 2912 { 3, "LMBCS-5"}, 2913 { 3, "LMBCS-6"}, 2914 { 3, "LMBCS-8"}, 2915 { 3, "LMBCS-11"}, 2916 { 3, "LMBCS-16"}, 2917 { 3, "LMBCS-17"}, 2918 { 3, "LMBCS-18"}, 2919 { 3, "LMBCS-19"} 2920 }; 2921 int32_t idx; 2922 2923 for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) { 2924 UErrorCode status = U_ZERO_ERROR; 2925 UConverter *cnv = cnv_open(converter[idx].name, &status); 2926 if (U_FAILURE(status)) { 2927 continue; 2928 } 2929 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) { 2930 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n", 2931 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv)); 2932 } 2933 ucnv_close(cnv); 2934 } 2935 2936 /* mostly test that the macro compiles */ 2937 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) { 2938 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n"); 2939 } 2940 } 2941 #endif 2942 2943 static void TestJ1968(void) { 2944 UErrorCode err = U_ZERO_ERROR; 2945 UConverter *cnv; 2946 
char myConvName[] = "My really really really really really really really really really really really" 2947 " really really really really really really really really really really really" 2948 " really really really really really really really really long converter name"; 2949 UChar myConvNameU[sizeof(myConvName)]; 2950 2951 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName)); 2952 2953 err = U_ZERO_ERROR; 2954 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0; 2955 cnv = ucnv_openU(myConvNameU, &err); 2956 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2957 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2958 } 2959 2960 err = U_ZERO_ERROR; 2961 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 2962 cnv = ucnv_openU(myConvNameU, &err); 2963 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2964 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2965 } 2966 2967 err = U_ZERO_ERROR; 2968 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 2969 cnv = ucnv_openU(myConvNameU, &err); 2970 if (cnv || err != U_FILE_ACCESS_ERROR) { 2971 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2972 } 2973 2974 2975 2976 2977 err = U_ZERO_ERROR; 2978 cnv = ucnv_open(myConvName, &err); 2979 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2980 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2981 } 2982 2983 err = U_ZERO_ERROR; 2984 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ','; 2985 cnv = ucnv_open(myConvName, &err); 2986 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 2987 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 2988 } 2989 2990 err = U_ZERO_ERROR; 2991 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2992 cnv = ucnv_open(myConvName, &err); 2993 if (cnv || err != U_FILE_ACCESS_ERROR) { 2994 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 2995 } 2996 2997 err = 
U_ZERO_ERROR; 2998 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ','; 2999 strncpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7); 3000 cnv = ucnv_open(myConvName, &err); 3001 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3002 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3003 } 3004 3005 /* The comma isn't really a part of the converter name. */ 3006 err = U_ZERO_ERROR; 3007 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0; 3008 cnv = ucnv_open(myConvName, &err); 3009 if (cnv || err != U_FILE_ACCESS_ERROR) { 3010 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3011 } 3012 3013 err = U_ZERO_ERROR; 3014 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' '; 3015 cnv = ucnv_open(myConvName, &err); 3016 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) { 3017 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err)); 3018 } 3019 3020 err = U_ZERO_ERROR; 3021 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0; 3022 cnv = ucnv_open(myConvName, &err); 3023 if (cnv || err != U_FILE_ACCESS_ERROR) { 3024 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err)); 3025 } 3026 3027 } 3028 3029 #if !UCONFIG_NO_LEGACY_CONVERSION 3030 static void 3031 testSwap(const char *name, UBool swap) { 3032 /* 3033 * Test Unicode text. 3034 * Contains characters that are the highest for some of the 3035 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the 3036 * tables copies the entire tables. 
3037 */ 3038 static const UChar text[]={ 3039 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a 3040 }; 3041 3042 UChar uNormal[32], uSwapped[32]; 3043 char normal[32], swapped[32]; 3044 const UChar *pcu; 3045 UChar *pu; 3046 char *pc; 3047 int32_t i, normalLength, swappedLength; 3048 UChar u; 3049 char c; 3050 3051 const char *swappedName; 3052 UConverter *cnv, *swapCnv; 3053 UErrorCode errorCode; 3054 3055 /* if the swap flag is FALSE, then the test encoding is not EBCDIC and must not swap */ 3056 3057 /* open both the normal and the LF/NL-swapping converters */ 3058 strcpy(swapped, name); 3059 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING); 3060 3061 errorCode=U_ZERO_ERROR; 3062 swapCnv=ucnv_open(swapped, &errorCode); 3063 cnv=ucnv_open(name, &errorCode); 3064 if(U_FAILURE(errorCode)) { 3065 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode)); 3066 goto cleanup; 3067 } 3068 3069 /* the name must contain the swap option if and only if we expect the converter to swap */ 3070 swappedName=ucnv_getName(swapCnv, &errorCode); 3071 if(U_FAILURE(errorCode)) { 3072 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode)); 3073 goto cleanup; 3074 } 3075 3076 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING); 3077 if(swap != (pc!=NULL)) { 3078 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap); 3079 goto cleanup; 3080 } 3081 3082 /* convert to EBCDIC */ 3083 pcu=text; 3084 pc=normal; 3085 ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); 3086 normalLength=(int32_t)(pc-normal); 3087 3088 pcu=text; 3089 pc=swapped; 3090 ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, TRUE, &errorCode); 3091 swappedLength=(int32_t)(pc-swapped); 3092 3093 
if(U_FAILURE(errorCode)) { 3094 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode)); 3095 goto cleanup; 3096 } 3097 3098 /* compare EBCDIC output */ 3099 if(normalLength!=swappedLength) { 3100 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength); 3101 goto cleanup; 3102 } 3103 for(i=0; i<normalLength; ++i) { 3104 /* swap EBCDIC LF/NL for comparison */ 3105 c=normal[i]; 3106 if(swap) { 3107 if(c==0x15) { 3108 c=0x25; 3109 } else if(c==0x25) { 3110 c=0x15; 3111 } 3112 } 3113 3114 if(c!=swapped[i]) { 3115 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]); 3116 goto cleanup; 3117 } 3118 } 3119 3120 /* convert back to Unicode (may not roundtrip) */ 3121 pc=normal; 3122 pu=uNormal; 3123 ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, TRUE, &errorCode); 3124 normalLength=(int32_t)(pu-uNormal); 3125 3126 pc=normal; 3127 pu=uSwapped; 3128 ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, TRUE, &errorCode); 3129 swappedLength=(int32_t)(pu-uSwapped); 3130 3131 if(U_FAILURE(errorCode)) { 3132 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode)); 3133 goto cleanup; 3134 } 3135 3136 /* compare EBCDIC output */ 3137 if(normalLength!=swappedLength) { 3138 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. 
%d\n", name, normalLength, swappedLength); 3139 goto cleanup; 3140 } 3141 for(i=0; i<normalLength; ++i) { 3142 /* swap EBCDIC LF/NL for comparison */ 3143 u=uNormal[i]; 3144 if(swap) { 3145 if(u==0xa) { 3146 u=0x85; 3147 } else if(u==0x85) { 3148 u=0xa; 3149 } 3150 } 3151 3152 if(u!=uSwapped[i]) { 3153 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]); 3154 goto cleanup; 3155 } 3156 } 3157 3158 /* clean up */ 3159 cleanup: 3160 ucnv_close(cnv); 3161 ucnv_close(swapCnv); 3162 } 3163 3164 static void 3165 TestEBCDICSwapLFNL() { 3166 static const struct { 3167 const char *name; 3168 UBool swap; 3169 } tests[]={ 3170 { "ibm-37", TRUE }, 3171 { "ibm-1047", TRUE }, 3172 { "ibm-1140", TRUE }, 3173 { "ibm-930", TRUE }, 3174 { "iso-8859-3", FALSE } 3175 }; 3176 3177 int i; 3178 3179 for(i=0; i<UPRV_LENGTHOF(tests); ++i) { 3180 testSwap(tests[i].name, tests[i].swap); 3181 } 3182 } 3183 #else 3184 static void 3185 TestEBCDICSwapLFNL() { 3186 /* test nothing... */ 3187 } 3188 #endif 3189 3190 static void TestFromUCountPending(){ 3191 #if !UCONFIG_NO_LEGACY_CONVERSION 3192 UErrorCode status = U_ZERO_ERROR; 3193 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */ 3194 static const struct { 3195 UChar input[6]; 3196 int32_t len; 3197 int32_t exp; 3198 }fromUnicodeTests[] = { 3199 /*m:n conversion*/ 3200 {{0xdbc4},1,1}, 3201 {{ 0xdbc4, 0xde34, 0xd84d},3,1}, 3202 {{ 0xdbc4, 0xde34, 0xd900},3,3}, 3203 }; 3204 int i; 3205 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3206 if(U_FAILURE(status)){ 3207 log_data_err("Could not create converter for test3. 
Error: %s\n", u_errorName(status)); 3208 return; 3209 } 3210 for(i=0; i<UPRV_LENGTHOF(fromUnicodeTests); ++i) { 3211 char tgt[10]; 3212 char* target = tgt; 3213 char* targetLimit = target + 10; 3214 const UChar* source = fromUnicodeTests[i].input; 3215 const UChar* sourceLimit = source + fromUnicodeTests[i].len; 3216 int32_t len = 0; 3217 ucnv_reset(cnv); 3218 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3219 len = ucnv_fromUCountPending(cnv, &status); 3220 if(U_FAILURE(status)){ 3221 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3222 status = U_ZERO_ERROR; 3223 continue; 3224 } 3225 if(len != fromUnicodeTests[i].exp){ 3226 log_err("Did not get the expeced output for ucnv_fromUInputConsumed.\n"); 3227 } 3228 } 3229 status = U_ZERO_ERROR; 3230 { 3231 /* 3232 * The converter has to read the tail before it knows that 3233 * only head alone matches. 3234 * At the end, the output for head will overflow the target, 3235 * middle will be pending, and tail will not have been consumed. 
3236 */ 3237 /* 3238 \U00101234 -> x (<U101234> \x07 |0) 3239 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0) 3240 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0) 3241 \U00060007 -> unassigned 3242 */ 3243 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */ 3244 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */ 3245 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */ 3246 char tgt[10]; 3247 char* target = tgt; 3248 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */ 3249 const UChar* source = head; 3250 const UChar* sourceLimit = source + u_strlen(head); 3251 int32_t len = 0; 3252 ucnv_reset(cnv); 3253 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3254 len = ucnv_fromUCountPending(cnv, &status); 3255 if(U_FAILURE(status)){ 3256 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3257 status = U_ZERO_ERROR; 3258 } 3259 if(len!=4){ 3260 log_err("ucnv_fromUInputHeld did not return correct length for head\n"); 3261 } 3262 source = middle; 3263 sourceLimit = source + u_strlen(middle); 3264 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3265 len = ucnv_fromUCountPending(cnv, &status); 3266 if(U_FAILURE(status)){ 3267 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3268 status = U_ZERO_ERROR; 3269 } 3270 if(len!=5){ 3271 log_err("ucnv_fromUInputHeld did not return correct length for middle\n"); 3272 } 3273 source = tail; 3274 sourceLimit = source + u_strlen(tail); 3275 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3276 if(status != U_BUFFER_OVERFLOW_ERROR){ 3277 log_err("ucnv_fromUnicode call did not succeed. 
Error: %s\n", u_errorName(status)); 3278 } 3279 status = U_ZERO_ERROR; 3280 len = ucnv_fromUCountPending(cnv, &status); 3281 /* middle[1] is pending, tail has not been consumed */ 3282 if(U_FAILURE(status)){ 3283 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status)); 3284 } 3285 if(len!=1){ 3286 log_err("ucnv_fromUInputHeld did not return correct length for tail\n"); 3287 } 3288 } 3289 ucnv_close(cnv); 3290 #endif 3291 } 3292 3293 static void 3294 TestToUCountPending(){ 3295 #if !UCONFIG_NO_LEGACY_CONVERSION 3296 UErrorCode status = U_ZERO_ERROR; 3297 static const struct { 3298 char input[6]; 3299 int32_t len; 3300 int32_t exp; 3301 }toUnicodeTests[] = { 3302 /*m:n conversion*/ 3303 {{0x05, 0x01, 0x02},3,3}, 3304 {{0x01, 0x02},2,2}, 3305 {{0x07, 0x00, 0x01, 0x02},4,4}, 3306 }; 3307 3308 int i; 3309 UConverterToUCallback *oldToUAction= NULL; 3310 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status); 3311 if(U_FAILURE(status)){ 3312 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3313 return; 3314 } 3315 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3316 for(i=0; i<UPRV_LENGTHOF(toUnicodeTests); ++i) { 3317 UChar tgt[20]; 3318 UChar* target = tgt; 3319 UChar* targetLimit = target + 20; 3320 const char* source = toUnicodeTests[i].input; 3321 const char* sourceLimit = source + toUnicodeTests[i].len; 3322 int32_t len = 0; 3323 ucnv_reset(cnv); 3324 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3325 len = ucnv_toUCountPending(cnv,&status); 3326 if(U_FAILURE(status)){ 3327 log_err("ucnv_toUnicode call did not succeed. 
Error: %s\n", u_errorName(status)); 3328 status = U_ZERO_ERROR; 3329 continue; 3330 } 3331 if(len != toUnicodeTests[i].exp){ 3332 log_err("Did not get the expeced output for ucnv_toUInputConsumed.\n"); 3333 } 3334 } 3335 status = U_ZERO_ERROR; 3336 ucnv_close(cnv); 3337 3338 { 3339 /* 3340 * The converter has to read the tail before it knows that 3341 * only head alone matches. 3342 * At the end, the output for head will overflow the target, 3343 * mid will be pending, and tail will not have been consumed. 3344 */ 3345 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00}; 3346 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 }; 3347 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 }; 3348 /* 3349 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0) 3350 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0) 3351 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3) 3352 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar") 3353 */ 3354 UChar tgt[10]; 3355 UChar* target = tgt; 3356 UChar* targetLimit = target + 1; /* expect overflow from converting */ 3357 const char* source = head; 3358 const char* sourceLimit = source + strlen(head); 3359 int32_t len = 0; 3360 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status); 3361 if(U_FAILURE(status)){ 3362 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status)); 3363 return; 3364 } 3365 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status); 3366 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3367 len = ucnv_toUCountPending(cnv,&status); 3368 if(U_FAILURE(status)){ 3369 log_err("ucnv_toUnicode call did not succeed. 
Error: %s\n", u_errorName(status)); 3370 } 3371 if(len != 4){ 3372 log_err("Did not get the expected len for head.\n"); 3373 } 3374 source=mid; 3375 sourceLimit = source+strlen(mid); 3376 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3377 len = ucnv_toUCountPending(cnv,&status); 3378 if(U_FAILURE(status)){ 3379 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3380 } 3381 if(len != 8){ 3382 log_err("Did not get the expected len for mid.\n"); 3383 } 3384 3385 source=tail; 3386 sourceLimit = source+strlen(tail); 3387 targetLimit = target; 3388 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, FALSE, &status); 3389 if(status != U_BUFFER_OVERFLOW_ERROR){ 3390 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status)); 3391 } 3392 status = U_ZERO_ERROR; 3393 len = ucnv_toUCountPending(cnv,&status); 3394 /* mid[4] is pending, tail has not been consumed */ 3395 if(U_FAILURE(status)){ 3396 log_err("ucnv_toUCountPending call did not succeed. 
Error: %s\n", u_errorName(status)); 3397 } 3398 if(len != 4){ 3399 log_err("Did not get the expected len for tail.\n"); 3400 } 3401 ucnv_close(cnv); 3402 } 3403 #endif 3404 } 3405 3406 static void TestOneDefaultNameChange(const char *name, const char *expected) { 3407 UErrorCode status = U_ZERO_ERROR; 3408 UConverter *cnv; 3409 ucnv_setDefaultName(name); 3410 if(strcmp(ucnv_getDefaultName(), expected)==0) 3411 log_verbose("setDefaultName of %s works.\n", name); 3412 else 3413 log_err("setDefaultName of %s failed\n", name); 3414 cnv=ucnv_open(NULL, &status); 3415 if (U_FAILURE(status) || cnv == NULL) { 3416 log_err("opening the default converter of %s failed\n", name); 3417 return; 3418 } 3419 if(strcmp(ucnv_getName(cnv, &status), expected)==0) 3420 log_verbose("ucnv_getName of %s works.\n", name); 3421 else 3422 log_err("ucnv_getName of %s failed\n", name); 3423 ucnv_close(cnv); 3424 } 3425 3426 static void TestDefaultName(void) { 3427 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/ 3428 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; 3429 strcpy(defaultName, ucnv_getDefaultName()); 3430 3431 log_verbose("getDefaultName returned %s\n", defaultName); 3432 3433 /*change the default name by setting it */ 3434 TestOneDefaultNameChange("UTF-8", "UTF-8"); 3435 #if U_CHARSET_IS_UTF8 3436 TestOneDefaultNameChange("ISCII,version=1", "UTF-8"); 3437 TestOneDefaultNameChange("ISCII,version=2", "UTF-8"); 3438 TestOneDefaultNameChange("ISO-8859-1", "UTF-8"); 3439 #else 3440 # if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 3441 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1"); 3442 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2"); 3443 # endif 3444 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1"); 3445 #endif 3446 3447 /*set the default name back*/ 3448 ucnv_setDefaultName(defaultName); 3449 } 3450 3451 /* Test that ucnv_compareNames() matches names according to spec. 
----------- */ 3452 3453 static int 3454 sign(int n) { 3455 if(n==0) { 3456 return 0; 3457 } else if(n<0) { 3458 return -1; 3459 } else /* n>0 */ { 3460 return 1; 3461 } 3462 } 3463 3464 static void 3465 compareNames(const char **names) { 3466 const char *relation, *name1, *name2; 3467 int rel, result; 3468 3469 relation=*names++; 3470 if(*relation=='=') { 3471 rel = 0; 3472 } else if(*relation=='<') { 3473 rel = -1; 3474 } else { 3475 rel = 1; 3476 } 3477 3478 name1=*names++; 3479 if(name1==NULL) { 3480 return; 3481 } 3482 while((name2=*names++)!=NULL) { 3483 result=ucnv_compareNames(name1, name2); 3484 if(sign(result)!=rel) { 3485 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel); 3486 } 3487 name1=name2; 3488 } 3489 } 3490 3491 static void 3492 TestCompareNames() { 3493 static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL }; 3494 static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL }; 3495 static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL }; 3496 static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL }; 3497 3498 compareNames(equalUTF8); 3499 compareNames(equalIBM); 3500 compareNames(lessMac); 3501 compareNames(lessUTF080); 3502 } 3503 3504 static void 3505 TestSubstString() { 3506 static const UChar surrogate[1]={ 0xd900 }; 3507 char buffer[16]; 3508 3509 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3510 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 }; 3511 UConverter *cnv; 3512 UErrorCode errorCode; 3513 int32_t length; 3514 int8_t len8; 3515 3516 /* UTF-16/32: test that the BOM is output before the sub character */ 3517 errorCode=U_ZERO_ERROR; 3518 cnv=ucnv_open("UTF-16", &errorCode); 3519 if(U_FAILURE(errorCode)) { 3520 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode)); 3521 return; 3522 } 3523 
length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3524 ucnv_close(cnv); 3525 if(U_FAILURE(errorCode) || 3526 length!=4 || 3527 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3528 ) { 3529 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n"); 3530 } 3531 3532 errorCode=U_ZERO_ERROR; 3533 cnv=ucnv_open("UTF-32", &errorCode); 3534 if(U_FAILURE(errorCode)) { 3535 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode)); 3536 return; 3537 } 3538 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode); 3539 ucnv_close(cnv); 3540 if(U_FAILURE(errorCode) || 3541 length!=8 || 3542 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode) 3543 ) { 3544 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n"); 3545 } 3546 3547 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */ 3548 errorCode=U_ZERO_ERROR; 3549 cnv=ucnv_open("ISO-8859-1", &errorCode); 3550 if(U_FAILURE(errorCode)) { 3551 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode)); 3552 return; 3553 } 3554 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); 3555 if(U_FAILURE(errorCode)) { 3556 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode)); 3557 } else { 3558 len8 = sizeof(buffer); 3559 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3560 /* Stateless converter, we expect the string converted to charset bytes. 
*/ 3561 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) { 3562 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode)); 3563 } 3564 } 3565 ucnv_close(cnv); 3566 3567 #if !UCONFIG_NO_LEGACY_CONVERSION 3568 errorCode=U_ZERO_ERROR; 3569 cnv=ucnv_open("HZ", &errorCode); 3570 if(U_FAILURE(errorCode)) { 3571 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode)); 3572 return; 3573 } 3574 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode); 3575 if(U_FAILURE(errorCode)) { 3576 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode)); 3577 } else { 3578 len8 = sizeof(buffer); 3579 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode); 3580 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */ 3581 if(U_FAILURE(errorCode) || len8!=0) { 3582 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode)); 3583 } 3584 } 3585 ucnv_close(cnv); 3586 #endif 3587 /* 3588 * Further testing of ucnv_setSubstString() is done via intltest convert. 3589 * We do not test edge cases of illegal arguments and similar because the 3590 * function implementation uses all of its parameters in calls to other 3591 * functions with UErrorCode parameters. 
3592 */ 3593 } 3594 3595 static void 3596 InvalidArguments() { 3597 UConverter *cnv; 3598 UErrorCode errorCode; 3599 char charBuffer[2] = {1, 1}; 3600 char ucharAsCharBuffer[2] = {2, 2}; 3601 char *charsPtr = charBuffer; 3602 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer; 3603 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1); 3604 3605 errorCode=U_ZERO_ERROR; 3606 cnv=ucnv_open("UTF-8", &errorCode); 3607 if(U_FAILURE(errorCode)) { 3608 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode)); 3609 return; 3610 } 3611 3612 errorCode=U_ZERO_ERROR; 3613 /* This one should fail because an incomplete UChar is being passed in */ 3614 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, TRUE, &errorCode); 3615 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3616 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3617 } 3618 3619 errorCode=U_ZERO_ERROR; 3620 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3621 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, TRUE, &errorCode); 3622 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3623 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3624 } 3625 3626 errorCode=U_ZERO_ERROR; 3627 /* This one should fail because an incomplete UChar is being passed in */ 3628 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 3629 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3630 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode)); 3631 } 3632 3633 errorCode=U_ZERO_ERROR; 3634 /* This one should fail because ucharsBadPtr is > than ucharsPtr */ 3635 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, TRUE, &errorCode); 
3636 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) { 3637 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode)); 3638 } 3639 3640 if (charBuffer[0] != 1 || charBuffer[1] != 1 3641 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2) 3642 { 3643 log_err("Data was incorrectly written to buffers\n"); 3644 } 3645 3646 ucnv_close(cnv); 3647 } 3648 3649 static void TestGetName() { 3650 static const char *const names[] = { 3651 "Unicode", "UTF-16", 3652 "UnicodeBigUnmarked", "UTF-16BE", 3653 "UnicodeBig", "UTF-16BE,version=1", 3654 "UnicodeLittleUnmarked", "UTF-16LE", 3655 "UnicodeLittle", "UTF-16LE,version=1", 3656 "x-UTF-16LE-BOM", "UTF-16LE,version=1" 3657 }; 3658 int32_t i; 3659 for(i = 0; i < UPRV_LENGTHOF(names); i += 2) { 3660 UErrorCode errorCode = U_ZERO_ERROR; 3661 UConverter *cnv = ucnv_open(names[i], &errorCode); 3662 if(U_SUCCESS(errorCode)) { 3663 const char *name = ucnv_getName(cnv, &errorCode); 3664 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) { 3665 log_err("ucnv_getName(%s) = %s != %s -- %s\n", 3666 names[i], name, names[i+1], u_errorName(errorCode)); 3667 } 3668 ucnv_close(cnv); 3669 } 3670 } 3671 } 3672 3673 static void TestUTFBOM() { 3674 static const UChar a16[] = { 0x61 }; 3675 static const char *const names[] = { 3676 "UTF-16", 3677 "UTF-16,version=1", 3678 "UTF-16BE", 3679 "UnicodeBig", 3680 "UTF-16LE", 3681 "UnicodeLittle" 3682 }; 3683 static const uint8_t expected[][5] = { 3684 #if U_IS_BIG_ENDIAN 3685 { 4, 0xfe, 0xff, 0, 0x61 }, 3686 { 4, 0xfe, 0xff, 0, 0x61 }, 3687 #else 3688 { 4, 0xff, 0xfe, 0x61, 0 }, 3689 { 4, 0xff, 0xfe, 0x61, 0 }, 3690 #endif 3691 3692 { 2, 0, 0x61 }, 3693 { 4, 0xfe, 0xff, 0, 0x61 }, 3694 3695 { 2, 0x61, 0 }, 3696 { 4, 0xff, 0xfe, 0x61, 0 } 3697 }; 3698 3699 char bytes[10]; 3700 int32_t i; 3701 3702 for(i = 0; i < UPRV_LENGTHOF(names); ++i) { 3703 UErrorCode errorCode = U_ZERO_ERROR; 3704 UConverter *cnv = ucnv_open(names[i], &errorCode); 
3705 int32_t length = 0; 3706 const uint8_t *exp = expected[i]; 3707 if (U_FAILURE(errorCode)) { 3708 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode)); 3709 continue; 3710 } 3711 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode); 3712 3713 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) { 3714 log_err("unexpected %s BOM writing behavior -- %s\n", 3715 names[i], u_errorName(errorCode)); 3716 } 3717 ucnv_close(cnv); 3718 } 3719 } 3720