1 /******************************************************************** 2 * Copyright (c) 1997-2010, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************** 5 * 6 * File UCNVSELTST.C 7 * 8 * Modification History: 9 * Name Description 10 * MOHAMED ELDAWY Creation 11 ******************************************************************** 12 */ 13 14 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/ 15 16 #include "ucnvseltst.h" 17 18 #include <stdio.h> 19 20 #include "unicode/utypes.h" 21 #include "unicode/ucnvsel.h" 22 #include "unicode/ustring.h" 23 #include "cmemory.h" 24 #include "cstring.h" 25 #include "propsvec.h" 26 27 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 28 29 #define FILENAME_BUFFER 1024 30 31 #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING 32 33 static void TestSelector(void); 34 static void TestUPropsVector(void); 35 void addCnvSelTest(TestNode** root); /* Declaration required to suppress compiler warnings. */ 36 37 void addCnvSelTest(TestNode** root) 38 { 39 addTest(root, &TestSelector, "tsconv/ucnvseltst/TestSelector"); 40 addTest(root, &TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector"); 41 } 42 43 static const char **gAvailableNames = NULL; 44 static int32_t gCountAvailable = 0; 45 46 static UBool 47 getAvailableNames() { 48 int32_t i; 49 if (gAvailableNames != NULL) { 50 return TRUE; 51 } 52 gCountAvailable = ucnv_countAvailable(); 53 if (gCountAvailable == 0) { 54 log_data_err("No converters available.\n"); 55 return FALSE; 56 } 57 gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *)); 58 if (gAvailableNames == NULL) { 59 log_err("unable to allocate memory for %ld available converter names\n", 60 (long)gCountAvailable); 61 return FALSE; 62 } 63 for (i = 0; i < gCountAvailable; ++i) { 64 gAvailableNames[i] = ucnv_getAvailableName(i); 65 } 66 return TRUE; 67 } 68 69 static void 70 releaseAvailableNames() { 71 uprv_free((void *)gAvailableNames); 72 gAvailableNames = NULL; 73 gCountAvailable = 0; 74 } 75 76 static const char ** 77 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) { 78 const char **names; 79 int32_t i; 80 81 *pCount = 0; 82 if (count <= 0) { 83 return NULL; 84 } 85 names = (const char **)uprv_malloc(count * sizeof(char *)); 86 if (names == NULL) { 87 log_err("memory allocation error for %ld pointers\n", (long)count); 88 return NULL; 89 } 90 if (step == 0 && count > 0) { 91 step = 1; 92 } 93 for (i = 0; i < count; ++i) { 94 if (0 <= start && start < gCountAvailable) { 95 names[i] = gAvailableNames[start]; 96 start += step; 97 ++*pCount; 98 } 99 } 100 return names; 101 } 102 103 #if 0 104 /* 105 * ucnvsel_open() does not support "no encodings": 106 * Given 0 encodings it will open a selector for all available ones. 107 */ 108 static const char ** 109 getNoEncodings(int32_t *pCount) { 110 *pCount = 0; 111 return NULL; 112 } 113 #endif 114 115 static const char ** 116 getOneEncoding(int32_t *pCount) { 117 return getEncodings(1, 0, 1, pCount); 118 } 119 120 static const char ** 121 getFirstEvenEncodings(int32_t *pCount) { 122 return getEncodings(0, 2, 25, pCount); 123 } 124 125 static const char ** 126 getMiddleEncodings(int32_t *pCount) { 127 return getEncodings(gCountAvailable - 12, 1, 22, pCount); 128 } 129 130 static const char ** 131 getLastEncodings(int32_t *pCount) { 132 return getEncodings(gCountAvailable - 1, -1, 25, pCount); 133 } 134 135 static const char ** 136 getSomeEncodings(int32_t *pCount) { 137 /* 20 evenly distributed */ 138 return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount); 139 } 140 141 static const char ** 142 getEveryThirdEncoding(int32_t *pCount) { 143 return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount); 144 } 145 146 static const char ** 147 getAllEncodings(int32_t *pCount) { 148 return getEncodings(0, 1, gCountAvailable, pCount); 149 } 150 151 typedef const char **GetEncodingsFn(int32_t *); 152 153 static GetEncodingsFn *const getEncodingsFns[] = { 154 getOneEncoding, 155 getFirstEvenEncodings, 156 getMiddleEncodings, 157 getLastEncodings, 158 getSomeEncodings, 159 getEveryThirdEncoding, 160 getAllEncodings 161 }; 162 163 static FILE *fopenOrError(const char *filename) { 164 int32_t needLen; 165 FILE *f; 166 char fnbuf[FILENAME_BUFFER]; 167 const char* directory= ctest_dataSrcDir(); 168 needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1; 169 if(needLen > FILENAME_BUFFER) { 170 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n", 171 filename, needLen, FILENAME_BUFFER); 172 return NULL; 173 } 174 175 strcpy(fnbuf, directory); 176 strcat(fnbuf, TDSRCPATH); 177 strcat(fnbuf, filename); 178 179 f = fopen(fnbuf, "rb"); 180 181 if(f == NULL) { 182 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename); 183 } 184 return f; 185 } 186 187 typedef struct TestText { 188 char *text, *textLimit; 189 char *limit; 190 int32_t number; 191 } TestText; 192 193 static void 194 text_reset(TestText *tt) { 195 tt->limit = tt->text; 196 tt->number = 0; 197 } 198 199 static char * 200 text_nextString(TestText *tt, int32_t *pLength) { 201 char *s = tt->limit; 202 if (s == tt->textLimit) { 203 /* we already delivered the last string */ 204 return NULL; 205 } else if (s == tt->text) { 206 /* first string */ 207 if ((tt->textLimit - tt->text) >= 3 && 208 s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf 209 ) { 210 s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */ 211 } 212 } else { 213 /* skip the string terminator */ 214 ++s; 215 ++tt->number; 216 } 217 218 /* find the end of this string */ 219 tt->limit = uprv_strchr(s, 0); 220 *pLength = (int32_t)(tt->limit - s); 221 return s; 222 } 223 224 static UBool 225 text_open(TestText *tt) { 226 FILE *f; 227 char *s; 228 int32_t length; 229 uprv_memset(tt, 0, sizeof(TestText)); 230 f = fopenOrError("ConverterSelectorTestUTF8.txt"); 231 if(!f) { 232 return FALSE; 233 } 234 fseek(f, 0, SEEK_END); 235 length = (int32_t)ftell(f); 236 fseek(f, 0, SEEK_SET); 237 tt->text = (char *)uprv_malloc(length + 1); 238 if (tt->text == NULL) { 239 fclose(f); 240 return FALSE; 241 } 242 if (length != fread(tt->text, 1, length, f)) { 243 log_err("error reading %ld bytes from test text file\n", (long)length); 244 length = 0; 245 uprv_free(tt->text); 246 } 247 fclose(f); 248 tt->textLimit = tt->text + length; 249 *tt->textLimit = 0; 250 /* replace all Unicode '#' (U+0023) with NUL */ 251 for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {} 252 text_reset(tt); 253 return TRUE; 254 } 255 256 static void 257 text_close(TestText *tt) { 258 uprv_free(tt->text); 259 } 260 261 static int32_t findIndex(const char* converterName) { 262 int32_t i; 263 for (i = 0 ; i < gCountAvailable; i++) { 264 if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) { 265 return i; 266 } 267 } 268 return -1; 269 } 270 271 static UBool * 272 getResultsManually(const char** encodings, int32_t num_encodings, 273 const char *utf8, int32_t length, 274 const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) { 275 UBool* resultsManually; 276 int32_t i; 277 278 resultsManually = (UBool*) uprv_malloc(gCountAvailable); 279 uprv_memset(resultsManually, 0, gCountAvailable); 280 281 for(i = 0 ; i < num_encodings ; i++) { 282 UErrorCode status = U_ZERO_ERROR; 283 /* get unicode set for that converter */ 284 USet* set; 285 UConverter* test_converter; 286 UChar32 cp; 287 int32_t encIndex, offset; 288 289 set = uset_openEmpty(); 290 test_converter = ucnv_open(encodings[i], &status); 291 ucnv_getUnicodeSet(test_converter, set, 292 whichSet, &status); 293 if (excludedCodePoints != NULL) { 294 uset_addAll(set, excludedCodePoints); 295 } 296 uset_freeze(set); 297 offset = 0; 298 cp = 0; 299 300 encIndex = findIndex(encodings[i]); 301 /* 302 * The following is almost, but not entirely, the same as 303 * resultsManually[encIndex] = 304 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length); 305 * They might be different if the set contains strings, 306 * or if the utf8 string contains an illegal sequence. 307 * 308 * The UConverterSelector does not currently handle strings that can be 309 * converted, and it treats an illegal sequence as convertible 310 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible. 311 */ 312 resultsManually[encIndex] = TRUE; 313 while(offset<length) { 314 U8_NEXT(utf8, offset, length, cp); 315 if (cp >= 0 && !uset_contains(set, cp)) { 316 resultsManually[encIndex] = FALSE; 317 break; 318 } 319 } 320 uset_close(set); 321 ucnv_close(test_converter); 322 } 323 return resultsManually; 324 } 325 326 /* closes res but does not free resultsManually */ 327 static void verifyResult(UEnumeration* res, const UBool *resultsManually) { 328 UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool)); 329 const char* name; 330 UErrorCode status = U_ZERO_ERROR; 331 int32_t i; 332 333 /* fill the bool for the selector results! */ 334 uprv_memset(resultsFromSystem, 0, gCountAvailable); 335 while ((name = uenum_next(res,NULL, &status)) != NULL) { 336 resultsFromSystem[findIndex(name)] = TRUE; 337 } 338 for(i = 0 ; i < gCountAvailable; i++) { 339 if(resultsManually[i] != resultsFromSystem[i]) { 340 log_err("failure in converter selector\n" 341 "converter %s had conflicting results -- manual: %d, system %d\n", 342 gAvailableNames[i], resultsManually[i], resultsFromSystem[i]); 343 } 344 } 345 uprv_free(resultsFromSystem); 346 uenum_close(res); 347 } 348 349 static UConverterSelector * 350 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) { 351 char *new_buffer; 352 int32_t ser_len, ser_len2; 353 /* preflight */ 354 ser_len = ucnvsel_serialize(sel, NULL, 0, status); 355 if (*status != U_BUFFER_OVERFLOW_ERROR) { 356 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status)); 357 return sel; 358 } 359 new_buffer = (char *)uprv_malloc(ser_len); 360 *status = U_ZERO_ERROR; 361 ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status); 362 if (U_FAILURE(*status) || ser_len != ser_len2) { 363 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status)); 364 uprv_free(new_buffer); 365 return sel; 366 } 367 ucnvsel_close(sel); 368 uprv_free(*buffer); 369 *buffer = new_buffer; 370 sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status); 371 if (U_FAILURE(*status)) { 372 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status)); 373 return NULL; 374 } 375 return sel; 376 } 377 378 static void TestSelector() 379 { 380 TestText text; 381 USet* excluded_sets[3] = { NULL }; 382 int32_t i, testCaseIdx; 383 384 if (!getAvailableNames()) { 385 return; 386 } 387 if (!text_open(&text)) { 388 releaseAvailableNames();; 389 } 390 391 excluded_sets[0] = uset_openEmpty(); 392 for(i = 1 ; i < 3 ; i++) { 393 excluded_sets[i] = uset_open(i*30, i*30+500); 394 } 395 396 for(testCaseIdx = 0; testCaseIdx < LENGTHOF(getEncodingsFns); testCaseIdx++) 397 { 398 int32_t excluded_set_id; 399 int32_t num_encodings; 400 const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings); 401 if (getTestOption(QUICK_OPTION) && num_encodings > 25) { 402 uprv_free((void *)encodings); 403 continue; 404 } 405 406 /* 407 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++) 408 * 409 * This loop was replaced by the following statement because 410 * the loop made the test run longer without adding to the code coverage. 411 * The handling of the exclusion set is independent of the 412 * set of encodings, so there is no need to test every combination. 413 */ 414 excluded_set_id = testCaseIdx % LENGTHOF(excluded_sets); 415 { 416 UConverterSelector *sel_rt, *sel_fb; 417 char *buffer_fb = NULL; 418 UErrorCode status = U_ZERO_ERROR; 419 sel_rt = ucnvsel_open(encodings, num_encodings, 420 excluded_sets[excluded_set_id], 421 UCNV_ROUNDTRIP_SET, &status); 422 if (num_encodings == gCountAvailable) { 423 /* test the special "all converters" parameter values */ 424 sel_fb = ucnvsel_open(NULL, 0, 425 excluded_sets[excluded_set_id], 426 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 427 } else if (uset_isEmpty(excluded_sets[excluded_set_id])) { 428 /* test that a NULL set gives the same results as an empty set */ 429 sel_fb = ucnvsel_open(encodings, num_encodings, 430 NULL, 431 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 432 } else { 433 sel_fb = ucnvsel_open(encodings, num_encodings, 434 excluded_sets[excluded_set_id], 435 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status); 436 } 437 if (U_FAILURE(status)) { 438 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status)); 439 ucnvsel_close(sel_rt); 440 uprv_free((void *)encodings); 441 continue; 442 } 443 444 text_reset(&text); 445 for (;;) { 446 UBool *manual_rt, *manual_fb; 447 static UChar utf16[10000]; 448 char *s; 449 int32_t length8, length16; 450 451 s = text_nextString(&text, &length8); 452 if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) { 453 break; 454 } 455 456 manual_rt = getResultsManually(encodings, num_encodings, 457 s, length8, 458 excluded_sets[excluded_set_id], 459 UCNV_ROUNDTRIP_SET); 460 manual_fb = getResultsManually(encodings, num_encodings, 461 s, length8, 462 excluded_sets[excluded_set_id], 463 UCNV_ROUNDTRIP_AND_FALLBACK_SET); 464 /* UTF-8 with length */ 465 status = U_ZERO_ERROR; 466 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt); 467 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb); 468 /* UTF-8 NUL-terminated */ 469 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt); 470 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb); 471 472 u_strFromUTF8(utf16, LENGTHOF(utf16), &length16, s, length8, &status); 473 if (U_FAILURE(status)) { 474 log_err("error converting the test text (string %ld) to UTF-16 - %s\n", 475 (long)text.number, u_errorName(status)); 476 } else { 477 if (text.number == 0) { 478 sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status); 479 } 480 if (U_SUCCESS(status)) { 481 /* UTF-16 with length */ 482 verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt); 483 verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb); 484 /* UTF-16 NUL-terminated */ 485 verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt); 486 verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb); 487 } 488 } 489 490 uprv_free(manual_rt); 491 uprv_free(manual_fb); 492 } 493 ucnvsel_close(sel_rt); 494 ucnvsel_close(sel_fb); 495 uprv_free(buffer_fb); 496 } 497 uprv_free((void *)encodings); 498 } 499 500 releaseAvailableNames(); 501 text_close(&text); 502 for(i = 0 ; i < 3 ; i++) { 503 uset_close(excluded_sets[i]); 504 } 505 } 506 507 /* Improve code coverage of UPropsVectors */ 508 static void TestUPropsVector() { 509 uint32_t value; 510 UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR; 511 UPropsVectors *pv = upvec_open(100, &errorCode); 512 if (pv != NULL) { 513 log_err("Should have returned NULL if UErrorCode is an error."); 514 return; 515 } 516 errorCode = U_ZERO_ERROR; 517 pv = upvec_open(-1, &errorCode); 518 if (pv != NULL || U_SUCCESS(errorCode)) { 519 log_err("Should have returned NULL if column is less than 0.\n"); 520 return; 521 } 522 errorCode = U_ZERO_ERROR; 523 pv = upvec_open(100, &errorCode); 524 if (pv == NULL || U_FAILURE(errorCode)) { 525 log_err("Unable to open UPropsVectors.\n"); 526 return; 527 } 528 529 if (upvec_getValue(pv, 0, 1) != 0) { 530 log_err("upvec_getValue should return 0.\n"); 531 } 532 if (upvec_getRow(pv, 0, NULL, NULL) == NULL) { 533 log_err("upvec_getRow should not return NULL.\n"); 534 } 535 if (upvec_getArray(pv, NULL, NULL) != NULL) { 536 log_err("upvec_getArray should return NULL.\n"); 537 } 538 539 upvec_close(pv); 540 } 541