1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2009, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 */ 7 #include "unicode/uset.h" 8 #include "unicode/ustring.h" 9 #include "cintltst.h" 10 #include <stdlib.h> 11 #include <string.h> 12 13 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 14 15 #define TEST(x) addTest(root, &x, "uset/" # x) 16 17 static void TestAPI(void); 18 static void Testj2269(void); 19 static void TestSerialized(void); 20 static void TestNonInvariantPattern(void); 21 static void TestBadPattern(void); 22 static void TestFreezable(void); 23 static void TestSpan(void); 24 25 void addUSetTest(TestNode** root); 26 27 static void expect(const USet* set, 28 const char* inList, 29 const char* outList, 30 UErrorCode* ec); 31 static void expectContainment(const USet* set, 32 const char* list, 33 UBool isIn); 34 static char oneUCharToChar(UChar32 c); 35 static void expectItems(const USet* set, 36 const char* items); 37 38 void 39 addUSetTest(TestNode** root) { 40 TEST(TestAPI); 41 TEST(Testj2269); 42 TEST(TestSerialized); 43 TEST(TestNonInvariantPattern); 44 TEST(TestBadPattern); 45 TEST(TestFreezable); 46 TEST(TestSpan); 47 } 48 49 /*------------------------------------------------------------------ 50 * Tests 51 *------------------------------------------------------------------*/ 52 53 static void Testj2269() { 54 UErrorCode status = U_ZERO_ERROR; 55 UChar a[4] = { 0x61, 0x62, 0x63, 0 }; 56 USet *s = uset_open(1, 0); 57 uset_addString(s, a, 3); 58 a[0] = 0x63; a[1] = 0x63; 59 expect(s, "{abc}", "{ccc}", &status); 60 uset_close(s); 61 } 62 63 static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */ 64 static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1; 65 66 static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb" */ 67 static const int32_t PAT_lb_LEN = (sizeof(PAT_lb) / sizeof(PAT_lb[0])) - 1; 68 69 static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP" */ 70 static const int32_t VAL_SP_LEN = (sizeof(VAL_SP) / sizeof(VAL_SP[0])) - 1; 71 72 static const UChar STR_bc[] = {98,99,0}; /* "bc" */ 73 static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1; 74 75 static const UChar STR_ab[] = {97,98,0}; /* "ab" */ 76 static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1; 77 78 /** 79 * Basic API test for uset.x 80 */ 81 static void TestAPI() { 82 USet* set; 83 USet* set2; 84 UErrorCode ec; 85 86 /* [] */ 87 set = uset_openEmpty(); 88 expect(set, "", "abc{ab}", NULL); 89 uset_close(set); 90 91 set = uset_open(1, 0); 92 expect(set, "", "abc{ab}", NULL); 93 uset_close(set); 94 95 set = uset_open(1, 1); 96 uset_clear(set); 97 expect(set, "", "abc{ab}", NULL); 98 uset_close(set); 99 100 /* [ABC] */ 101 set = uset_open(0x0041, 0x0043); 102 expect(set, "ABC", "DEF{ab}", NULL); 103 uset_close(set); 104 105 /* [a-c{ab}] */ 106 ec = U_ZERO_ERROR; 107 set = uset_openPattern(PAT, PAT_LEN, &ec); 108 if(U_FAILURE(ec)) { 109 log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec)); 110 return; 111 } 112 if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) { 113 log_err("uset_resemblesPattern of PAT failed\n"); 114 } 115 expect(set, "abc{ab}", "def{bc}", &ec); 116 117 /* [a-d{ab}] */ 118 uset_add(set, 0x64); 119 expect(set, "abcd{ab}", "ef{bc}", NULL); 120 121 /* [acd{ab}{bc}] */ 122 uset_remove(set, 0x62); 123 uset_addString(set, STR_bc, STR_bc_LEN); 124 expect(set, "acd{ab}{bc}", "bef{cd}", NULL); 125 126 /* [acd{bc}] */ 127 uset_removeString(set, STR_ab, STR_ab_LEN); 128 expect(set, "acd{bc}", "bfg{ab}", NULL); 129 130 /* [^acd{bc}] */ 131 uset_complement(set); 132 expect(set, "bef{bc}", "acd{ac}", NULL); 133 134 /* [a-e{bc}] */ 135 uset_complement(set); 136 uset_addRange(set, 0x0062, 0x0065); 137 expect(set, "abcde{bc}", "fg{ab}", NULL); 138 139 /* [de{bc}] */ 140 uset_removeRange(set, 0x0050, 0x0063); 141 expect(set, "de{bc}", "bcfg{ab}", NULL); 142 143 /* [g-l] */ 144 uset_set(set, 0x0067, 0x006C); 145 expect(set, "ghijkl", "de{bc}", NULL); 146 147 if (uset_indexOf(set, 0x0067) != 0) { 148 log_err("uset_indexOf failed finding correct index of 'g'\n"); 149 } 150 151 if (uset_charAt(set, 0) != 0x0067) { 152 log_err("uset_charAt failed finding correct char 'g' at index 0\n"); 153 } 154 155 /* How to test this one...? */ 156 uset_compact(set); 157 158 /* [g-i] */ 159 uset_retain(set, 0x0067, 0x0069); 160 expect(set, "ghi", "dejkl{bc}", NULL); 161 162 /* UCHAR_ASCII_HEX_DIGIT */ 163 uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec); 164 if(U_FAILURE(ec)) { 165 log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec)); 166 return; 167 } 168 expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL); 169 170 /* [ab] */ 171 uset_clear(set); 172 uset_addAllCodePoints(set, STR_ab, STR_ab_LEN); 173 expect(set, "ab", "def{ab}", NULL); 174 if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){ 175 log_err("set should not conatin all characters of \"bc\" \n"); 176 } 177 178 /* [] */ 179 set2 = uset_open(1, 1); 180 uset_clear(set2); 181 182 /* space */ 183 uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec); 184 expect(set2, " ", "abcdefghi{bc}", NULL); 185 186 /* [a-c] */ 187 uset_set(set2, 0x0061, 0x0063); 188 /* [g-i] */ 189 uset_set(set, 0x0067, 0x0069); 190 191 /* [a-c g-i] */ 192 if (uset_containsSome(set, set2)) { 193 log_err("set should not contain some of set2 yet\n"); 194 } 195 uset_complementAll(set, set2); 196 if (!uset_containsSome(set, set2)) { 197 log_err("set should contain some of set2\n"); 198 } 199 expect(set, "abcghi", "def{bc}", NULL); 200 201 /* [g-i] */ 202 uset_removeAll(set, set2); 203 expect(set, "ghi", "abcdef{bc}", NULL); 204 205 /* [a-c g-i] */ 206 uset_addAll(set2, set); 207 expect(set2, "abcghi", "def{bc}", NULL); 208 209 /* [g-i] */ 210 uset_retainAll(set2, set); 211 expect(set2, "ghi", "abcdef{bc}", NULL); 212 213 uset_close(set); 214 uset_close(set2); 215 } 216 217 /*------------------------------------------------------------------ 218 * Support 219 *------------------------------------------------------------------*/ 220 221 /** 222 * Verifies that the given set contains the characters and strings in 223 * inList, and does not contain those in outList. Also verifies that 224 * 'set' is not NULL and that 'ec' succeeds. 225 * @param set the set to test, or NULL (on error) 226 * @param inList list of set contents, in iteration order. Format is 227 * list of individual strings, in iteration order, followed by sorted 228 * list of strings, delimited by {}. This means we do not test 229 * characters '{' or '}' and we do not test strings containing those 230 * characters either. 231 * @param outList list of things not in the set. Same format as 232 * inList. 233 * @param ec an error code, checked for success. May be NULL in which 234 * case it is ignored. 235 */ 236 static void expect(const USet* set, 237 const char* inList, 238 const char* outList, 239 UErrorCode* ec) { 240 if (ec!=NULL && U_FAILURE(*ec)) { 241 log_err("FAIL: %s\n", u_errorName(*ec)); 242 return; 243 } 244 if (set == NULL) { 245 log_err("FAIL: USet is NULL\n"); 246 return; 247 } 248 expectContainment(set, inList, TRUE); 249 expectContainment(set, outList, FALSE); 250 expectItems(set, inList); 251 } 252 253 static void expectContainment(const USet* set, 254 const char* list, 255 UBool isIn) { 256 const char* p = list; 257 UChar ustr[4096]; 258 char *pat; 259 UErrorCode ec; 260 int32_t rangeStart = -1, rangeEnd = -1, length; 261 262 ec = U_ZERO_ERROR; 263 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec); 264 if(U_FAILURE(ec)) { 265 log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec)); 266 return; 267 } 268 pat=aescstrdup(ustr, length); 269 270 while (*p) { 271 if (*p=='{') { 272 const char* stringStart = ++p; 273 int32_t stringLength = 0; 274 char strCopy[64]; 275 276 while (*p++ != '}') { 277 } 278 stringLength = (int32_t)(p - stringStart - 1); 279 strncpy(strCopy, stringStart, stringLength); 280 strCopy[stringLength] = 0; 281 282 u_charsToUChars(stringStart, ustr, stringLength); 283 284 if (uset_containsString(set, ustr, stringLength) == isIn) { 285 log_verbose("Ok: %s %s \"%s\"\n", pat, 286 (isIn ? "contains" : "does not contain"), 287 strCopy); 288 } else { 289 log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat, 290 (isIn ? "does not contain" : "contains"), 291 strCopy); 292 } 293 } 294 295 else { 296 UChar32 c; 297 298 u_charsToUChars(p, ustr, 1); 299 c = ustr[0]; 300 301 if (uset_contains(set, c) == isIn) { 302 log_verbose("Ok: %s %s '%c'\n", pat, 303 (isIn ? "contains" : "does not contain"), 304 *p); 305 } else { 306 log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat, 307 (isIn ? "does not contain" : "contains"), 308 *p); 309 } 310 311 /* Test the range API too by looking for ranges */ 312 if (c == rangeEnd+1) { 313 rangeEnd = c; 314 } else { 315 if (rangeStart >= 0) { 316 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) { 317 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat, 318 (isIn ? "contains" : "does not contain"), 319 rangeStart, rangeEnd); 320 } else { 321 log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat, 322 (isIn ? "does not contain" : "contains"), 323 rangeStart, rangeEnd); 324 } 325 } 326 rangeStart = rangeEnd = c; 327 } 328 329 ++p; 330 } 331 } 332 333 if (rangeStart >= 0) { 334 if (uset_containsRange(set, rangeStart, rangeEnd) == isIn) { 335 log_verbose("Ok: %s %s U+%04X-U+%04X\n", pat, 336 (isIn ? "contains" : "does not contain"), 337 rangeStart, rangeEnd); 338 } else { 339 log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat, 340 (isIn ? "does not contain" : "contains"), 341 rangeStart, rangeEnd); 342 } 343 } 344 } 345 346 /* This only works for invariant BMP chars */ 347 static char oneUCharToChar(UChar32 c) { 348 UChar ubuf[1]; 349 char buf[1]; 350 ubuf[0] = (UChar) c; 351 u_UCharsToChars(ubuf, buf, 1); 352 return buf[0]; 353 } 354 355 static void expectItems(const USet* set, 356 const char* items) { 357 const char* p = items; 358 UChar ustr[4096], itemStr[4096]; 359 char buf[4096]; 360 char *pat; 361 UErrorCode ec; 362 int32_t expectedSize = 0; 363 int32_t itemCount = uset_getItemCount(set); 364 int32_t itemIndex = 0; 365 UChar32 start = 1, end = 0; 366 int32_t itemLen = 0, length; 367 368 ec = U_ZERO_ERROR; 369 length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec); 370 if (U_FAILURE(ec)) { 371 log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec)); 372 return; 373 } 374 pat=aescstrdup(ustr, length); 375 376 if (uset_isEmpty(set) != (strlen(items)==0)) { 377 log_data_err("FAIL: %s should return %s from isEmpty (Are you missing data?)\n", 378 pat, 379 strlen(items)==0 ? "TRUE" : "FALSE"); 380 } 381 382 /* Don't test patterns starting with "[^" */ 383 if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) { 384 return; 385 } 386 387 while (*p) { 388 389 ++expectedSize; 390 391 if (start > end || start == -1) { 392 /* Fetch our next item */ 393 if (itemIndex >= itemCount) { 394 log_data_err("FAIL: ran out of items iterating %s (Are you missing data?)\n", pat); 395 return; 396 } 397 398 itemLen = uset_getItem(set, itemIndex, &start, &end, 399 itemStr, sizeof(itemStr), &ec); 400 if (U_FAILURE(ec) || itemLen < 0) { 401 log_err("FAIL: uset_getItem => %s\n", u_errorName(ec)); 402 return; 403 } 404 405 if (itemLen == 0) { 406 log_verbose("Ok: %s item %d is %c-%c\n", pat, 407 itemIndex, oneUCharToChar(start), 408 oneUCharToChar(end)); 409 } else { 410 itemStr[itemLen] = 0; 411 u_UCharsToChars(itemStr, buf, itemLen+1); 412 log_verbose("Ok: %s item %d is \"%s\"\n", pat, itemIndex, buf); 413 } 414 415 ++itemIndex; 416 } 417 418 if (*p=='{') { 419 const char* stringStart = ++p; 420 int32_t stringLength = 0; 421 char strCopy[64]; 422 423 while (*p++ != '}') { 424 } 425 stringLength = (int32_t)(p - stringStart - 1); 426 strncpy(strCopy, stringStart, stringLength); 427 strCopy[stringLength] = 0; 428 429 u_charsToUChars(stringStart, ustr, stringLength); 430 ustr[stringLength] = 0; 431 432 if (itemLen == 0) { 433 log_err("FAIL: for %s expect \"%s\" next, but got a char\n", 434 pat, strCopy); 435 return; 436 } 437 438 if (u_strcmp(ustr, itemStr) != 0) { 439 log_err("FAIL: for %s expect \"%s\" next\n", 440 pat, strCopy); 441 return; 442 } 443 } 444 445 else { 446 UChar32 c; 447 448 u_charsToUChars(p, ustr, 1); 449 c = ustr[0]; 450 451 if (itemLen != 0) { 452 log_err("FAIL: for %s expect '%c' next, but got a string\n", 453 pat, *p); 454 return; 455 } 456 457 if (c != start++) { 458 log_err("FAIL: for %s expect '%c' next\n", 459 pat, *p); 460 return; 461 } 462 463 ++p; 464 } 465 } 466 467 if (uset_size(set) == expectedSize) { 468 log_verbose("Ok: %s size is %d\n", pat, expectedSize); 469 } else { 470 log_err("FAIL: %s size is %d, expected %d\n", 471 pat, uset_size(set), expectedSize); 472 } 473 } 474 475 static void 476 TestSerialized() { 477 uint16_t buffer[1000]; 478 USerializedSet sset; 479 USet *set; 480 UErrorCode errorCode; 481 UChar32 c; 482 int32_t length; 483 484 /* use a pattern that generates both BMP and supplementary code points */ 485 U_STRING_DECL(pattern, "[:Cf:]", 6); 486 U_STRING_INIT(pattern, "[:Cf:]", 6); 487 488 errorCode=U_ZERO_ERROR; 489 set=uset_openPattern(pattern, -1, &errorCode); 490 if(U_FAILURE(errorCode)) { 491 log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode)); 492 return; 493 } 494 495 length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode); 496 if(U_FAILURE(errorCode)) { 497 log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode)); 498 uset_close(set); 499 return; 500 } 501 502 uset_getSerializedSet(&sset, buffer, length); 503 for(c=0; c<=0x10ffff; ++c) { 504 if(uset_contains(set, c)!=uset_serializedContains(&sset, c)) { 505 log_err("uset_contains(U+%04x)!=uset_serializedContains(U+%04x)\n", c); 506 break; 507 } 508 } 509 510 uset_close(set); 511 } 512 513 /** 514 * Make sure that when non-invariant chars are passed to uset_openPattern 515 * they do not cause an ugly failure mode (e.g. assertion failure). 516 * JB#3795. 517 */ 518 static void 519 TestNonInvariantPattern() { 520 UErrorCode ec = U_ZERO_ERROR; 521 /* The critical part of this test is that the following pattern 522 must contain a non-invariant character. */ 523 static const char *pattern = "[:ccc!=0:]"; 524 UChar buf[256]; 525 int32_t len = u_unescape(pattern, buf, 256); 526 /* This test 'fails' by having an assertion failure within the 527 following call. It passes by running to completion with no 528 assertion failure. */ 529 USet *set = uset_openPattern(buf, len, &ec); 530 uset_close(set); 531 } 532 533 static void TestBadPattern(void) { 534 UErrorCode status = U_ZERO_ERROR; 535 USet *pat; 536 U_STRING_DECL(pattern, "[", 1); 537 U_STRING_INIT(pattern, "[", 1); 538 pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status); 539 if (pat != NULL || U_SUCCESS(status)) { 540 log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status)); 541 } 542 } 543 544 static USet *openIDSet() { 545 UErrorCode errorCode = U_ZERO_ERROR; 546 U_STRING_DECL(pattern, "[:ID_Continue:]", 15); 547 U_STRING_INIT(pattern, "[:ID_Continue:]", 15); 548 return uset_openPattern(pattern, 15, &errorCode); 549 } 550 551 static void TestFreezable() { 552 USet *idSet; 553 USet *frozen; 554 USet *thawed; 555 556 idSet=openIDSet(); 557 558 if (idSet == NULL) { 559 log_data_err("openIDSet() returned NULL. (Are you missing data?)\n"); 560 uset_close(idSet); 561 return; 562 } 563 564 frozen=uset_clone(idSet); 565 566 if (frozen == NULL) { 567 log_err("uset_Clone() returned NULL\n"); 568 return; 569 } 570 571 if(!uset_equals(frozen, idSet)) { 572 log_err("uset_clone() did not make an equal copy\n"); 573 } 574 575 uset_freeze(frozen); 576 uset_addRange(frozen, 0xd802, 0xd805); 577 578 if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) { 579 log_err("uset_freeze() or uset_isFrozen() does not work\n"); 580 } 581 582 thawed=uset_cloneAsThawed(frozen); 583 584 if (thawed == NULL) { 585 log_err("uset_cloneAsThawed(frozen) returned NULL"); 586 uset_close(frozen); 587 uset_close(idSet); 588 return; 589 } 590 591 uset_addRange(thawed, 0xd802, 0xd805); 592 593 if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) { 594 log_err("uset_cloneAsThawed() does not work\n"); 595 } 596 597 uset_close(idSet); 598 uset_close(frozen); 599 uset_close(thawed); 600 } 601 602 static void TestSpan() { 603 static const UChar s16[2]={ 0xe01, 0x3000 }; 604 static const char* s8="\xE0\xB8\x81\xE3\x80\x80"; 605 606 USet *idSet=openIDSet(); 607 608 if (idSet == NULL) { 609 log_data_err("openIDSet() returned NULL (Are you missing data?)\n"); 610 return; 611 } 612 613 if( 614 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) || 615 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) || 616 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) || 617 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) 618 ) { 619 log_err("uset_span() or uset_spanBack() does not work\n"); 620 } 621 622 if( 623 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) || 624 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) || 625 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) || 626 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) 627 ) { 628 log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n"); 629 } 630 631 uset_freeze(idSet); 632 633 if( 634 1!=uset_span(idSet, s16, 2, USET_SPAN_CONTAINED) || 635 0!=uset_span(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) || 636 2!=uset_spanBack(idSet, s16, 2, USET_SPAN_CONTAINED) || 637 1!=uset_spanBack(idSet, s16, 2, USET_SPAN_NOT_CONTAINED) 638 ) { 639 log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n"); 640 } 641 642 if( 643 3!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) || 644 0!=uset_spanUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) || 645 6!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_CONTAINED) || 646 3!=uset_spanBackUTF8(idSet, s8, 6, USET_SPAN_NOT_CONTAINED) 647 ) { 648 log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n"); 649 } 650 651 uset_close(idSet); 652 } 653 654 /*eof*/ 655