1 /******************************************************************** 2 * COPYRIGHT: 3 * Copyright (c) 1997-2010, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ********************************************************************/ 6 /******************************************************************************* 7 * 8 * File CUCDTST.C 9 * 10 * Modification History: 11 * Name Description 12 * Madhu Katragadda Ported for C API, added tests for string functions 13 ******************************************************************************** 14 */ 15 16 #include <string.h> 17 #include <math.h> 18 #include <stdlib.h> 19 20 #include "unicode/utypes.h" 21 #include "unicode/uchar.h" 22 #include "unicode/putil.h" 23 #include "unicode/ustring.h" 24 #include "unicode/uloc.h" 25 #include "unicode/unorm2.h" 26 27 #include "cintltst.h" 28 #include "putilimp.h" 29 #include "uparse.h" 30 #include "ucase.h" 31 #include "ubidi_props.h" 32 #include "uprops.h" 33 #include "uset_imp.h" 34 #include "usc_impl.h" 35 #include "unormimp.h" 36 #include "udatamem.h" /* for testing ucase_openBinary() */ 37 #include "cucdapi.h" 38 39 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 40 41 /* prototypes --------------------------------------------------------------- */ 42 43 static void TestUpperLower(void); 44 static void TestLetterNumber(void); 45 static void TestMisc(void); 46 static void TestPOSIX(void); 47 static void TestControlPrint(void); 48 static void TestIdentifier(void); 49 static void TestUnicodeData(void); 50 static void TestCodeUnit(void); 51 static void TestCodePoint(void); 52 static void TestCharLength(void); 53 static void TestCharNames(void); 54 static void TestMirroring(void); 55 static void TestUScriptRunAPI(void); 56 static void TestAdditionalProperties(void); 57 static void TestNumericProperties(void); 58 static void TestPropertyNames(void); 59 static void TestPropertyValues(void); 60 static void TestConsistency(void); 61 static void TestUCase(void); 62 static void TestUBiDiProps(void); 63 static void TestCaseFolding(void); 64 65 /* internal methods used */ 66 static int32_t MakeProp(char* str); 67 static int32_t MakeDir(char* str); 68 69 /* helpers ------------------------------------------------------------------ */ 70 71 static void 72 parseUCDFile(const char *filename, 73 char *fields[][2], int32_t fieldCount, 74 UParseLineFn *lineFn, void *context, 75 UErrorCode *pErrorCode) { 76 char path[256]; 77 char backupPath[256]; 78 79 if(U_FAILURE(*pErrorCode)) { 80 return; 81 } 82 83 /* Look inside ICU_DATA first */ 84 strcpy(path, u_getDataDirectory()); 85 strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING); 86 strcat(path, filename); 87 88 /* As a fallback, try to guess where the source data was located 89 * at the time ICU was built, and look there. 90 */ 91 strcpy(backupPath, ctest_dataSrcDir()); 92 strcat(backupPath, U_FILE_SEP_STRING); 93 strcat(backupPath, "unidata" U_FILE_SEP_STRING); 94 strcat(backupPath, filename); 95 96 u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode); 97 if(*pErrorCode==U_FILE_ACCESS_ERROR) { 98 *pErrorCode=U_ZERO_ERROR; 99 u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode); 100 } 101 if(U_FAILURE(*pErrorCode)) { 102 log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode)); 103 } 104 } 105 106 /* test data ---------------------------------------------------------------- */ 107 108 static const UChar LAST_CHAR_CODE_IN_FILE = 0xFFFD; 109 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf"; 110 static const int32_t tagValues[] = 111 { 112 /* Mn */ U_NON_SPACING_MARK, 113 /* Mc */ U_COMBINING_SPACING_MARK, 114 /* Me */ U_ENCLOSING_MARK, 115 /* Nd */ U_DECIMAL_DIGIT_NUMBER, 116 /* Nl */ U_LETTER_NUMBER, 117 /* No */ U_OTHER_NUMBER, 118 /* Zs */ U_SPACE_SEPARATOR, 119 /* Zl */ U_LINE_SEPARATOR, 120 /* Zp */ U_PARAGRAPH_SEPARATOR, 121 /* Cc */ U_CONTROL_CHAR, 122 /* Cf */ U_FORMAT_CHAR, 123 /* Cs */ U_SURROGATE, 124 /* Co */ U_PRIVATE_USE_CHAR, 125 /* Cn */ U_UNASSIGNED, 126 /* Lu */ U_UPPERCASE_LETTER, 127 /* Ll */ U_LOWERCASE_LETTER, 128 /* Lt */ U_TITLECASE_LETTER, 129 /* Lm */ U_MODIFIER_LETTER, 130 /* Lo */ U_OTHER_LETTER, 131 /* Pc */ U_CONNECTOR_PUNCTUATION, 132 /* Pd */ U_DASH_PUNCTUATION, 133 /* Ps */ U_START_PUNCTUATION, 134 /* Pe */ U_END_PUNCTUATION, 135 /* Po */ U_OTHER_PUNCTUATION, 136 /* Sm */ U_MATH_SYMBOL, 137 /* Sc */ U_CURRENCY_SYMBOL, 138 /* Sk */ U_MODIFIER_SYMBOL, 139 /* So */ U_OTHER_SYMBOL, 140 /* Pi */ U_INITIAL_PUNCTUATION, 141 /* Pf */ U_FINAL_PUNCTUATION 142 }; 143 144 static const char dirStrings[][5] = { 145 "L", 146 "R", 147 "EN", 148 "ES", 149 "ET", 150 "AN", 151 "CS", 152 "B", 153 "S", 154 "WS", 155 "ON", 156 "LRE", 157 "LRO", 158 "AL", 159 "RLE", 160 "RLO", 161 "PDF", 162 "NSM", 163 "BN" 164 }; 165 166 void addUnicodeTest(TestNode** root); 167 168 void addUnicodeTest(TestNode** root) 169 { 170 addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit"); 171 addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint"); 172 addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength"); 173 addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues"); 174 addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData"); 175 addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties"); 176 addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties"); 177 addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower"); 178 addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber"); 179 addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc"); 180 addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX"); 181 addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint"); 182 addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier"); 183 addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames"); 184 addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring"); 185 addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI"); 186 addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI"); 187 addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames"); 188 addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues"); 189 addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency"); 190 addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase"); 191 addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps"); 192 addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding"); 193 } 194 195 /*==================================================== */ 196 /* test u_toupper() and u_tolower() */ 197 /*==================================================== */ 198 static void TestUpperLower() 199 { 200 const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000}; 201 const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000}; 202 U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21); 203 U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21); 204 int32_t i; 205 206 U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21); 207 U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21); 208 209 /* 210 Checks LetterLike Symbols which were previously a source of confusion 211 [Bertrand A. D. 02/04/98] 212 */ 213 for (i=0x2100;i<0x2138;i++) 214 { 215 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */ 216 if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132) 217 { 218 if (i != (int)u_tolower(i)) /* itself */ 219 log_err("Failed case conversion with itself: U+%04x\n", i); 220 if (i != (int)u_toupper(i)) 221 log_err("Failed case conversion with itself: U+%04x\n", i); 222 } 223 } 224 225 for(i=0; i < u_strlen(upper); i++){ 226 if(u_tolower(upper[i]) != lower[i]){ 227 log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i])); 228 } 229 } 230 231 log_verbose("testing upper lower\n"); 232 for (i = 0; i < 21; i++) { 233 234 if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i])) 235 { 236 log_err("Failed isLowerCase test at %c\n", upperTest[i]); 237 } 238 else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i])) 239 { 240 log_err("Failed isUpperCase test at %c\n", lowerTest[i]); 241 } 242 else if (upperTest[i] != u_tolower(lowerTest[i])) 243 { 244 log_err("Failed case conversion from %c To %c :\n", lowerTest[i], upperTest[i]); 245 } 246 else if (lowerTest[i] != u_toupper(upperTest[i])) 247 { 248 log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]); 249 } 250 else if (upperTest[i] != u_tolower(upperTest[i])) 251 { 252 log_err("Failed case conversion with itself: %c\n", upperTest[i]); 253 } 254 else if (lowerTest[i] != u_toupper(lowerTest[i])) 255 { 256 log_err("Failed case conversion with itself: %c\n", lowerTest[i]); 257 } 258 } 259 log_verbose("done testing upper lower\n"); 260 261 log_verbose("testing u_istitle\n"); 262 { 263 static const UChar expected[] = { 264 0x1F88, 265 0x1F89, 266 0x1F8A, 267 0x1F8B, 268 0x1F8C, 269 0x1F8D, 270 0x1F8E, 271 0x1F8F, 272 0x1F88, 273 0x1F89, 274 0x1F8A, 275 0x1F8B, 276 0x1F8C, 277 0x1F8D, 278 0x1F8E, 279 0x1F8F, 280 0x1F98, 281 0x1F99, 282 0x1F9A, 283 0x1F9B, 284 0x1F9C, 285 0x1F9D, 286 0x1F9E, 287 0x1F9F, 288 0x1F98, 289 0x1F99, 290 0x1F9A, 291 0x1F9B, 292 0x1F9C, 293 0x1F9D, 294 0x1F9E, 295 0x1F9F, 296 0x1FA8, 297 0x1FA9, 298 0x1FAA, 299 0x1FAB, 300 0x1FAC, 301 0x1FAD, 302 0x1FAE, 303 0x1FAF, 304 0x1FA8, 305 0x1FA9, 306 0x1FAA, 307 0x1FAB, 308 0x1FAC, 309 0x1FAD, 310 0x1FAE, 311 0x1FAF, 312 0x1FBC, 313 0x1FBC, 314 0x1FCC, 315 0x1FCC, 316 0x1FFC, 317 0x1FFC, 318 }; 319 int32_t num = sizeof(expected)/sizeof(expected[0]); 320 for(i=0; i<num; i++){ 321 if(!u_istitle(expected[i])){ 322 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]); 323 } 324 } 325 326 } 327 } 328 329 /* compare two sets and verify that their difference or intersection is empty */ 330 static UBool 331 showADiffB(const USet *a, const USet *b, 332 const char *a_name, const char *b_name, 333 UBool expect, UBool diffIsError) { 334 USet *aa; 335 int32_t i, start, end, length; 336 UErrorCode errorCode; 337 338 /* 339 * expect: 340 * TRUE -> a-b should be empty, that is, b should contain all of a 341 * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa) 342 */ 343 if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) { 344 return TRUE; 345 } 346 347 /* clone a to aa because a is const */ 348 aa=uset_open(1, 0); 349 if(aa==NULL) { 350 /* unusual problem - out of memory? */ 351 return FALSE; 352 } 353 uset_addAll(aa, a); 354 355 /* compute the set in question */ 356 if(expect) { 357 /* a-b */ 358 uset_removeAll(aa, b); 359 } else { 360 /* a&b */ 361 uset_retainAll(aa, b); 362 } 363 364 /* aa is not empty because of the initial tests above; show its contents */ 365 errorCode=U_ZERO_ERROR; 366 i=0; 367 for(;;) { 368 length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode); 369 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 370 break; /* done */ 371 } 372 if(U_FAILURE(errorCode)) { 373 log_err("error comparing %s with %s at difference item %d: %s\n", 374 a_name, b_name, i, u_errorName(errorCode)); 375 break; 376 } 377 if(length!=0) { 378 break; /* done with code points, got a string or -1 */ 379 } 380 381 if(diffIsError) { 382 if(expect) { 383 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name); 384 } else { 385 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end); 386 } 387 } else { 388 if(expect) { 389 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name); 390 } else { 391 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end); 392 } 393 } 394 395 ++i; 396 } 397 398 uset_close(aa); 399 return FALSE; 400 } 401 402 static UBool 403 showAMinusB(const USet *a, const USet *b, 404 const char *a_name, const char *b_name, 405 UBool diffIsError) { 406 return showADiffB(a, b, a_name, b_name, TRUE, diffIsError); 407 } 408 409 static UBool 410 showAIntersectB(const USet *a, const USet *b, 411 const char *a_name, const char *b_name, 412 UBool diffIsError) { 413 return showADiffB(a, b, a_name, b_name, FALSE, diffIsError); 414 } 415 416 static UBool 417 compareUSets(const USet *a, const USet *b, 418 const char *a_name, const char *b_name, 419 UBool diffIsError) { 420 /* 421 * Use an arithmetic & not a logical && so that both branches 422 * are always taken and all differences are shown. 423 */ 424 return 425 showAMinusB(a, b, a_name, b_name, diffIsError) & 426 showAMinusB(b, a, b_name, a_name, diffIsError); 427 } 428 429 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */ 430 static void TestLetterNumber() 431 { 432 UChar i = 0x0000; 433 434 log_verbose("Testing for isalpha\n"); 435 for (i = 0x0041; i < 0x005B; i++) { 436 if (!u_isalpha(i)) 437 { 438 log_err("Failed isLetter test at %.4X\n", i); 439 } 440 } 441 for (i = 0x0660; i < 0x066A; i++) { 442 if (u_isalpha(i)) 443 { 444 log_err("Failed isLetter test with numbers at %.4X\n", i); 445 } 446 } 447 448 log_verbose("Testing for isdigit\n"); 449 for (i = 0x0660; i < 0x066A; i++) { 450 if (!u_isdigit(i)) 451 { 452 log_verbose("Failed isNumber test at %.4X\n", i); 453 } 454 } 455 456 log_verbose("Testing for isalnum\n"); 457 for (i = 0x0041; i < 0x005B; i++) { 458 if (!u_isalnum(i)) 459 { 460 log_err("Failed isAlNum test at %.4X\n", i); 461 } 462 } 463 for (i = 0x0660; i < 0x066A; i++) { 464 if (!u_isalnum(i)) 465 { 466 log_err("Failed isAlNum test at %.4X\n", i); 467 } 468 } 469 470 { 471 /* 472 * The following checks work only starting from Unicode 4.0. 473 * Check the version number here. 474 */ 475 static UVersionInfo u401={ 4, 0, 1, 0 }; 476 UVersionInfo version; 477 u_getUnicodeVersion(version); 478 if(version[0]<4 || 0==memcmp(version, u401, 4)) { 479 return; 480 } 481 } 482 483 { 484 /* 485 * Sanity check: 486 * Verify that exactly the digit characters have decimal digit values. 487 * This assumption is used in the implementation of u_digit() 488 * (which checks nt=de) 489 * compared with the parallel java.lang.Character.digit() 490 * (which checks Nd). 491 * 492 * This was not true in Unicode 3.2 and earlier. 493 * Unicode 4.0 fixed discrepancies. 494 * Unicode 4.0.1 re-introduced problems in this area due to an 495 * unintentionally incomplete last-minute change. 496 */ 497 U_STRING_DECL(digitsPattern, "[:Nd:]", 6); 498 U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24); 499 500 USet *digits, *decimalValues; 501 UErrorCode errorCode; 502 503 U_STRING_INIT(digitsPattern, "[:Nd:]", 6); 504 U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24); 505 errorCode=U_ZERO_ERROR; 506 digits=uset_openPattern(digitsPattern, 6, &errorCode); 507 decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode); 508 509 if(U_SUCCESS(errorCode)) { 510 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE); 511 } 512 513 uset_close(digits); 514 uset_close(decimalValues); 515 } 516 } 517 518 static void testSampleCharProps(UBool propFn(UChar32), const char *propName, 519 const UChar32 *sampleChars, int32_t sampleCharsLength, 520 UBool expected) { 521 int32_t i; 522 for (i = 0; i < sampleCharsLength; ++i) { 523 UBool result = propFn(sampleChars[i]); 524 if (result != expected) { 525 log_err("error: character property function %s(U+%04x)=%d is wrong\n", 526 propName, sampleChars[i], result); 527 } 528 } 529 } 530 531 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */ 532 static void TestMisc() 533 { 534 static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005}; 535 static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74}; 536 static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e}; 537 static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd}; 538 static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2}; 539 static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B}; 540 /* static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/ 541 static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5}; 542 static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE}; 543 static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c}; 544 static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef}; 545 546 static const int32_t sampleDigitValues[] = {0, 2, 3, 5}; 547 548 uint32_t mask; 549 550 int32_t i; 551 char icuVersion[U_MAX_VERSION_STRING_LENGTH]; 552 UVersionInfo realVersion; 553 554 memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH); 555 556 testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, LENGTHOF(sampleSpaces), TRUE); 557 testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE); 558 559 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar", 560 sampleSpaces, LENGTHOF(sampleSpaces), TRUE); 561 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar", 562 sampleNonSpaces, LENGTHOF(sampleNonSpaces), FALSE); 563 564 testSampleCharProps(u_isWhitespace, "u_isWhitespace", 565 sampleWhiteSpaces, LENGTHOF(sampleWhiteSpaces), TRUE); 566 testSampleCharProps(u_isWhitespace, "u_isWhitespace", 567 sampleNonWhiteSpaces, LENGTHOF(sampleNonWhiteSpaces), FALSE); 568 569 testSampleCharProps(u_isdefined, "u_isdefined", 570 sampleDefined, LENGTHOF(sampleDefined), TRUE); 571 testSampleCharProps(u_isdefined, "u_isdefined", 572 sampleUndefined, LENGTHOF(sampleUndefined), FALSE); 573 574 testSampleCharProps(u_isbase, "u_isbase", sampleBase, LENGTHOF(sampleBase), TRUE); 575 testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, LENGTHOF(sampleNonBase), FALSE); 576 577 testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, LENGTHOF(sampleDigits), TRUE); 578 testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, LENGTHOF(sampleNonDigits), FALSE); 579 580 for (i = 0; i < LENGTHOF(sampleDigits); i++) { 581 if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) { 582 log_err("error: u_charDigitValue(U+04x)=%d != %d\n", 583 sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]); 584 } 585 } 586 587 /* Tests the ICU version #*/ 588 u_getVersion(realVersion); 589 u_versionToString(realVersion, icuVersion); 590 if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0) 591 { 592 log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion); 593 } 594 #if defined(ICU_VERSION) 595 /* test only happens where we have configure.in with VERSION - sanity check. */ 596 if(strcmp(U_ICU_VERSION, ICU_VERSION)) 597 { 598 log_err("ICU version mismatch: Header says %s, build environment says %s.\n", U_ICU_VERSION, ICU_VERSION); 599 } 600 #endif 601 602 /* test U_GC_... */ 603 if( 604 U_GET_GC_MASK(0x41)!=U_GC_LU_MASK || 605 U_GET_GC_MASK(0x662)!=U_GC_ND_MASK || 606 U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK || 607 U_GET_GC_MASK(0x28)!=U_GC_PS_MASK || 608 U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK || 609 U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK 610 ) { 611 log_err("error: U_GET_GC_MASK does not work properly\n"); 612 } 613 614 mask=0; 615 mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK; 616 617 mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK; 618 mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK; 619 mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK; 620 mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK; 621 mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK; 622 623 mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK; 624 mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK; 625 mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK; 626 627 mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK; 628 mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK; 629 mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK; 630 631 mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK; 632 mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK; 633 mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK; 634 635 mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK; 636 mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK; 637 mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK; 638 mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK; 639 640 mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK; 641 mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK; 642 mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK; 643 mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK; 644 mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK; 645 646 mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK; 647 mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK; 648 mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK; 649 mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK; 650 651 mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK; 652 mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK; 653 654 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) { 655 log_err("error: problems with U_GC_XX_MASK constants\n"); 656 } 657 658 mask=0; 659 mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK; 660 mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK; 661 mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK; 662 mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK; 663 mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK; 664 mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK; 665 mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK; 666 667 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) { 668 log_err("error: problems with U_GC_Y_MASK constants\n"); 669 } 670 { 671 static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 }; 672 for(i=0; i<10; i++){ 673 if(digit[i]!=u_forDigit(i,10)){ 674 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10)); 675 } 676 } 677 } 678 679 /* test u_digit() */ 680 { 681 static const struct { 682 UChar32 c; 683 int8_t radix, value; 684 } data[]={ 685 /* base 16 */ 686 { 0x0031, 16, 1 }, 687 { 0x0038, 16, 8 }, 688 { 0x0043, 16, 12 }, 689 { 0x0066, 16, 15 }, 690 { 0x00e4, 16, -1 }, 691 { 0x0662, 16, 2 }, 692 { 0x06f5, 16, 5 }, 693 { 0xff13, 16, 3 }, 694 { 0xff41, 16, 10 }, 695 696 /* base 8 */ 697 { 0x0031, 8, 1 }, 698 { 0x0038, 8, -1 }, 699 { 0x0043, 8, -1 }, 700 { 0x0066, 8, -1 }, 701 { 0x00e4, 8, -1 }, 702 { 0x0662, 8, 2 }, 703 { 0x06f5, 8, 5 }, 704 { 0xff13, 8, 3 }, 705 { 0xff41, 8, -1 }, 706 707 /* base 36 */ 708 { 0x5a, 36, 35 }, 709 { 0x7a, 36, 35 }, 710 { 0xff3a, 36, 35 }, 711 { 0xff5a, 36, 35 }, 712 713 /* wrong radix values */ 714 { 0x0031, 1, -1 }, 715 { 0xff3a, 37, -1 } 716 }; 717 718 for(i=0; i<LENGTHOF(data); ++i) { 719 if(u_digit(data[i].c, data[i].radix)!=data[i].value) { 720 log_err("u_digit(U+%04x, %d)=%d expected %d\n", 721 data[i].c, 722 data[i].radix, 723 u_digit(data[i].c, data[i].radix), 724 data[i].value); 725 } 726 } 727 } 728 } 729 730 /* test C/POSIX-style functions --------------------------------------------- */ 731 732 /* bit flags */ 733 #define ISAL 1 734 #define ISLO 2 735 #define ISUP 4 736 737 #define ISDI 8 738 #define ISXD 0x10 739 740 #define ISAN 0x20 741 742 #define ISPU 0x40 743 #define ISGR 0x80 744 #define ISPR 0x100 745 746 #define ISSP 0x200 747 #define ISBL 0x400 748 #define ISCN 0x800 749 750 /* C/POSIX-style functions, in the same order as the bit flags */ 751 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c); 752 753 static const struct { 754 IsPOSIXClass *fn; 755 const char *name; 756 } posixClasses[]={ 757 { u_isalpha, "isalpha" }, 758 { u_islower, "islower" }, 759 { u_isupper, "isupper" }, 760 { u_isdigit, "isdigit" }, 761 { u_isxdigit, "isxdigit" }, 762 { u_isalnum, "isalnum" }, 763 { u_ispunct, "ispunct" }, 764 { u_isgraph, "isgraph" }, 765 { u_isprint, "isprint" }, 766 { u_isspace, "isspace" }, 767 { u_isblank, "isblank" }, 768 { u_iscntrl, "iscntrl" } 769 }; 770 771 static const struct { 772 UChar32 c; 773 uint32_t posixResults; 774 } posixData[]={ 775 { 0x0008, ISCN }, /* backspace */ 776 { 0x0009, ISSP|ISBL|ISCN }, /* TAB */ 777 { 0x000a, ISSP| ISCN }, /* LF */ 778 { 0x000c, ISSP| ISCN }, /* FF */ 779 { 0x000d, ISSP| ISCN }, /* CR */ 780 { 0x0020, ISPR|ISSP|ISBL }, /* space */ 781 { 0x0021, ISPU|ISGR|ISPR }, /* ! */ 782 { 0x0033, ISDI|ISXD|ISAN| ISGR|ISPR }, /* 3 */ 783 { 0x0040, ISPU|ISGR|ISPR }, /* @ */ 784 { 0x0041, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* A */ 785 { 0x007a, ISAL|ISLO| ISAN| ISGR|ISPR }, /* z */ 786 { 0x007b, ISPU|ISGR|ISPR }, /* { */ 787 { 0x0085, ISSP| ISCN }, /* NEL */ 788 { 0x00a0, ISPR|ISSP|ISBL }, /* NBSP */ 789 { 0x00a4, ISGR|ISPR }, /* currency sign */ 790 { 0x00e4, ISAL|ISLO| ISAN| ISGR|ISPR }, /* a-umlaut */ 791 { 0x0300, ISGR|ISPR }, /* combining grave */ 792 { 0x0600, ISCN }, /* arabic number sign */ 793 { 0x0627, ISAL| ISAN| ISGR|ISPR }, /* alef */ 794 { 0x0663, ISDI|ISXD|ISAN| ISGR|ISPR }, /* arabic 3 */ 795 { 0x2002, ISPR|ISSP|ISBL }, /* en space */ 796 { 0x2007, ISPR|ISSP|ISBL }, /* figure space */ 797 { 0x2009, ISPR|ISSP|ISBL }, /* thin space */ 798 { 0x200b, ISCN }, /* ZWSP */ 799 /*{ 0x200b, ISPR|ISSP },*/ /* ZWSP */ /* ZWSP became a control char in 4.0.1*/ 800 { 0x200e, ISCN }, /* LRM */ 801 { 0x2028, ISPR|ISSP| ISCN }, /* LS */ 802 { 0x2029, ISPR|ISSP| ISCN }, /* PS */ 803 { 0x20ac, ISGR|ISPR }, /* Euro */ 804 { 0xff15, ISDI|ISXD|ISAN| ISGR|ISPR }, /* fullwidth 5 */ 805 { 0xff25, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* fullwidth E */ 806 { 0xff35, ISAL| ISUP| ISAN| ISGR|ISPR }, /* fullwidth U */ 807 { 0xff45, ISAL|ISLO| ISXD|ISAN| ISGR|ISPR }, /* fullwidth e */ 808 { 0xff55, ISAL|ISLO| ISAN| ISGR|ISPR } /* fullwidth u */ 809 }; 810 811 static void 812 TestPOSIX() { 813 uint32_t mask; 814 int32_t cl, i; 815 UBool expect; 816 817 mask=1; 818 for(cl=0; cl<12; ++cl) { 819 for(i=0; i<LENGTHOF(posixData); ++i) { 820 expect=(UBool)((posixData[i].posixResults&mask)!=0); 821 if(posixClasses[cl].fn(posixData[i].c)!=expect) { 822 log_err("u_%s(U+%04x)=%s is wrong\n", 823 posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE"); 824 } 825 } 826 mask<<=1; 827 } 828 } 829 830 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */ 831 static void TestControlPrint() 832 { 833 const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b}; 834 const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2}; 835 const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014}; 836 const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b}; 837 UChar32 c; 838 839 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, LENGTHOF(sampleControl), TRUE); 840 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, LENGTHOF(sampleNonControl), FALSE); 841 842 testSampleCharProps(u_isprint, "u_isprint", 843 samplePrintable, LENGTHOF(samplePrintable), TRUE); 844 testSampleCharProps(u_isprint, "u_isprint", 845 sampleNonPrintable, LENGTHOF(sampleNonPrintable), FALSE); 846 847 /* test all ISO 8 controls */ 848 for(c=0; c<=0x9f; ++c) { 849 if(c==0x20) { 850 /* skip ASCII graphic characters and continue with DEL */ 851 c=0x7f; 852 } 853 if(!u_iscntrl(c)) { 854 log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c); 855 } 856 if(!u_isISOControl(c)) { 857 log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c); 858 } 859 if(u_isprint(c)) { 860 log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c); 861 } 862 } 863 864 /* test all Latin-1 graphic characters */ 865 for(c=0x20; c<=0xff; ++c) { 866 if(c==0x7f) { 867 c=0xa0; 868 } else if(c==0xad) { 869 /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */ 870 ++c; 871 } 872 if(!u_isprint(c)) { 873 log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c); 874 } 875 } 876 } 877 878 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/ 879 static void TestIdentifier() 880 { 881 const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f}; 882 const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082}; 883 const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045}; 884 const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020}; 885 const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061}; 886 const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019}; 887 const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045}; 888 const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020}; 889 const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85}; 890 const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061}; 891 892 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart", 893 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE); 894 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart", 895 sampleNonJavaIDStart, LENGTHOF(sampleNonJavaIDStart), FALSE); 896 897 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart", 898 sampleJavaIDPart, LENGTHOF(sampleJavaIDPart), TRUE); 899 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart", 900 sampleNonJavaIDPart, LENGTHOF(sampleNonJavaIDPart), FALSE); 901 902 /* IDPart should imply IDStart */ 903 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart", 904 sampleJavaIDStart, LENGTHOF(sampleJavaIDStart), TRUE); 905 906 testSampleCharProps(u_isIDStart, "u_isIDStart", 907 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE); 908 testSampleCharProps(u_isIDStart, "u_isIDStart", 909 sampleNonUnicodeIDStart, LENGTHOF(sampleNonUnicodeIDStart), FALSE); 910 911 testSampleCharProps(u_isIDPart, "u_isIDPart", 912 sampleUnicodeIDPart, LENGTHOF(sampleUnicodeIDPart), TRUE); 913 testSampleCharProps(u_isIDPart, "u_isIDPart", 914 sampleNonUnicodeIDPart, LENGTHOF(sampleNonUnicodeIDPart), FALSE); 915 916 /* IDPart should imply IDStart */ 917 testSampleCharProps(u_isIDPart, "u_isIDPart", 918 sampleUnicodeIDStart, LENGTHOF(sampleUnicodeIDStart), TRUE); 919 920 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable", 921 sampleIDIgnore, LENGTHOF(sampleIDIgnore), TRUE); 922 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable", 923 sampleNonIDIgnore, LENGTHOF(sampleNonIDIgnore), FALSE); 924 } 925 926 /* for each line of UnicodeData.txt, check some of the properties */ 927 /* 928 * ### TODO 929 * This test fails incorrectly if the First or Last code point of a repetitive area 930 * is overridden, which is allowed and is encouraged for the PUAs. 931 * Currently, this means that both area First/Last and override lines are 932 * tested against the properties from the API, 933 * and the area boundary will not match and cause an error. 934 * 935 * This function should detect area boundaries and skip them for the test of individual 936 * code points' properties. 937 * Then it should check that the areas contain all the same properties except where overridden. 938 * For this, it would have had to set a flag for which code points were listed explicitly. 939 */ 940 static void U_CALLCONV 941 unicodeDataLineFn(void *context, 942 char *fields[][2], int32_t fieldCount, 943 UErrorCode *pErrorCode) 944 { 945 char buffer[100]; 946 char *end; 947 uint32_t value; 948 UChar32 c; 949 int32_t i; 950 int8_t type; 951 952 /* get the character code, field 0 */ 953 c=strtoul(fields[0][0], &end, 16); 954 if(end<=fields[0][0] || end!=fields[0][1]) { 955 log_err("error: syntax error in field 0 at %s\n", fields[0][0]); 956 return; 957 } 958 if((uint32_t)c>=UCHAR_MAX_VALUE + 1) { 959 log_err("error in UnicodeData.txt: code point %lu out of range\n", c); 960 return; 961 } 962 963 /* get general category, field 2 */ 964 *fields[2][1]=0; 965 type = (int8_t)tagValues[MakeProp(fields[2][0])]; 966 if(u_charType(c)!=type) { 967 log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type); 968 } 969 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) { 970 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c); 971 } 972 973 /* get canonical combining class, field 3 */ 974 value=strtoul(fields[3][0], &end, 10); 975 if(end<=fields[3][0] || end!=fields[3][1]) { 976 log_err("error: syntax error in field 3 at code 0x%lx\n", c); 977 return; 978 } 979 if(value>255) { 980 log_err("error in UnicodeData.txt: combining class %lu out of range\n", value); 981 return; 982 } 983 #if !UCONFIG_NO_NORMALIZATION 984 if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) { 985 log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value); 986 } 987 #endif 988 989 /* get BiDi category, field 4 */ 990 *fields[4][1]=0; 991 i=MakeDir(fields[4][0]); 992 if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) { 993 log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]); 994 } 995 996 /* get ISO Comment, field 11 */ 997 *fields[11][1]=0; 998 i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode); 999 if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) { 1000 log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n", 1001 c, u_errorName(*pErrorCode), 1002 U_FAILURE(*pErrorCode) ? buffer : "[error]", 1003 fields[11][0]); 1004 } 1005 1006 /* get uppercase mapping, field 12 */ 1007 if(fields[12][0]!=fields[12][1]) { 1008 value=strtoul(fields[12][0], &end, 16); 1009 if(end!=fields[12][1]) { 1010 log_err("error: syntax error in field 12 at code 0x%lx\n", c); 1011 return; 1012 } 1013 if((UChar32)value!=u_toupper(c)) { 1014 log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value); 1015 } 1016 } else { 1017 /* no case mapping: the API must map the code point to itself */ 1018 if(c!=u_toupper(c)) { 1019 log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c)); 1020 } 1021 } 1022 1023 /* get lowercase mapping, field 13 */ 1024 if(fields[13][0]!=fields[13][1]) { 1025 value=strtoul(fields[13][0], &end, 16); 1026 if(end!=fields[13][1]) { 1027 log_err("error: syntax error in field 13 at code 0x%lx\n", c); 1028 return; 1029 } 1030 if((UChar32)value!=u_tolower(c)) { 1031 log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value); 1032 } 1033 } else { 1034 /* no case mapping: the API must map the code point to itself */ 1035 if(c!=u_tolower(c)) { 1036 log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c)); 1037 } 1038 } 1039 1040 /* get titlecase mapping, field 14 */ 1041 if(fields[14][0]!=fields[14][1]) { 1042 value=strtoul(fields[14][0], &end, 16); 1043 if(end!=fields[14][1]) { 1044 log_err("error: syntax error in field 14 at code 0x%lx\n", c); 1045 return; 1046 } 1047 if((UChar32)value!=u_totitle(c)) { 1048 log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value); 1049 } 1050 } else { 1051 /* no case mapping: the API must map the code point to itself */ 1052 if(c!=u_totitle(c)) { 1053 log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c)); 1054 } 1055 } 1056 } 1057 1058 static UBool U_CALLCONV 1059 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) { 1060 static const UChar32 test[][2]={ 1061 {0x41, U_UPPERCASE_LETTER}, 1062 {0x308, U_NON_SPACING_MARK}, 1063 {0xfffe, U_GENERAL_OTHER_TYPES}, 1064 {0xe0041, U_FORMAT_CHAR}, 1065 {0xeffff, U_UNASSIGNED} 1066 }; 1067 1068 int32_t i, count; 1069 1070 if(0!=strcmp((const char *)context, "a1")) { 1071 log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n"); 1072 return FALSE; 1073 } 1074 1075 count=LENGTHOF(test); 1076 for(i=0; i<count; ++i) { 1077 if(start<=test[i][0] && test[i][0]<limit) { 1078 if(type!=(UCharCategory)test[i][1]) { 1079 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n", 1080 start, limit, (long)type, test[i][0], test[i][1]); 1081 } 1082 /* stop at the range that includes the last test code point (increases code coverage for enumeration) */ 1083 return i==(count-1) ? FALSE : TRUE; 1084 } 1085 } 1086 1087 if(start>test[count-1][0]) { 1088 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n", 1089 start, limit, (long)type); 1090 return FALSE; 1091 } 1092 1093 return TRUE; 1094 } 1095 1096 static UBool U_CALLCONV 1097 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) { 1098 /* default Bidi classes for unassigned code points */ 1099 static const int32_t defaultBidi[][2]={ /* { limit, class } */ 1100 { 0x0590, U_LEFT_TO_RIGHT }, 1101 { 0x0600, U_RIGHT_TO_LEFT }, 1102 { 0x07C0, U_RIGHT_TO_LEFT_ARABIC }, 1103 { 0x0900, U_RIGHT_TO_LEFT }, 1104 { 0xFB1D, U_LEFT_TO_RIGHT }, 1105 { 0xFB50, U_RIGHT_TO_LEFT }, 1106 { 0xFE00, U_RIGHT_TO_LEFT_ARABIC }, 1107 { 0xFE70, U_LEFT_TO_RIGHT }, 1108 { 0xFF00, U_RIGHT_TO_LEFT_ARABIC }, 1109 { 0x10800, U_LEFT_TO_RIGHT }, 1110 { 0x11000, U_RIGHT_TO_LEFT }, 1111 { 0x1E800, U_LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */ 1112 { 0x1F000, U_RIGHT_TO_LEFT }, 1113 { 0x110000, U_LEFT_TO_RIGHT } 1114 }; 1115 1116 UChar32 c; 1117 int32_t i; 1118 UCharDirection shouldBeDir; 1119 1120 /* 1121 * LineBreak.txt specifies: 1122 * # - Assigned characters that are not listed explicitly are given the value 1123 * # "AL". 1124 * # - Unassigned characters are given the value "XX". 1125 * 1126 * PUA characters are listed explicitly with "XX". 1127 * Verify that no assigned character has "XX". 1128 */ 1129 if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) { 1130 c=start; 1131 while(c<limit) { 1132 if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) { 1133 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c); 1134 } 1135 ++c; 1136 } 1137 } 1138 1139 /* 1140 * Verify default Bidi classes. 1141 * For recent Unicode versions, see UCD.html. 1142 * 1143 * For older Unicode versions: 1144 * See table 3-7 "Bidirectional Character Types" in UAX #9. 1145 * http://www.unicode.org/reports/tr9/ 1146 * 1147 * See also DerivedBidiClass.txt for Cn code points! 1148 * 1149 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html) 1150 * changed some default values. 1151 * In particular, non-characters and unassigned Default Ignorable Code Points 1152 * change from L to BN. 1153 * 1154 * UCD.html version 4.0.1 does not yet reflect these changes. 1155 */ 1156 if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) { 1157 /* enumerate the intersections of defaultBidi ranges with [start..limit[ */ 1158 c=start; 1159 for(i=0; i<LENGTHOF(defaultBidi) && c<limit; ++i) { 1160 if((int32_t)c<defaultBidi[i][0]) { 1161 while(c<limit && (int32_t)c<defaultBidi[i][0]) { 1162 if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) { 1163 shouldBeDir=U_BOUNDARY_NEUTRAL; 1164 } else { 1165 shouldBeDir=(UCharDirection)defaultBidi[i][1]; 1166 } 1167 1168 if( u_charDirection(c)!=shouldBeDir || 1169 u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir 1170 ) { 1171 log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n", 1172 c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]); 1173 } 1174 ++c; 1175 } 1176 } 1177 } 1178 } 1179 1180 return TRUE; 1181 } 1182 1183 /* tests for several properties */ 1184 static void TestUnicodeData() 1185 { 1186 UVersionInfo expectVersionArray; 1187 UVersionInfo versionArray; 1188 char *fields[15][2]; 1189 UErrorCode errorCode; 1190 UChar32 c; 1191 int8_t type; 1192 1193 u_versionFromString(expectVersionArray, U_UNICODE_VERSION); 1194 u_getUnicodeVersion(versionArray); 1195 if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0) 1196 { 1197 log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n", 1198 versionArray[0], versionArray[1], versionArray[2], versionArray[3]); 1199 } 1200 1201 #if defined(ICU_UNICODE_VERSION) 1202 /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */ 1203 if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION)) 1204 { 1205 log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n"); 1206 } 1207 #endif 1208 1209 if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) { 1210 log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041)); 1211 } 1212 1213 errorCode=U_ZERO_ERROR; 1214 parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, NULL, &errorCode); 1215 if(U_FAILURE(errorCode)) { 1216 return; /* if we couldn't parse UnicodeData.txt, we should return */ 1217 } 1218 1219 /* sanity check on repeated properties */ 1220 for(c=0xfffe; c<=0x10ffff;) { 1221 type=u_charType(c); 1222 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) { 1223 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c); 1224 } 1225 if(type!=U_UNASSIGNED) { 1226 log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c)); 1227 } 1228 if((c&0xffff)==0xfffe) { 1229 ++c; 1230 } else { 1231 c+=0xffff; 1232 } 1233 } 1234 1235 /* test that PUA is not "unassigned" */ 1236 for(c=0xe000; c<=0x10fffd;) { 1237 type=u_charType(c); 1238 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) { 1239 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c); 1240 } 1241 if(type==U_UNASSIGNED) { 1242 log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c); 1243 } else if(type!=U_PRIVATE_USE_CHAR) { 1244 log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type); 1245 } 1246 if(c==0xf8ff) { 1247 c=0xf0000; 1248 } else if(c==0xffffd) { 1249 c=0x100000; 1250 } else { 1251 ++c; 1252 } 1253 } 1254 1255 /* test u_enumCharTypes() */ 1256 u_enumCharTypes(enumTypeRange, "a1"); 1257 1258 /* check default properties */ 1259 u_enumCharTypes(enumDefaultsRange, NULL); 1260 } 1261 1262 static void TestCodeUnit(){ 1263 const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0}; 1264 1265 int32_t i; 1266 1267 for(i=0; i<(int32_t)(sizeof(codeunit)/sizeof(codeunit[0])); i++){ 1268 UChar c=codeunit[i]; 1269 if(i<4){ 1270 if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){ 1271 log_err("ERROR: U+%04x is a single", c); 1272 } 1273 1274 } 1275 if(i >= 4 && i< 8){ 1276 if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){ 1277 log_err("ERROR: U+%04x is a first surrogate", c); 1278 } 1279 } 1280 if(i >= 8 && i< 12){ 1281 if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){ 1282 log_err("ERROR: U+%04x is a second surrogate", c); 1283 } 1284 } 1285 } 1286 1287 } 1288 1289 static void TestCodePoint(){ 1290 const UChar32 codePoint[]={ 1291 /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */ 1292 0xd800, 1293 0xdbff, 1294 0xdc00, 1295 0xdfff, 1296 0xdc04, 1297 0xd821, 1298 /*not a surrogate, valid, isUnicodeChar , not Error*/ 1299 0x20ac, 1300 0xd7ff, 1301 0xe000, 1302 0xe123, 1303 0x0061, 1304 0xe065, 1305 0x20402, 1306 0x24506, 1307 0x23456, 1308 0x20402, 1309 0x10402, 1310 0x23456, 1311 /*not a surrogate, not valid, isUnicodeChar, isError */ 1312 0x0015, 1313 0x009f, 1314 /*not a surrogate, not valid, not isUnicodeChar, isError */ 1315 0xffff, 1316 0xfffe, 1317 }; 1318 int32_t i; 1319 for(i=0; i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0])); i++){ 1320 UChar32 c=codePoint[i]; 1321 if(i<6){ 1322 if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){ 1323 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); 1324 } 1325 if(UTF_IS_VALID(c)){ 1326 log_err("ERROR: isValid() failed for U+%04x\n", c); 1327 } 1328 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){ 1329 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); 1330 } 1331 if(UTF_IS_ERROR(c)){ 1332 log_err("ERROR: isError() failed for U+%04x\n", c); 1333 } 1334 }else if(i >=6 && i<18){ 1335 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){ 1336 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); 1337 } 1338 if(!UTF_IS_VALID(c)){ 1339 log_err("ERROR: isValid() failed for U+%04x\n", c); 1340 } 1341 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){ 1342 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); 1343 } 1344 if(UTF_IS_ERROR(c)){ 1345 log_err("ERROR: isError() failed for U+%04x\n", c); 1346 } 1347 }else if(i >=18 && i<20){ 1348 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){ 1349 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); 1350 } 1351 if(UTF_IS_VALID(c)){ 1352 log_err("ERROR: isValid() failed for U+%04x\n", c); 1353 } 1354 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){ 1355 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); 1356 } 1357 if(!UTF_IS_ERROR(c)){ 1358 log_err("ERROR: isError() failed for U+%04x\n", c); 1359 } 1360 } 1361 else if(i >=18 && i<(int32_t)(sizeof(codePoint)/sizeof(codePoint[0]))){ 1362 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){ 1363 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); 1364 } 1365 if(UTF_IS_VALID(c)){ 1366 log_err("ERROR: isValid() failed for U+%04x\n", c); 1367 } 1368 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){ 1369 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); 1370 } 1371 if(!UTF_IS_ERROR(c)){ 1372 log_err("ERROR: isError() failed for U+%04x\n", c); 1373 } 1374 } 1375 } 1376 1377 if( 1378 !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) || 1379 !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) || 1380 U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) || 1381 U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff) 1382 ) { 1383 log_err("error with U_IS_BMP()\n"); 1384 } 1385 1386 if( 1387 U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) || 1388 U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) || 1389 U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) || 1390 !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff) 1391 ) { 1392 log_err("error with U_IS_SUPPLEMENTARY()\n"); 1393 } 1394 } 1395 1396 static void TestCharLength() 1397 { 1398 const int32_t codepoint[]={ 1399 1, 0x0061, 1400 1, 0xe065, 1401 1, 0x20ac, 1402 2, 0x20402, 1403 2, 0x23456, 1404 2, 0x24506, 1405 2, 0x20402, 1406 2, 0x10402, 1407 1, 0xd7ff, 1408 1, 0xe000 1409 }; 1410 1411 int32_t i; 1412 UBool multiple; 1413 for(i=0; i<(int32_t)(sizeof(codepoint)/sizeof(codepoint[0])); i=(int16_t)(i+2)){ 1414 UChar32 c=codepoint[i+1]; 1415 if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){ 1416 log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], UTF_CHAR_LENGTH(c)); 1417 } 1418 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); 1419 if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){ 1420 log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c); 1421 } 1422 } 1423 } 1424 1425 /*internal functions ----*/ 1426 static int32_t MakeProp(char* str) 1427 { 1428 int32_t result = 0; 1429 char* matchPosition =0; 1430 1431 matchPosition = strstr(tagStrings, str); 1432 if (matchPosition == 0) 1433 { 1434 log_err("unrecognized type letter "); 1435 log_err(str); 1436 } 1437 else 1438 result = (int32_t)((matchPosition - tagStrings) / 2); 1439 return result; 1440 } 1441 1442 static int32_t MakeDir(char* str) 1443 { 1444 int32_t pos = 0; 1445 for (pos = 0; pos < 19; pos++) { 1446 if (strcmp(str, dirStrings[pos]) == 0) { 1447 return pos; 1448 } 1449 } 1450 return -1; 1451 } 1452 1453 /* test u_charName() -------------------------------------------------------- */ 1454 1455 static const struct { 1456 uint32_t code; 1457 const char *name, *oldName, *extName, *alias; 1458 } names[]={ 1459 {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"}, 1460 {0x01a2, "LATIN CAPITAL LETTER OI", 1461 "LATIN CAPITAL LETTER O I", 1462 "LATIN CAPITAL LETTER OI", 1463 "LATIN CAPITAL LETTER GHA"}, 1464 {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", 1465 "LATIN SMALL LETTER DOTLESS J BAR HOOK", 1466 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" }, 1467 {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "", 1468 "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", 1469 "TIBETAN MARK BKA- SHOG GI MGO RGYAN"}, 1470 {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" }, 1471 {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" }, 1472 {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" }, 1473 {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" }, 1474 {0xd800, "", "", "<lead surrogate-D800>" }, 1475 {0xdc00, "", "", "<trail surrogate-DC00>" }, 1476 {0xff08, "FULLWIDTH LEFT PARENTHESIS", "FULLWIDTH OPENING PARENTHESIS", "FULLWIDTH LEFT PARENTHESIS" }, 1477 {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" }, 1478 {0xffff, "", "", "<noncharacter-FFFF>" }, 1479 {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "", 1480 "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", 1481 "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"}, 1482 {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" } 1483 }; 1484 1485 static UBool 1486 enumCharNamesFn(void *context, 1487 UChar32 code, UCharNameChoice nameChoice, 1488 const char *name, int32_t length) { 1489 int32_t *pCount=(int32_t *)context; 1490 const char *expected; 1491 int i; 1492 1493 if(length<=0 || length!=(int32_t)strlen(name)) { 1494 /* should not be called with an empty string or invalid length */ 1495 log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length); 1496 return TRUE; 1497 } 1498 1499 ++*pCount; 1500 for(i=0; i<sizeof(names)/sizeof(names[0]); ++i) { 1501 if(code==(UChar32)names[i].code) { 1502 switch (nameChoice) { 1503 case U_EXTENDED_CHAR_NAME: 1504 if(0!=strcmp(name, names[i].extName)) { 1505 log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName); 1506 } 1507 break; 1508 case U_UNICODE_CHAR_NAME: 1509 if(0!=strcmp(name, names[i].name)) { 1510 log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name); 1511 } 1512 break; 1513 case U_UNICODE_10_CHAR_NAME: 1514 expected=names[i].oldName; 1515 if(expected[0]==0 || 0!=strcmp(name, expected)) { 1516 log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected); 1517 } 1518 break; 1519 case U_CHAR_NAME_ALIAS: 1520 expected=names[i].alias; 1521 if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) { 1522 log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected); 1523 } 1524 break; 1525 case U_CHAR_NAME_CHOICE_COUNT: 1526 break; 1527 } 1528 break; 1529 } 1530 } 1531 return TRUE; 1532 } 1533 1534 struct enumExtCharNamesContext { 1535 uint32_t length; 1536 int32_t last; 1537 }; 1538 1539 static UBool 1540 enumExtCharNamesFn(void *context, 1541 UChar32 code, UCharNameChoice nameChoice, 1542 const char *name, int32_t length) { 1543 struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context; 1544 1545 if (ecncp->last != (int32_t) code - 1) { 1546 if (ecncp->last < 0) { 1547 log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1); 1548 } else { 1549 log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code); 1550 } 1551 } 1552 ecncp->last = (int32_t) code; 1553 1554 if (!*name) { 1555 log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code); 1556 } 1557 1558 return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length); 1559 } 1560 1561 /** 1562 * This can be made more efficient by moving it into putil.c and having 1563 * it directly access the ebcdic translation tables. 1564 * TODO: If we get this method in putil.c, then delete it from here. 1565 */ 1566 static UChar 1567 u_charToUChar(char c) { 1568 UChar uc; 1569 u_charsToUChars(&c, &uc, 1); 1570 return uc; 1571 } 1572 1573 static void 1574 TestCharNames() { 1575 static char name[80]; 1576 UErrorCode errorCode=U_ZERO_ERROR; 1577 struct enumExtCharNamesContext extContext; 1578 const char *expected; 1579 int32_t length; 1580 UChar32 c; 1581 int32_t i; 1582 1583 log_verbose("Testing uprv_getMaxCharNameLength()\n"); 1584 length=uprv_getMaxCharNameLength(); 1585 if(length==0) { 1586 /* no names data available */ 1587 return; 1588 } 1589 if(length<83) { /* Unicode 3.2 max char name length */ 1590 log_err("uprv_getMaxCharNameLength()=%d is too short"); 1591 } 1592 /* ### TODO same tests for max ISO comment length as for max name length */ 1593 1594 log_verbose("Testing u_charName()\n"); 1595 for(i=0; i<(int32_t)(sizeof(names)/sizeof(names[0])); ++i) { 1596 /* modern Unicode character name */ 1597 length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode); 1598 if(U_FAILURE(errorCode)) { 1599 log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode)); 1600 return; 1601 } 1602 if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) { 1603 log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name); 1604 } 1605 1606 /* find the modern name */ 1607 if (*names[i].name) { 1608 c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode); 1609 if(U_FAILURE(errorCode)) { 1610 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode)); 1611 return; 1612 } 1613 if(c!=(UChar32)names[i].code) { 1614 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code); 1615 } 1616 } 1617 1618 /* Unicode 1.0 character name */ 1619 length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode); 1620 if(U_FAILURE(errorCode)) { 1621 log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode)); 1622 return; 1623 } 1624 if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) { 1625 log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName); 1626 } 1627 1628 /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */ 1629 if(names[i].oldName[0]!=0 /* && length>0 */) { 1630 c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode); 1631 if(U_FAILURE(errorCode)) { 1632 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode)); 1633 return; 1634 } 1635 if(c!=(UChar32)names[i].code) { 1636 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code); 1637 } 1638 } 1639 1640 /* Unicode character name alias */ 1641 length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode); 1642 if(U_FAILURE(errorCode)) { 1643 log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode)); 1644 return; 1645 } 1646 expected=names[i].alias; 1647 if(expected==NULL) { 1648 expected=""; 1649 } 1650 if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) { 1651 log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n", 1652 names[i].code, name, length, expected); 1653 } 1654 1655 /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */ 1656 if(expected[0]!=0 /* && length>0 */) { 1657 c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode); 1658 if(U_FAILURE(errorCode)) { 1659 log_err("u_charFromName(%s - alias) error %s\n", 1660 expected, u_errorName(errorCode)); 1661 return; 1662 } 1663 if(c!=(UChar32)names[i].code) { 1664 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n", 1665 expected, c, names[i].code); 1666 } 1667 } 1668 } 1669 1670 /* test u_enumCharNames() */ 1671 length=0; 1672 errorCode=U_ZERO_ERROR; 1673 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode); 1674 if(U_FAILURE(errorCode) || length<94140) { 1675 log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length); 1676 } 1677 1678 extContext.length = 0; 1679 extContext.last = -1; 1680 errorCode=U_ZERO_ERROR; 1681 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode); 1682 if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) { 1683 log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length); 1684 } 1685 1686 /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */ 1687 if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) { 1688 log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode)); 1689 } 1690 1691 /* Test getCharNameCharacters */ 1692 if(!QUICK) { 1693 enum { BUFSIZE = 256 }; 1694 UErrorCode ec = U_ZERO_ERROR; 1695 char buf[BUFSIZE]; 1696 int32_t maxLength; 1697 UChar32 cp; 1698 UChar pat[BUFSIZE], dumbPat[BUFSIZE]; 1699 int32_t l1, l2; 1700 UBool map[256]; 1701 UBool ok; 1702 1703 USet* set = uset_open(1, 0); /* empty set */ 1704 USet* dumb = uset_open(1, 0); /* empty set */ 1705 1706 /* 1707 * uprv_getCharNameCharacters() will likely return more lowercase 1708 * letters than actual character names contain because 1709 * it includes all the characters in lowercased names of 1710 * general categories, for the full possible set of extended names. 1711 */ 1712 { 1713 USetAdder sa={ 1714 NULL, 1715 uset_add, 1716 uset_addRange, 1717 uset_addString, 1718 NULL /* don't need remove() */ 1719 }; 1720 sa.set=set; 1721 uprv_getCharNameCharacters(&sa); 1722 } 1723 1724 /* build set the dumb (but sure-fire) way */ 1725 for (i=0; i<256; ++i) { 1726 map[i] = FALSE; 1727 } 1728 1729 maxLength=0; 1730 for (cp=0; cp<0x110000; ++cp) { 1731 int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME, 1732 buf, BUFSIZE, &ec); 1733 if (U_FAILURE(ec)) { 1734 log_err("FAIL: u_charName failed when it shouldn't\n"); 1735 uset_close(set); 1736 uset_close(dumb); 1737 return; 1738 } 1739 if(len>maxLength) { 1740 maxLength=len; 1741 } 1742 1743 for (i=0; i<len; ++i) { 1744 if (!map[(uint8_t) buf[i]]) { 1745 uset_add(dumb, (UChar32)u_charToUChar(buf[i])); 1746 map[(uint8_t) buf[i]] = TRUE; 1747 } 1748 } 1749 1750 /* test for leading/trailing whitespace */ 1751 if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') { 1752 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp); 1753 } 1754 } 1755 1756 if(map[(uint8_t)'\t']) { 1757 log_err("u_charName() returned a name with a TAB for some code point\n", cp); 1758 } 1759 1760 length=uprv_getMaxCharNameLength(); 1761 if(length!=maxLength) { 1762 log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n", 1763 length, maxLength); 1764 } 1765 1766 /* compare the sets. Where is my uset_equals?!! */ 1767 ok=TRUE; 1768 for(i=0; i<256; ++i) { 1769 if(uset_contains(set, i)!=uset_contains(dumb, i)) { 1770 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) { 1771 /* ignore lowercase a-z that are in set but not in dumb */ 1772 ok=TRUE; 1773 } else { 1774 ok=FALSE; 1775 break; 1776 } 1777 } 1778 } 1779 1780 l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec); 1781 l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec); 1782 if (U_FAILURE(ec)) { 1783 log_err("FAIL: uset_toPattern failed when it shouldn't\n"); 1784 uset_close(set); 1785 uset_close(dumb); 1786 return; 1787 } 1788 1789 if (l1 >= BUFSIZE) { 1790 l1 = BUFSIZE-1; 1791 pat[l1] = 0; 1792 } 1793 if (l2 >= BUFSIZE) { 1794 l2 = BUFSIZE-1; 1795 dumbPat[l2] = 0; 1796 } 1797 1798 if (!ok) { 1799 log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n", 1800 aescstrdup(pat, l1), aescstrdup(dumbPat, l2)); 1801 } else if(VERBOSITY) { 1802 log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1)); 1803 } 1804 1805 uset_close(set); 1806 uset_close(dumb); 1807 } 1808 1809 /* ### TODO: test error cases and other interesting things */ 1810 } 1811 1812 /* test u_isMirrored() and u_charMirror() ----------------------------------- */ 1813 1814 static void 1815 TestMirroring() { 1816 USet *set; 1817 UErrorCode errorCode; 1818 1819 UChar32 start, end, c2, c3; 1820 int32_t i; 1821 1822 U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17); 1823 1824 U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17); 1825 1826 log_verbose("Testing u_isMirrored()\n"); 1827 if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) && 1828 !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400) 1829 ) 1830 ) { 1831 log_err("u_isMirrored() does not work correctly\n"); 1832 } 1833 1834 log_verbose("Testing u_charMirror()\n"); 1835 if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 && 1836 u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */ 1837 u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab && 1838 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 1839 u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d 1840 ) 1841 ) { 1842 log_err("u_charMirror() does not work correctly\n"); 1843 } 1844 1845 /* verify that Bidi_Mirroring_Glyph roundtrips */ 1846 errorCode=U_ZERO_ERROR; 1847 set=uset_openPattern(mirroredPattern, 17, &errorCode); 1848 1849 if (U_FAILURE(errorCode)) { 1850 log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n"); 1851 } else { 1852 for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) { 1853 do { 1854 c2=u_charMirror(start); 1855 c3=u_charMirror(c2); 1856 if(c3!=start) { 1857 log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3); 1858 } 1859 } while(++start<=end); 1860 } 1861 } 1862 1863 uset_close(set); 1864 } 1865 1866 1867 struct RunTestData 1868 { 1869 const char *runText; 1870 UScriptCode runCode; 1871 }; 1872 1873 typedef struct RunTestData RunTestData; 1874 1875 static void 1876 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns, 1877 const char *prefix) 1878 { 1879 int32_t run, runStart, runLimit; 1880 UScriptCode runCode; 1881 1882 /* iterate over all the runs */ 1883 run = 0; 1884 while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) { 1885 if (runStart != runStarts[run]) { 1886 log_err("%s: incorrect start offset for run %d: expected %d, got %d\n", 1887 prefix, run, runStarts[run], runStart); 1888 } 1889 1890 if (runLimit != runStarts[run + 1]) { 1891 log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n", 1892 prefix, run, runStarts[run + 1], runLimit); 1893 } 1894 1895 if (runCode != testData[run].runCode) { 1896 log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n", 1897 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode)); 1898 } 1899 1900 run += 1; 1901 1902 /* stop when we've seen all the runs we expect to see */ 1903 if (run >= nRuns) { 1904 break; 1905 } 1906 } 1907 1908 /* Complain if we didn't see then number of runs we expected */ 1909 if (run != nRuns) { 1910 log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns); 1911 } 1912 } 1913 1914 static void 1915 TestUScriptRunAPI() 1916 { 1917 static const RunTestData testData1[] = { 1918 {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI}, 1919 {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC}, 1920 {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC}, 1921 {"English (", USCRIPT_LATIN}, 1922 {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI}, 1923 {") ", USCRIPT_LATIN}, 1924 {"\\u6F22\\u5B75", USCRIPT_HAN}, 1925 {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA}, 1926 {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA}, 1927 {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET} 1928 }; 1929 1930 static const RunTestData testData2[] = { 1931 {"((((((((((abc))))))))))", USCRIPT_LATIN} 1932 }; 1933 1934 static const struct { 1935 const RunTestData *testData; 1936 int32_t nRuns; 1937 } testDataEntries[] = { 1938 {testData1, LENGTHOF(testData1)}, 1939 {testData2, LENGTHOF(testData2)} 1940 }; 1941 1942 static const int32_t nTestEntries = LENGTHOF(testDataEntries); 1943 int32_t testEntry; 1944 1945 for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) { 1946 UChar testString[1024]; 1947 int32_t runStarts[256]; 1948 int32_t nTestRuns = testDataEntries[testEntry].nRuns; 1949 const RunTestData *testData = testDataEntries[testEntry].testData; 1950 1951 int32_t run, stringLimit; 1952 UScriptRun *scriptRun = NULL; 1953 UErrorCode err; 1954 1955 /* 1956 * Fill in the test string and the runStarts array. 1957 */ 1958 stringLimit = 0; 1959 for (run = 0; run < nTestRuns; run += 1) { 1960 runStarts[run] = stringLimit; 1961 stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit); 1962 /*stringLimit -= 1;*/ 1963 } 1964 1965 /* The limit of the last run */ 1966 runStarts[nTestRuns] = stringLimit; 1967 1968 /* 1969 * Make sure that calling uscript_OpenRun with a NULL text pointer 1970 * and a non-zero text length returns the correct error. 1971 */ 1972 err = U_ZERO_ERROR; 1973 scriptRun = uscript_openRun(NULL, stringLimit, &err); 1974 1975 if (err != U_ILLEGAL_ARGUMENT_ERROR) { 1976 log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); 1977 } 1978 1979 if (scriptRun != NULL) { 1980 log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n"); 1981 uscript_closeRun(scriptRun); 1982 } 1983 1984 /* 1985 * Make sure that calling uscript_OpenRun with a non-NULL text pointer 1986 * and a zero text length returns the correct error. 1987 */ 1988 err = U_ZERO_ERROR; 1989 scriptRun = uscript_openRun(testString, 0, &err); 1990 1991 if (err != U_ILLEGAL_ARGUMENT_ERROR) { 1992 log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); 1993 } 1994 1995 if (scriptRun != NULL) { 1996 log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n"); 1997 uscript_closeRun(scriptRun); 1998 } 1999 2000 /* 2001 * Make sure that calling uscript_openRun with a NULL text pointer 2002 * and a zero text length doesn't return an error. 2003 */ 2004 err = U_ZERO_ERROR; 2005 scriptRun = uscript_openRun(NULL, 0, &err); 2006 2007 if (U_FAILURE(err)) { 2008 log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err)); 2009 } 2010 2011 /* Make sure that the empty iterator doesn't find any runs */ 2012 if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) { 2013 log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n"); 2014 } 2015 2016 /* 2017 * Make sure that calling uscript_setRunText with a NULL text pointer 2018 * and a non-zero text length returns the correct error. 2019 */ 2020 err = U_ZERO_ERROR; 2021 uscript_setRunText(scriptRun, NULL, stringLimit, &err); 2022 2023 if (err != U_ILLEGAL_ARGUMENT_ERROR) { 2024 log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); 2025 } 2026 2027 /* 2028 * Make sure that calling uscript_OpenRun with a non-NULL text pointer 2029 * and a zero text length returns the correct error. 2030 */ 2031 err = U_ZERO_ERROR; 2032 uscript_setRunText(scriptRun, testString, 0, &err); 2033 2034 if (err != U_ILLEGAL_ARGUMENT_ERROR) { 2035 log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); 2036 } 2037 2038 /* 2039 * Now call uscript_setRunText on the empty iterator 2040 * and make sure that it works. 2041 */ 2042 err = U_ZERO_ERROR; 2043 uscript_setRunText(scriptRun, testString, stringLimit, &err); 2044 2045 if (U_FAILURE(err)) { 2046 log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err)); 2047 } else { 2048 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText"); 2049 } 2050 2051 uscript_closeRun(scriptRun); 2052 2053 /* 2054 * Now open an interator over the testString 2055 * using uscript_openRun and make sure that it works 2056 */ 2057 scriptRun = uscript_openRun(testString, stringLimit, &err); 2058 2059 if (U_FAILURE(err)) { 2060 log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err)); 2061 } else { 2062 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun"); 2063 } 2064 2065 /* Now reset the iterator, and make sure 2066 * that it still works. 2067 */ 2068 uscript_resetRun(scriptRun); 2069 2070 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun"); 2071 2072 /* Close the iterator */ 2073 uscript_closeRun(scriptRun); 2074 } 2075 } 2076 2077 /* test additional, non-core properties */ 2078 static void 2079 TestAdditionalProperties() { 2080 /* test data for u_charAge() */ 2081 static const struct { 2082 UChar32 c; 2083 UVersionInfo version; 2084 } charAges[]={ 2085 {0x41, { 1, 1, 0, 0 }}, 2086 {0xffff, { 1, 1, 0, 0 }}, 2087 {0x20ab, { 2, 0, 0, 0 }}, 2088 {0x2fffe, { 2, 0, 0, 0 }}, 2089 {0x20ac, { 2, 1, 0, 0 }}, 2090 {0xfb1d, { 3, 0, 0, 0 }}, 2091 {0x3f4, { 3, 1, 0, 0 }}, 2092 {0x10300, { 3, 1, 0, 0 }}, 2093 {0x220, { 3, 2, 0, 0 }}, 2094 {0xff60, { 3, 2, 0, 0 }} 2095 }; 2096 2097 /* test data for u_hasBinaryProperty() */ 2098 static const int32_t 2099 props[][3]={ /* code point, property, value */ 2100 { 0x0627, UCHAR_ALPHABETIC, TRUE }, 2101 { 0x1034a, UCHAR_ALPHABETIC, TRUE }, 2102 { 0x2028, UCHAR_ALPHABETIC, FALSE }, 2103 2104 { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE }, 2105 { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE }, 2106 2107 { 0x202c, UCHAR_BIDI_CONTROL, TRUE }, 2108 { 0x202f, UCHAR_BIDI_CONTROL, FALSE }, 2109 2110 { 0x003c, UCHAR_BIDI_MIRRORED, TRUE }, 2111 { 0x003d, UCHAR_BIDI_MIRRORED, FALSE }, 2112 2113 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 2114 { 0x2018, UCHAR_BIDI_MIRRORED, FALSE }, 2115 { 0x201d, UCHAR_BIDI_MIRRORED, FALSE }, 2116 { 0x201f, UCHAR_BIDI_MIRRORED, FALSE }, 2117 { 0x301e, UCHAR_BIDI_MIRRORED, FALSE }, 2118 2119 { 0x058a, UCHAR_DASH, TRUE }, 2120 { 0x007e, UCHAR_DASH, FALSE }, 2121 2122 { 0x0c4d, UCHAR_DIACRITIC, TRUE }, 2123 { 0x3000, UCHAR_DIACRITIC, FALSE }, 2124 2125 { 0x0e46, UCHAR_EXTENDER, TRUE }, 2126 { 0x0020, UCHAR_EXTENDER, FALSE }, 2127 2128 #if !UCONFIG_NO_NORMALIZATION 2129 { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE }, 2130 { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE }, 2131 { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE }, 2132 2133 { 0x110a, UCHAR_NFD_INERT, TRUE }, /* Jamo L */ 2134 { 0x0308, UCHAR_NFD_INERT, FALSE }, 2135 2136 { 0x1164, UCHAR_NFKD_INERT, TRUE }, /* Jamo V */ 2137 { 0x1d79d, UCHAR_NFKD_INERT, FALSE }, /* math compat version of xi */ 2138 2139 { 0x0021, UCHAR_NFC_INERT, TRUE }, /* ! */ 2140 { 0x0061, UCHAR_NFC_INERT, FALSE }, /* a */ 2141 { 0x00e4, UCHAR_NFC_INERT, FALSE }, /* a-umlaut */ 2142 { 0x0102, UCHAR_NFC_INERT, FALSE }, /* a-breve */ 2143 { 0xac1c, UCHAR_NFC_INERT, FALSE }, /* Hangul LV */ 2144 { 0xac1d, UCHAR_NFC_INERT, TRUE }, /* Hangul LVT */ 2145 2146 { 0x1d79d, UCHAR_NFKC_INERT, FALSE }, /* math compat version of xi */ 2147 { 0x2a6d6, UCHAR_NFKC_INERT, TRUE }, /* Han, last of CJK ext. B */ 2148 2149 { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE }, 2150 { 0x0308, UCHAR_SEGMENT_STARTER, FALSE }, 2151 { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */ 2152 { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */ 2153 { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */ 2154 { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */ 2155 #endif 2156 2157 { 0x0044, UCHAR_HEX_DIGIT, TRUE }, 2158 { 0xff46, UCHAR_HEX_DIGIT, TRUE }, 2159 { 0x0047, UCHAR_HEX_DIGIT, FALSE }, 2160 2161 { 0x30fb, UCHAR_HYPHEN, TRUE }, 2162 { 0xfe58, UCHAR_HYPHEN, FALSE }, 2163 2164 { 0x2172, UCHAR_ID_CONTINUE, TRUE }, 2165 { 0x0307, UCHAR_ID_CONTINUE, TRUE }, 2166 { 0x005c, UCHAR_ID_CONTINUE, FALSE }, 2167 2168 { 0x2172, UCHAR_ID_START, TRUE }, 2169 { 0x007a, UCHAR_ID_START, TRUE }, 2170 { 0x0039, UCHAR_ID_START, FALSE }, 2171 2172 { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE }, 2173 { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE }, 2174 { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE }, 2175 2176 { 0x200c, UCHAR_JOIN_CONTROL, TRUE }, 2177 { 0x2029, UCHAR_JOIN_CONTROL, FALSE }, 2178 2179 { 0x1d7bc, UCHAR_LOWERCASE, TRUE }, 2180 { 0x0345, UCHAR_LOWERCASE, TRUE }, 2181 { 0x0030, UCHAR_LOWERCASE, FALSE }, 2182 2183 { 0x1d7a9, UCHAR_MATH, TRUE }, 2184 { 0x2135, UCHAR_MATH, TRUE }, 2185 { 0x0062, UCHAR_MATH, FALSE }, 2186 2187 { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE }, 2188 { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE }, 2189 { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE }, 2190 2191 { 0x0022, UCHAR_QUOTATION_MARK, TRUE }, 2192 { 0xff62, UCHAR_QUOTATION_MARK, TRUE }, 2193 { 0xd840, UCHAR_QUOTATION_MARK, FALSE }, 2194 2195 { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE }, 2196 { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE }, 2197 2198 { 0x1d44a, UCHAR_UPPERCASE, TRUE }, 2199 { 0x2162, UCHAR_UPPERCASE, TRUE }, 2200 { 0x0345, UCHAR_UPPERCASE, FALSE }, 2201 2202 { 0x0020, UCHAR_WHITE_SPACE, TRUE }, 2203 { 0x202f, UCHAR_WHITE_SPACE, TRUE }, 2204 { 0x3001, UCHAR_WHITE_SPACE, FALSE }, 2205 2206 { 0x0711, UCHAR_XID_CONTINUE, TRUE }, 2207 { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE }, 2208 { 0x007c, UCHAR_XID_CONTINUE, FALSE }, 2209 2210 { 0x16ee, UCHAR_XID_START, TRUE }, 2211 { 0x23456, UCHAR_XID_START, TRUE }, 2212 { 0x1d1aa, UCHAR_XID_START, FALSE }, 2213 2214 /* 2215 * Version break: 2216 * The following properties are only supported starting with the 2217 * Unicode version indicated in the second field. 2218 */ 2219 { -1, 0x320, 0 }, 2220 2221 { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE }, 2222 { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE }, 2223 { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE }, 2224 2225 { 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */ 2226 { 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */ 2227 { 0xe0041, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */ 2228 { 0xe0100, UCHAR_DEPRECATED, FALSE }, 2229 2230 { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE }, 2231 { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE }, 2232 { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE }, 2233 { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */ 2234 2235 { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE }, 2236 { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE }, 2237 { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */ 2238 { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE }, 2239 2240 { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE }, 2241 { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE }, 2242 2243 { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE }, 2244 { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE }, 2245 2246 { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE }, 2247 { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE }, 2248 2249 { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE }, 2250 { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE }, 2251 2252 { 0x2e9b, UCHAR_RADICAL, TRUE }, 2253 { 0x4e00, UCHAR_RADICAL, FALSE }, 2254 2255 { 0x012f, UCHAR_SOFT_DOTTED, TRUE }, 2256 { 0x0049, UCHAR_SOFT_DOTTED, FALSE }, 2257 2258 { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE }, 2259 { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE }, 2260 2261 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */ 2262 2263 { 0x002e, UCHAR_S_TERM, TRUE }, 2264 { 0x0061, UCHAR_S_TERM, FALSE }, 2265 2266 { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE }, 2267 { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE }, 2268 { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE }, 2269 { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE }, 2270 2271 /* enum/integer type properties */ 2272 2273 /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */ 2274 /* test default Bidi classes for unassigned code points */ 2275 { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2276 { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2277 { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2278 { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */ 2279 { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */ 2280 { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2281 { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2282 { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2283 { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2284 { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2285 { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2286 2287 { 0x0605, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2288 { 0x061c, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2289 { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2290 { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2291 { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2292 { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2293 { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2294 { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2295 2296 { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS }, 2297 { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU }, 2298 { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS }, 2299 { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG }, 2300 { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU }, 2301 { 0x1AFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, 2302 { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA }, 2303 { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS }, 2304 { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, 2305 { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, 2306 { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B }, 2307 2308 /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */ 2309 { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 }, 2310 2311 { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK }, 2312 { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT }, 2313 { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE }, 2314 { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL }, 2315 { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL }, 2316 { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL }, 2317 { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL }, 2318 { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT }, 2319 { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE }, 2320 2321 { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, 2322 { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW }, 2323 { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, 2324 { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH }, 2325 { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2326 { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH }, 2327 { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2328 { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2329 { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2330 { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, 2331 { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, 2332 { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2333 { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2334 { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */ 2335 { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, 2336 { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, 2337 { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, 2338 2339 /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */ 2340 { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 }, 2341 { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Unicode 5.2 */ 2342 2343 { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, 2344 { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN }, 2345 { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH }, 2346 { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH }, 2347 { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL }, 2348 { 0x06C3, UCHAR_JOINING_GROUP, U_JG_HAMZA_ON_HEH_GOAL }, 2349 2350 { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING }, 2351 { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING }, 2352 { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING }, 2353 { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING }, 2354 { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING }, 2355 { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT }, 2356 { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT }, 2357 { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT }, 2358 2359 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */ 2360 { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN }, 2361 { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN }, 2362 { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION }, 2363 { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION }, 2364 { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, 2365 { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, 2366 { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, 2367 { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, 2368 { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE }, 2369 { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE }, 2370 { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE }, 2371 { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION }, 2372 { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS }, 2373 { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC }, 2374 { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC }, 2375 2376 /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */ 2377 2378 /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */ 2379 2380 { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2381 { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, 2382 { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, 2383 { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, 2384 { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ 2385 { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ 2386 { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, 2387 2388 { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2389 { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ 2390 { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ 2391 { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2392 2393 { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, 2394 { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, 2395 { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, 2396 { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, 2397 { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2398 { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2399 2400 { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2401 { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2402 { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2403 { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2404 2405 { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, 2406 { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, 2407 { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, 2408 { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, 2409 { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2410 { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2411 { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2412 2413 { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2414 { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2415 { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2416 { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2417 2418 { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, 2419 { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, 2420 { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, 2421 { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, 2422 2423 { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2424 { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2425 { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2426 { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2427 { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2428 2429 { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2430 2431 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */ 2432 2433 { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE }, 2434 { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE }, 2435 { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE }, 2436 2437 { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE }, 2438 { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE }, 2439 { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE }, 2440 { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE }, 2441 { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE }, 2442 2443 { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION }, 2444 { 0x2c8e, UCHAR_BLOCK, UBLOCK_COPTIC }, 2445 { 0xfe17, UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS }, 2446 2447 { 0x1a00, UCHAR_SCRIPT, USCRIPT_BUGINESE }, 2448 { 0x2cea, UCHAR_SCRIPT, USCRIPT_COPTIC }, 2449 { 0xa82b, UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI }, 2450 { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN }, 2451 2452 { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 }, 2453 { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 }, 2454 { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 }, 2455 { 0x115f, UCHAR_LINE_BREAK, U_LB_JL }, 2456 { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT }, 2457 { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV }, 2458 2459 { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT }, 2460 { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND }, 2461 { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL }, 2462 { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V }, 2463 2464 { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER }, 2465 { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER }, 2466 { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC }, 2467 { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM }, 2468 2469 { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER }, 2470 { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER }, 2471 { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE }, 2472 { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP }, 2473 2474 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */ 2475 2476 /* test some script codes >127 */ 2477 { 0xa6e6, UCHAR_SCRIPT, USCRIPT_BAMUM }, 2478 { 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU }, 2479 { 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN }, 2480 2481 /* undefined UProperty values */ 2482 { 0x61, 0x4a7, 0 }, 2483 { 0x234bc, 0x15ed, 0 } 2484 }; 2485 2486 UVersionInfo version; 2487 UChar32 c; 2488 int32_t i, result, uVersion; 2489 UProperty which; 2490 2491 /* what is our Unicode version? */ 2492 u_getUnicodeVersion(version); 2493 uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */ 2494 2495 u_charAge(0x20, version); 2496 if(version[0]==0) { 2497 /* no additional properties available */ 2498 log_err("TestAdditionalProperties: no additional properties available, not tested\n"); 2499 return; 2500 } 2501 2502 /* test u_charAge() */ 2503 for(i=0; i<sizeof(charAges)/sizeof(charAges[0]); ++i) { 2504 u_charAge(charAges[i].c, version); 2505 if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) { 2506 log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n", 2507 charAges[i].c, 2508 version[0], version[1], version[2], version[3], 2509 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]); 2510 } 2511 } 2512 2513 if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 || 2514 u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 || 2515 u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 || /* j2478 */ 2516 u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/ 2517 u_getIntPropertyMinValue(0x2345)!=0 2518 ) { 2519 log_err("error: u_getIntPropertyMinValue() wrong\n"); 2520 } 2521 if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) { 2522 log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n"); 2523 } 2524 if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) { 2525 log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n"); 2526 } 2527 if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) { 2528 log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n"); 2529 } 2530 if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) { 2531 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n"); 2532 } 2533 if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) { 2534 log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n"); 2535 } 2536 if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) { 2537 log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n"); 2538 } 2539 if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) { 2540 log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n"); 2541 } 2542 if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) { 2543 log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n"); 2544 } 2545 if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) { 2546 log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n"); 2547 } 2548 if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) { 2549 log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n"); 2550 } 2551 if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) { 2552 log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n"); 2553 } 2554 if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) { 2555 log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n"); 2556 } 2557 if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) { 2558 log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n"); 2559 } 2560 /*JB#2410*/ 2561 if( u_getIntPropertyMaxValue(0x2345)!=-1) { 2562 log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n"); 2563 } 2564 if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) { 2565 log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n"); 2566 } 2567 if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1)) { 2568 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n"); 2569 } 2570 if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) { 2571 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n"); 2572 } 2573 if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) { 2574 log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n"); 2575 } 2576 2577 /* test u_hasBinaryProperty() and u_getIntPropertyValue() */ 2578 for(i=0; i<sizeof(props)/sizeof(props[0]); ++i) { 2579 const char *whichName; 2580 2581 if(props[i][0]<0) { 2582 /* Unicode version break */ 2583 if(uVersion<props[i][1]) { 2584 break; /* do not test properties that are not yet supported */ 2585 } else { 2586 continue; /* skip this row */ 2587 } 2588 } 2589 2590 c=(UChar32)props[i][0]; 2591 which=(UProperty)props[i][1]; 2592 whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME); 2593 2594 if(which<UCHAR_INT_START) { 2595 result=u_hasBinaryProperty(c, which); 2596 if(result!=props[i][2]) { 2597 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n", 2598 c, whichName, result, i); 2599 } 2600 } 2601 2602 result=u_getIntPropertyValue(c, which); 2603 if(result!=props[i][2]) { 2604 log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n", 2605 c, whichName, result, props[i][2], i); 2606 } 2607 2608 /* test separate functions, too */ 2609 switch((UProperty)props[i][1]) { 2610 case UCHAR_ALPHABETIC: 2611 if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) { 2612 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n", 2613 props[i][0], result, i); 2614 } 2615 break; 2616 case UCHAR_LOWERCASE: 2617 if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) { 2618 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n", 2619 props[i][0], result, i); 2620 } 2621 break; 2622 case UCHAR_UPPERCASE: 2623 if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) { 2624 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n", 2625 props[i][0], result, i); 2626 } 2627 break; 2628 case UCHAR_WHITE_SPACE: 2629 if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) { 2630 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n", 2631 props[i][0], result, i); 2632 } 2633 break; 2634 default: 2635 break; 2636 } 2637 } 2638 } 2639 2640 static void 2641 TestNumericProperties(void) { 2642 /* see UnicodeData.txt, DerivedNumericValues.txt */ 2643 static const struct { 2644 UChar32 c; 2645 int32_t type; 2646 double numValue; 2647 } values[]={ 2648 { 0x0F33, U_NT_NUMERIC, -1./2. }, 2649 { 0x0C66, U_NT_DECIMAL, 0 }, 2650 { 0x96f6, U_NT_NUMERIC, 0 }, 2651 { 0xa833, U_NT_NUMERIC, 1./16. }, 2652 { 0x2152, U_NT_NUMERIC, 1./10. }, 2653 { 0x2151, U_NT_NUMERIC, 1./9. }, 2654 { 0x1245f, U_NT_NUMERIC, 1./8. }, 2655 { 0x2150, U_NT_NUMERIC, 1./7. }, 2656 { 0x2159, U_NT_NUMERIC, 1./6. }, 2657 { 0x09f6, U_NT_NUMERIC, 3./16. }, 2658 { 0x2155, U_NT_NUMERIC, 1./5. }, 2659 { 0x00BD, U_NT_NUMERIC, 1./2. }, 2660 { 0x0031, U_NT_DECIMAL, 1. }, 2661 { 0x4e00, U_NT_NUMERIC, 1. }, 2662 { 0x58f1, U_NT_NUMERIC, 1. }, 2663 { 0x10320, U_NT_NUMERIC, 1. }, 2664 { 0x0F2B, U_NT_NUMERIC, 3./2. }, 2665 { 0x00B2, U_NT_DIGIT, 2. }, 2666 { 0x5f10, U_NT_NUMERIC, 2. }, 2667 { 0x1813, U_NT_DECIMAL, 3. }, 2668 { 0x5f0e, U_NT_NUMERIC, 3. }, 2669 { 0x2173, U_NT_NUMERIC, 4. }, 2670 { 0x8086, U_NT_NUMERIC, 4. }, 2671 { 0x278E, U_NT_DIGIT, 5. }, 2672 { 0x1D7F2, U_NT_DECIMAL, 6. }, 2673 { 0x247A, U_NT_DIGIT, 7. }, 2674 { 0x7396, U_NT_NUMERIC, 9. }, 2675 { 0x1372, U_NT_NUMERIC, 10. }, 2676 { 0x216B, U_NT_NUMERIC, 12. }, 2677 { 0x16EE, U_NT_NUMERIC, 17. }, 2678 { 0x249A, U_NT_NUMERIC, 19. }, 2679 { 0x303A, U_NT_NUMERIC, 30. }, 2680 { 0x5345, U_NT_NUMERIC, 30. }, 2681 { 0x32B2, U_NT_NUMERIC, 37. }, 2682 { 0x1375, U_NT_NUMERIC, 40. }, 2683 { 0x10323, U_NT_NUMERIC, 50. }, 2684 { 0x0BF1, U_NT_NUMERIC, 100. }, 2685 { 0x964c, U_NT_NUMERIC, 100. }, 2686 { 0x217E, U_NT_NUMERIC, 500. }, 2687 { 0x2180, U_NT_NUMERIC, 1000. }, 2688 { 0x4edf, U_NT_NUMERIC, 1000. }, 2689 { 0x2181, U_NT_NUMERIC, 5000. }, 2690 { 0x137C, U_NT_NUMERIC, 10000. }, 2691 { 0x4e07, U_NT_NUMERIC, 10000. }, 2692 { 0x4ebf, U_NT_NUMERIC, 100000000. }, 2693 { 0x5146, U_NT_NUMERIC, 1000000000000. }, 2694 { -1, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2695 { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2696 { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2697 { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2698 { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2699 { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2700 { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2701 { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE } 2702 }; 2703 2704 double nv; 2705 UChar32 c; 2706 int32_t i, type; 2707 2708 for(i=0; i<LENGTHOF(values); ++i) { 2709 c=values[i].c; 2710 type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE); 2711 nv=u_getNumericValue(c); 2712 2713 if(type!=values[i].type) { 2714 log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type); 2715 } 2716 if(0.000001 <= fabs(nv - values[i].numValue)) { 2717 log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue); 2718 } 2719 } 2720 } 2721 2722 /** 2723 * Test the property names and property value names API. 2724 */ 2725 static void 2726 TestPropertyNames(void) { 2727 int32_t p, v, choice=0, rev; 2728 UBool atLeastSomething = FALSE; 2729 2730 for (p=0; ; ++p) { 2731 UProperty propEnum = (UProperty)p; 2732 UBool sawProp = FALSE; 2733 if(p > 10 && !atLeastSomething) { 2734 log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice); 2735 return; 2736 } 2737 2738 for (choice=0; ; ++choice) { 2739 const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice); 2740 if (name) { 2741 if (!sawProp) 2742 log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff); 2743 log_verbose("%d=\"%s\"", choice, name); 2744 sawProp = TRUE; 2745 atLeastSomething = TRUE; 2746 2747 /* test reverse mapping */ 2748 rev = u_getPropertyEnum(name); 2749 if (rev != p) { 2750 log_err("Property round-trip failure: %d -> %s -> %d\n", 2751 p, name, rev); 2752 } 2753 } 2754 if (!name && choice>0) break; 2755 } 2756 if (sawProp) { 2757 /* looks like a valid property; check the values */ 2758 const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME); 2759 int32_t max = 0; 2760 if (p == UCHAR_CANONICAL_COMBINING_CLASS) { 2761 max = 255; 2762 } else if (p == UCHAR_GENERAL_CATEGORY_MASK) { 2763 /* it's far too slow to iterate all the way up to 2764 the real max, U_GC_P_MASK */ 2765 max = U_GC_NL_MASK; 2766 } else if (p == UCHAR_BLOCK) { 2767 /* UBlockCodes, unlike other values, start at 1 */ 2768 max = 1; 2769 } 2770 log_verbose("\n"); 2771 for (v=-1; ; ++v) { 2772 UBool sawValue = FALSE; 2773 for (choice=0; ; ++choice) { 2774 const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice); 2775 if (vname) { 2776 if (!sawValue) log_verbose(" %s, value %d:", pname, v); 2777 log_verbose("%d=\"%s\"", choice, vname); 2778 sawValue = TRUE; 2779 2780 /* test reverse mapping */ 2781 rev = u_getPropertyValueEnum(propEnum, vname); 2782 if (rev != v) { 2783 log_err("Value round-trip failure (%s): %d -> %s -> %d\n", 2784 pname, v, vname, rev); 2785 } 2786 } 2787 if (!vname && choice>0) break; 2788 } 2789 if (sawValue) { 2790 log_verbose("\n"); 2791 } 2792 if (!sawValue && v>=max) break; 2793 } 2794 } 2795 if (!sawProp) { 2796 if (p>=UCHAR_STRING_LIMIT) { 2797 break; 2798 } else if (p>=UCHAR_DOUBLE_LIMIT) { 2799 p = UCHAR_STRING_START - 1; 2800 } else if (p>=UCHAR_MASK_LIMIT) { 2801 p = UCHAR_DOUBLE_START - 1; 2802 } else if (p>=UCHAR_INT_LIMIT) { 2803 p = UCHAR_MASK_START - 1; 2804 } else if (p>=UCHAR_BINARY_LIMIT) { 2805 p = UCHAR_INT_START - 1; 2806 } 2807 } 2808 } 2809 } 2810 2811 /** 2812 * Test the property values API. See JB#2410. 2813 */ 2814 static void 2815 TestPropertyValues(void) { 2816 int32_t i, p, min, max; 2817 UErrorCode ec; 2818 2819 /* Min should be 0 for everything. */ 2820 /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */ 2821 for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) { 2822 UProperty propEnum = (UProperty)p; 2823 min = u_getIntPropertyMinValue(propEnum); 2824 if (min != 0) { 2825 if (p == UCHAR_BLOCK) { 2826 /* This is okay...for now. See JB#2487. 2827 TODO Update this for JB#2487. */ 2828 } else { 2829 const char* name; 2830 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME); 2831 if (name == NULL) 2832 name = "<ERROR>"; 2833 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n", 2834 name, min); 2835 } 2836 } 2837 } 2838 2839 if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 || 2840 u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) { 2841 log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n"); 2842 } 2843 2844 /* Max should be -1 for invalid properties. */ 2845 max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE); 2846 if (max != -1) { 2847 log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n", 2848 max); 2849 } 2850 2851 /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */ 2852 for (i=0; i<2; ++i) { 2853 int32_t script; 2854 const char* desc; 2855 ec = U_ZERO_ERROR; 2856 switch (i) { 2857 case 0: 2858 script = uscript_getScript(-1, &ec); 2859 desc = "uscript_getScript(-1)"; 2860 break; 2861 case 1: 2862 script = u_getIntPropertyValue(-1, UCHAR_SCRIPT); 2863 desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)"; 2864 break; 2865 default: 2866 log_err("Internal test error. Too many scripts\n"); 2867 return; 2868 } 2869 /* We don't explicitly test ec. It should be U_FAILURE but it 2870 isn't documented as such. */ 2871 if (script != (int32_t)USCRIPT_INVALID_CODE) { 2872 log_err("FAIL: %s = %d, exp. 0\n", 2873 desc, script); 2874 } 2875 } 2876 } 2877 2878 /* add characters from a serialized set to a normal one */ 2879 static void 2880 _setAddSerialized(USet *set, const USerializedSet *sset) { 2881 UChar32 start, end; 2882 int32_t i, count; 2883 2884 count=uset_getSerializedRangeCount(sset); 2885 for(i=0; i<count; ++i) { 2886 uset_getSerializedRange(sset, i, &start, &end); 2887 uset_addRange(set, start, end); 2888 } 2889 } 2890 2891 /* various tests for consistency of UCD data and API behavior */ 2892 static void 2893 TestConsistency() { 2894 #if !UCONFIG_NO_NORMALIZATION 2895 UChar buffer16[300]; 2896 #endif 2897 char buffer[300]; 2898 USet *set1, *set2, *set3, *set4; 2899 UErrorCode errorCode; 2900 2901 #if !UCONFIG_NO_NORMALIZATION 2902 const UNormalizer2 *norm2; 2903 USerializedSet sset; 2904 #endif 2905 UChar32 start, end; 2906 int32_t i, length; 2907 2908 U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10); 2909 U_STRING_DECL(dashPattern, "[:Dash:]", 8); 2910 U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13); 2911 U_STRING_DECL(formatPattern, "[:Cf:]", 6); 2912 U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14); 2913 2914 U_STRING_DECL(mathBlocksPattern, 2915 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]", 2916 1+32+46+46+45+43+1+1); /* +1 for NUL */ 2917 U_STRING_DECL(mathPattern, "[:Math:]", 8); 2918 U_STRING_DECL(unassignedPattern, "[:Cn:]", 6); 2919 U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14); 2920 U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20); 2921 2922 U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10); 2923 U_STRING_INIT(dashPattern, "[:Dash:]", 8); 2924 U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13); 2925 U_STRING_INIT(formatPattern, "[:Cf:]", 6); 2926 U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14); 2927 2928 U_STRING_INIT(mathBlocksPattern, 2929 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]", 2930 1+32+46+46+45+43+1+1); /* +1 for NUL */ 2931 U_STRING_INIT(mathPattern, "[:Math:]", 8); 2932 U_STRING_INIT(unassignedPattern, "[:Cn:]", 6); 2933 U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14); 2934 U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20); 2935 2936 /* 2937 * It used to be that UCD.html and its precursors said 2938 * "Those dashes used to mark connections between pieces of words, 2939 * plus the Katakana middle dot." 2940 * 2941 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash 2942 * but not from Hyphen. 2943 * UTC 94 (2003mar) decided to leave it that way and to changed UCD.html. 2944 * Therefore, do not show errors when testing the Hyphen property. 2945 */ 2946 log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n" 2947 "known to the UTC and not considered errors.\n"); 2948 2949 errorCode=U_ZERO_ERROR; 2950 set1=uset_openPattern(hyphenPattern, 10, &errorCode); 2951 set2=uset_openPattern(dashPattern, 8, &errorCode); 2952 if(U_SUCCESS(errorCode)) { 2953 /* remove the Katakana middle dot(s) from set1 */ 2954 uset_remove(set1, 0x30fb); 2955 uset_remove(set1, 0xff65); /* halfwidth variant */ 2956 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE); 2957 } else { 2958 log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode)); 2959 } 2960 2961 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */ 2962 set3=uset_openPattern(formatPattern, 6, &errorCode); 2963 set4=uset_openPattern(alphaPattern, 14, &errorCode); 2964 if(U_SUCCESS(errorCode)) { 2965 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE); 2966 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE); 2967 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE); 2968 } else { 2969 log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode)); 2970 } 2971 2972 uset_close(set1); 2973 uset_close(set2); 2974 uset_close(set3); 2975 uset_close(set4); 2976 2977 /* 2978 * Check that each lowercase character has "small" in its name 2979 * and not "capital". 2980 * There are some such characters, some of which seem odd. 2981 * Use the verbose flag to see these notices. 2982 */ 2983 errorCode=U_ZERO_ERROR; 2984 set1=uset_openPattern(lowerPattern, 13, &errorCode); 2985 if(U_SUCCESS(errorCode)) { 2986 for(i=0;; ++i) { 2987 length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode); 2988 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 2989 break; /* done */ 2990 } 2991 if(U_FAILURE(errorCode)) { 2992 log_err("error iterating over [:Lowercase:] at item %d: %s\n", 2993 i, u_errorName(errorCode)); 2994 break; 2995 } 2996 if(length!=0) { 2997 break; /* done with code points, got a string or -1 */ 2998 } 2999 3000 while(start<=end) { 3001 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode); 3002 if(U_FAILURE(errorCode)) { 3003 log_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode)); 3004 errorCode=U_ZERO_ERROR; 3005 continue; 3006 } 3007 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) && 3008 strstr(buffer, "SMALL CAPITAL")==NULL 3009 ) { 3010 log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer); 3011 } 3012 ++start; 3013 } 3014 } 3015 } else { 3016 log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode)); 3017 } 3018 uset_close(set1); 3019 3020 #if !UCONFIG_NO_NORMALIZATION 3021 3022 /* 3023 * Test for an example that unorm_getCanonStartSet() delivers 3024 * all characters that compose from the input one, 3025 * even in multiple steps. 3026 * For example, the set for "I" (0049) should contain both 3027 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E). 3028 * In general, the set for the middle such character should be a subset 3029 * of the set for the first. 3030 */ 3031 errorCode=U_ZERO_ERROR; 3032 norm2=unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, &errorCode); 3033 if(U_FAILURE(errorCode)) { 3034 log_data_err("unorm2_getInstance(NFD) failed - %s\n", u_errorName(errorCode)); 3035 return; 3036 } 3037 3038 set1=uset_open(1, 0); 3039 set2=uset_open(1, 0); 3040 3041 if (unorm_getCanonStartSet(0x49, &sset)) { 3042 UChar source[1]; 3043 3044 _setAddSerialized(set1, &sset); 3045 3046 /* enumerate all characters that are plausible to be latin letters */ 3047 for(start=0xa0; start<0x2000; ++start) { 3048 source[0]=(UChar)start; 3049 length=unorm2_normalize(norm2, source, 1, buffer16, LENGTHOF(buffer16), &errorCode); 3050 if(length>1 && buffer16[0]==0x49) { 3051 uset_add(set2, start); 3052 } 3053 } 3054 3055 compareUSets(set1, set2, 3056 "[canon start set of 0049]", "[all c with canon decomp with 0049]", 3057 TRUE); 3058 } else { 3059 log_err("error calling unorm_getCanonStartSet()\n"); 3060 } 3061 3062 uset_close(set1); 3063 uset_close(set2); 3064 3065 #endif 3066 3067 /* verify that all assigned characters in Math blocks are exactly Math characters */ 3068 errorCode=U_ZERO_ERROR; 3069 set1=uset_openPattern(mathBlocksPattern, -1, &errorCode); 3070 set2=uset_openPattern(mathPattern, 8, &errorCode); 3071 set3=uset_openPattern(unassignedPattern, 6, &errorCode); 3072 if(U_SUCCESS(errorCode)) { 3073 uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */ 3074 uset_complement(set3); /* assigned characters */ 3075 uset_retainAll(set1, set3); /* [math blocks]&[assigned] */ 3076 compareUSets(set1, set2, 3077 "[assigned Math block chars]", "[math blocks]&[:Math:]", 3078 TRUE); 3079 } else { 3080 log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode)); 3081 } 3082 uset_close(set1); 3083 uset_close(set2); 3084 uset_close(set3); 3085 3086 /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */ 3087 errorCode=U_ZERO_ERROR; 3088 set1=uset_openPattern(unknownPattern, 14, &errorCode); 3089 set2=uset_openPattern(reservedPattern, 20, &errorCode); 3090 if(U_SUCCESS(errorCode)) { 3091 compareUSets(set1, set2, 3092 "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]", 3093 TRUE); 3094 } else { 3095 log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode)); 3096 } 3097 uset_close(set1); 3098 uset_close(set2); 3099 } 3100 3101 /* 3102 * Starting with ICU4C 3.4, the core Unicode properties files 3103 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu) 3104 * are hardcoded in the common DLL and therefore not included 3105 * in the data package any more. 3106 * Test requiring these files are disabled so that 3107 * we need not jump through hoops (like adding snapshots of these files 3108 * to testdata). 3109 * See Jitterbug 4497. 3110 */ 3111 #define HARDCODED_DATA_4497 1 3112 3113 /* API coverage for ucase.c */ 3114 static void TestUCase() { 3115 #if !HARDCODED_DATA_4497 3116 UDataMemory *pData; 3117 UCaseProps *csp; 3118 const UCaseProps *ccsp; 3119 UErrorCode errorCode; 3120 3121 /* coverage for ucase_openBinary() */ 3122 errorCode=U_ZERO_ERROR; 3123 pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode); 3124 if(U_FAILURE(errorCode)) { 3125 log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n", 3126 u_errorName(errorCode)); 3127 return; 3128 } 3129 3130 csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode); 3131 if(U_FAILURE(errorCode)) { 3132 log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n", 3133 u_errorName(errorCode)); 3134 udata_close(pData); 3135 return; 3136 } 3137 3138 if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */ 3139 log_err("ucase_openBinary() does not seem to return working UCaseProps\n"); 3140 } 3141 3142 ucase_close(csp); 3143 udata_close(pData); 3144 3145 /* coverage for ucase_getDummy() */ 3146 errorCode=U_ZERO_ERROR; 3147 ccsp=ucase_getDummy(&errorCode); 3148 if(ucase_tolower(ccsp, 0x41)!=0x41) { 3149 log_err("ucase_tolower(dummy, A)!=A\n"); 3150 } 3151 #endif 3152 } 3153 3154 /* API coverage for ubidi_props.c */ 3155 static void TestUBiDiProps() { 3156 #if !HARDCODED_DATA_4497 3157 UDataMemory *pData; 3158 UBiDiProps *bdp; 3159 const UBiDiProps *cbdp; 3160 UErrorCode errorCode; 3161 3162 /* coverage for ubidi_openBinary() */ 3163 errorCode=U_ZERO_ERROR; 3164 pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode); 3165 if(U_FAILURE(errorCode)) { 3166 log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n", 3167 u_errorName(errorCode)); 3168 return; 3169 } 3170 3171 bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode); 3172 if(U_FAILURE(errorCode)) { 3173 log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n", 3174 u_errorName(errorCode)); 3175 udata_close(pData); 3176 return; 3177 } 3178 3179 if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */ 3180 log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n"); 3181 } 3182 3183 ubidi_closeProps(bdp); 3184 udata_close(pData); 3185 3186 /* coverage for ubidi_getDummy() */ 3187 errorCode=U_ZERO_ERROR; 3188 cbdp=ubidi_getDummy(&errorCode); 3189 if(ubidi_getClass(cbdp, 0x20)!=0) { 3190 log_err("ubidi_getClass(dummy, space)!=0\n"); 3191 } 3192 #endif 3193 } 3194 3195 /* test case folding, compare return values with CaseFolding.txt ------------ */ 3196 3197 /* bit set for which case foldings for a character have been tested already */ 3198 enum { 3199 CF_SIMPLE=1, 3200 CF_FULL=2, 3201 CF_TURKIC=4, 3202 CF_ALL=7 3203 }; 3204 3205 static void 3206 testFold(UChar32 c, int which, 3207 UChar32 simple, UChar32 turkic, 3208 const UChar *full, int32_t fullLength, 3209 const UChar *turkicFull, int32_t turkicFullLength) { 3210 UChar s[2], t[32]; 3211 UChar32 c2; 3212 int32_t length, length2; 3213 3214 UErrorCode errorCode=U_ZERO_ERROR; 3215 3216 length=0; 3217 U16_APPEND_UNSAFE(s, length, c); 3218 3219 if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) { 3220 log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple); 3221 } 3222 if((which&CF_FULL)!=0) { 3223 length2=u_strFoldCase(t, LENGTHOF(t), s, length, 0, &errorCode); 3224 if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) { 3225 log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c); 3226 } 3227 } 3228 if((which&CF_TURKIC)!=0) { 3229 if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) { 3230 log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple); 3231 } 3232 3233 length2=u_strFoldCase(t, LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); 3234 if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) { 3235 log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c); 3236 } 3237 } 3238 } 3239 3240 /* test that c case-folds to itself */ 3241 static void 3242 testFoldToSelf(UChar32 c, int which) { 3243 UChar s[2]; 3244 int32_t length; 3245 3246 length=0; 3247 U16_APPEND_UNSAFE(s, length, c); 3248 testFold(c, which, c, c, s, length, s, length); 3249 } 3250 3251 struct CaseFoldingData { 3252 USet *notSeen; 3253 UChar32 prev, prevSimple; 3254 UChar prevFull[32]; 3255 int32_t prevFullLength; 3256 int which; 3257 }; 3258 typedef struct CaseFoldingData CaseFoldingData; 3259 3260 static void U_CALLCONV 3261 caseFoldingLineFn(void *context, 3262 char *fields[][2], int32_t fieldCount, 3263 UErrorCode *pErrorCode) { 3264 CaseFoldingData *pData=(CaseFoldingData *)context; 3265 char *end; 3266 UChar full[32]; 3267 UChar32 c, prev, simple; 3268 int32_t count; 3269 int which; 3270 char status; 3271 3272 /* get code point */ 3273 c=(UChar32)strtoul(u_skipWhitespace(fields[0][0]), &end, 16); 3274 end=(char *)u_skipWhitespace(end); 3275 if(end<=fields[0][0] || end!=fields[0][1]) { 3276 log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]); 3277 *pErrorCode=U_PARSE_ERROR; 3278 return; 3279 } 3280 3281 /* get the status of this mapping */ 3282 status=*u_skipWhitespace(fields[1][0]); 3283 if(status!='C' && status!='S' && status!='F' && status!='T') { 3284 log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]); 3285 *pErrorCode=U_PARSE_ERROR; 3286 return; 3287 } 3288 3289 /* get the mapping */ 3290 count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode); 3291 if(U_FAILURE(*pErrorCode)) { 3292 log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]); 3293 return; 3294 } 3295 3296 /* there is a simple mapping only if there is exactly one code point (count is in UChars) */ 3297 if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) { 3298 simple=c; 3299 } 3300 3301 if(c!=(prev=pData->prev)) { 3302 /* 3303 * Test remaining mappings for the previous code point. 3304 * If a turkic folding was not mentioned, then it should fold the same 3305 * as the regular simple case folding. 3306 */ 3307 UChar s[2]; 3308 int32_t length; 3309 3310 length=0; 3311 U16_APPEND_UNSAFE(s, length, prev); 3312 testFold(prev, (~pData->which)&CF_ALL, 3313 prev, pData->prevSimple, 3314 s, length, 3315 pData->prevFull, pData->prevFullLength); 3316 pData->prev=pData->prevSimple=c; 3317 length=0; 3318 U16_APPEND_UNSAFE(pData->prevFull, length, c); 3319 pData->prevFullLength=length; 3320 pData->which=0; 3321 } 3322 3323 /* 3324 * Turn the status into a bit set of case foldings to test. 3325 * Remember non-Turkic case foldings as defaults for Turkic mode. 3326 */ 3327 switch(status) { 3328 case 'C': 3329 which=CF_SIMPLE|CF_FULL; 3330 pData->prevSimple=simple; 3331 u_memcpy(pData->prevFull, full, count); 3332 pData->prevFullLength=count; 3333 break; 3334 case 'S': 3335 which=CF_SIMPLE; 3336 pData->prevSimple=simple; 3337 break; 3338 case 'F': 3339 which=CF_FULL; 3340 u_memcpy(pData->prevFull, full, count); 3341 pData->prevFullLength=count; 3342 break; 3343 case 'T': 3344 which=CF_TURKIC; 3345 break; 3346 default: 3347 which=0; 3348 break; /* won't happen because of test above */ 3349 } 3350 3351 testFold(c, which, simple, simple, full, count, full, count); 3352 3353 /* remember which case foldings of c have been tested */ 3354 pData->which|=which; 3355 3356 /* remove c from the set of ones not mentioned in CaseFolding.txt */ 3357 uset_remove(pData->notSeen, c); 3358 } 3359 3360 static void 3361 TestCaseFolding() { 3362 CaseFoldingData data={ NULL }; 3363 char *fields[3][2]; 3364 UErrorCode errorCode; 3365 3366 static char *lastLine= (char *)"10FFFF; C; 10FFFF;"; 3367 3368 errorCode=U_ZERO_ERROR; 3369 /* test BMP & plane 1 - nothing interesting above */ 3370 data.notSeen=uset_open(0, 0x1ffff); 3371 data.prevFullLength=1; /* length of full case folding of U+0000 */ 3372 3373 parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode); 3374 if(U_SUCCESS(errorCode)) { 3375 int32_t i, start, end; 3376 3377 /* add a pseudo-last line to finish testing of the actual last one */ 3378 fields[0][0]=lastLine; 3379 fields[0][1]=lastLine+6; 3380 fields[1][0]=lastLine+7; 3381 fields[1][1]=lastLine+9; 3382 fields[2][0]=lastLine+10; 3383 fields[2][1]=lastLine+17; 3384 caseFoldingLineFn(&data, fields, 3, &errorCode); 3385 3386 /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */ 3387 for(i=0; 3388 0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) && 3389 U_SUCCESS(errorCode); 3390 ++i 3391 ) { 3392 do { 3393 testFoldToSelf(start, CF_ALL); 3394 } while(++start<=end); 3395 } 3396 } 3397 3398 uset_close(data.notSeen); 3399 } 3400