1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * COPYRIGHT: 5 * Copyright (c) 1997-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ********************************************************************/ 8 /******************************************************************************* 9 * 10 * File CUCDTST.C 11 * 12 * Modification History: 13 * Name Description 14 * Madhu Katragadda Ported for C API, added tests for string functions 15 ******************************************************************************** 16 */ 17 18 #include <string.h> 19 #include <math.h> 20 #include <stdlib.h> 21 22 #include "unicode/utypes.h" 23 #include "unicode/uchar.h" 24 #include "unicode/putil.h" 25 #include "unicode/ustring.h" 26 #include "unicode/uloc.h" 27 #include "unicode/unorm2.h" 28 29 #include "cintltst.h" 30 #include "putilimp.h" 31 #include "uparse.h" 32 #include "ucase.h" 33 #include "ubidi_props.h" 34 #include "uprops.h" 35 #include "uset_imp.h" 36 #include "usc_impl.h" 37 #include "udatamem.h" /* for testing ucase_openBinary() */ 38 #include "cucdapi.h" 39 #include "cmemory.h" 40 41 /* prototypes --------------------------------------------------------------- */ 42 43 static void TestUpperLower(void); 44 static void TestLetterNumber(void); 45 static void TestMisc(void); 46 static void TestPOSIX(void); 47 static void TestControlPrint(void); 48 static void TestIdentifier(void); 49 static void TestUnicodeData(void); 50 static void TestCodeUnit(void); 51 static void TestCodePoint(void); 52 static void TestCharLength(void); 53 static void TestCharNames(void); 54 static void TestUCharFromNameUnderflow(void); 55 static void TestMirroring(void); 56 static void TestUScriptRunAPI(void); 57 static void TestAdditionalProperties(void); 58 static void TestNumericProperties(void); 59 static void TestPropertyNames(void); 60 static void TestPropertyValues(void); 61 static void TestConsistency(void); 62 static void TestUCase(void); 63 static void TestUBiDiProps(void); 64 static void TestCaseFolding(void); 65 66 /* internal methods used */ 67 static int32_t MakeProp(char* str); 68 static int32_t MakeDir(char* str); 69 70 /* helpers ------------------------------------------------------------------ */ 71 72 static void 73 parseUCDFile(const char *filename, 74 char *fields[][2], int32_t fieldCount, 75 UParseLineFn *lineFn, void *context, 76 UErrorCode *pErrorCode) { 77 char path[256]; 78 char backupPath[256]; 79 80 if(U_FAILURE(*pErrorCode)) { 81 return; 82 } 83 84 /* Look inside ICU_DATA first */ 85 strcpy(path, u_getDataDirectory()); 86 strcat(path, ".." U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING); 87 strcat(path, filename); 88 89 /* As a fallback, try to guess where the source data was located 90 * at the time ICU was built, and look there. 91 */ 92 strcpy(backupPath, ctest_dataSrcDir()); 93 strcat(backupPath, U_FILE_SEP_STRING); 94 strcat(backupPath, "unidata" U_FILE_SEP_STRING); 95 strcat(backupPath, filename); 96 97 u_parseDelimitedFile(path, ';', fields, fieldCount, lineFn, context, pErrorCode); 98 if(*pErrorCode==U_FILE_ACCESS_ERROR) { 99 *pErrorCode=U_ZERO_ERROR; 100 u_parseDelimitedFile(backupPath, ';', fields, fieldCount, lineFn, context, pErrorCode); 101 } 102 if(U_FAILURE(*pErrorCode)) { 103 log_err_status(*pErrorCode, "error parsing %s: %s\n", filename, u_errorName(*pErrorCode)); 104 } 105 } 106 107 /* test data ---------------------------------------------------------------- */ 108 109 static const char tagStrings[] = "MnMcMeNdNlNoZsZlZpCcCfCsCoCnLuLlLtLmLoPcPdPsPePoSmScSkSoPiPf"; 110 static const int32_t tagValues[] = 111 { 112 /* Mn */ U_NON_SPACING_MARK, 113 /* Mc */ U_COMBINING_SPACING_MARK, 114 /* Me */ U_ENCLOSING_MARK, 115 /* Nd */ U_DECIMAL_DIGIT_NUMBER, 116 /* Nl */ U_LETTER_NUMBER, 117 /* No */ U_OTHER_NUMBER, 118 /* Zs */ U_SPACE_SEPARATOR, 119 /* Zl */ U_LINE_SEPARATOR, 120 /* Zp */ U_PARAGRAPH_SEPARATOR, 121 /* Cc */ U_CONTROL_CHAR, 122 /* Cf */ U_FORMAT_CHAR, 123 /* Cs */ U_SURROGATE, 124 /* Co */ U_PRIVATE_USE_CHAR, 125 /* Cn */ U_UNASSIGNED, 126 /* Lu */ U_UPPERCASE_LETTER, 127 /* Ll */ U_LOWERCASE_LETTER, 128 /* Lt */ U_TITLECASE_LETTER, 129 /* Lm */ U_MODIFIER_LETTER, 130 /* Lo */ U_OTHER_LETTER, 131 /* Pc */ U_CONNECTOR_PUNCTUATION, 132 /* Pd */ U_DASH_PUNCTUATION, 133 /* Ps */ U_START_PUNCTUATION, 134 /* Pe */ U_END_PUNCTUATION, 135 /* Po */ U_OTHER_PUNCTUATION, 136 /* Sm */ U_MATH_SYMBOL, 137 /* Sc */ U_CURRENCY_SYMBOL, 138 /* Sk */ U_MODIFIER_SYMBOL, 139 /* So */ U_OTHER_SYMBOL, 140 /* Pi */ U_INITIAL_PUNCTUATION, 141 /* Pf */ U_FINAL_PUNCTUATION 142 }; 143 144 static const char dirStrings[][5] = { 145 "L", 146 "R", 147 "EN", 148 "ES", 149 "ET", 150 "AN", 151 "CS", 152 "B", 153 "S", 154 "WS", 155 "ON", 156 "LRE", 157 "LRO", 158 "AL", 159 "RLE", 160 "RLO", 161 "PDF", 162 "NSM", 163 "BN", 164 /* new in Unicode 6.3/ICU 52 */ 165 "FSI", 166 "LRI", 167 "RLI", 168 "PDI" 169 }; 170 171 void addUnicodeTest(TestNode** root); 172 173 void addUnicodeTest(TestNode** root) 174 { 175 addTest(root, &TestCodeUnit, "tsutil/cucdtst/TestCodeUnit"); 176 addTest(root, &TestCodePoint, "tsutil/cucdtst/TestCodePoint"); 177 addTest(root, &TestCharLength, "tsutil/cucdtst/TestCharLength"); 178 addTest(root, &TestBinaryValues, "tsutil/cucdtst/TestBinaryValues"); 179 addTest(root, &TestUnicodeData, "tsutil/cucdtst/TestUnicodeData"); 180 addTest(root, &TestAdditionalProperties, "tsutil/cucdtst/TestAdditionalProperties"); 181 addTest(root, &TestNumericProperties, "tsutil/cucdtst/TestNumericProperties"); 182 addTest(root, &TestUpperLower, "tsutil/cucdtst/TestUpperLower"); 183 addTest(root, &TestLetterNumber, "tsutil/cucdtst/TestLetterNumber"); 184 addTest(root, &TestMisc, "tsutil/cucdtst/TestMisc"); 185 addTest(root, &TestPOSIX, "tsutil/cucdtst/TestPOSIX"); 186 addTest(root, &TestControlPrint, "tsutil/cucdtst/TestControlPrint"); 187 addTest(root, &TestIdentifier, "tsutil/cucdtst/TestIdentifier"); 188 addTest(root, &TestCharNames, "tsutil/cucdtst/TestCharNames"); 189 addTest(root, &TestUCharFromNameUnderflow, "tsutil/cucdtst/TestUCharFromNameUnderflow"); 190 addTest(root, &TestMirroring, "tsutil/cucdtst/TestMirroring"); 191 addTest(root, &TestUScriptCodeAPI, "tsutil/cucdtst/TestUScriptCodeAPI"); 192 addTest(root, &TestHasScript, "tsutil/cucdtst/TestHasScript"); 193 addTest(root, &TestGetScriptExtensions, "tsutil/cucdtst/TestGetScriptExtensions"); 194 addTest(root, &TestScriptMetadataAPI, "tsutil/cucdtst/TestScriptMetadataAPI"); 195 addTest(root, &TestUScriptRunAPI, "tsutil/cucdtst/TestUScriptRunAPI"); 196 addTest(root, &TestPropertyNames, "tsutil/cucdtst/TestPropertyNames"); 197 addTest(root, &TestPropertyValues, "tsutil/cucdtst/TestPropertyValues"); 198 addTest(root, &TestConsistency, "tsutil/cucdtst/TestConsistency"); 199 addTest(root, &TestUCase, "tsutil/cucdtst/TestUCase"); 200 addTest(root, &TestUBiDiProps, "tsutil/cucdtst/TestUBiDiProps"); 201 addTest(root, &TestCaseFolding, "tsutil/cucdtst/TestCaseFolding"); 202 } 203 204 /*==================================================== */ 205 /* test u_toupper() and u_tolower() */ 206 /*==================================================== */ 207 static void TestUpperLower() 208 { 209 const UChar upper[] = {0x41, 0x42, 0x00b2, 0x01c4, 0x01c6, 0x01c9, 0x01c8, 0x01c9, 0x000c, 0x0000}; 210 const UChar lower[] = {0x61, 0x62, 0x00b2, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01c9, 0x000c, 0x0000}; 211 U_STRING_DECL(upperTest, "abcdefg123hij.?:klmno", 21); 212 U_STRING_DECL(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21); 213 int32_t i; 214 215 U_STRING_INIT(upperTest, "abcdefg123hij.?:klmno", 21); 216 U_STRING_INIT(lowerTest, "ABCDEFG123HIJ.?:KLMNO", 21); 217 218 /* 219 Checks LetterLike Symbols which were previously a source of confusion 220 [Bertrand A. D. 02/04/98] 221 */ 222 for (i=0x2100;i<0x2138;i++) 223 { 224 /* Unicode 5.0 adds lowercase U+214E (TURNED SMALL F) to U+2132 (TURNED CAPITAL F) */ 225 if(i!=0x2126 && i!=0x212a && i!=0x212b && i!=0x2132) 226 { 227 if (i != (int)u_tolower(i)) /* itself */ 228 log_err("Failed case conversion with itself: U+%04x\n", i); 229 if (i != (int)u_toupper(i)) 230 log_err("Failed case conversion with itself: U+%04x\n", i); 231 } 232 } 233 234 for(i=0; i < u_strlen(upper); i++){ 235 if(u_tolower(upper[i]) != lower[i]){ 236 log_err("FAILED u_tolower() for %lx Expected %lx Got %lx\n", upper[i], lower[i], u_tolower(upper[i])); 237 } 238 } 239 240 log_verbose("testing upper lower\n"); 241 for (i = 0; i < 21; i++) { 242 243 if (u_isalpha(upperTest[i]) && !u_islower(upperTest[i])) 244 { 245 log_err("Failed isLowerCase test at %c\n", upperTest[i]); 246 } 247 else if (u_isalpha(lowerTest[i]) && !u_isupper(lowerTest[i])) 248 { 249 log_err("Failed isUpperCase test at %c\n", lowerTest[i]); 250 } 251 else if (upperTest[i] != u_tolower(lowerTest[i])) 252 { 253 log_err("Failed case conversion from %c To %c :\n", lowerTest[i], upperTest[i]); 254 } 255 else if (lowerTest[i] != u_toupper(upperTest[i])) 256 { 257 log_err("Failed case conversion : %c To %c \n", upperTest[i], lowerTest[i]); 258 } 259 else if (upperTest[i] != u_tolower(upperTest[i])) 260 { 261 log_err("Failed case conversion with itself: %c\n", upperTest[i]); 262 } 263 else if (lowerTest[i] != u_toupper(lowerTest[i])) 264 { 265 log_err("Failed case conversion with itself: %c\n", lowerTest[i]); 266 } 267 } 268 log_verbose("done testing upper lower\n"); 269 270 log_verbose("testing u_istitle\n"); 271 { 272 static const UChar expected[] = { 273 0x1F88, 274 0x1F89, 275 0x1F8A, 276 0x1F8B, 277 0x1F8C, 278 0x1F8D, 279 0x1F8E, 280 0x1F8F, 281 0x1F88, 282 0x1F89, 283 0x1F8A, 284 0x1F8B, 285 0x1F8C, 286 0x1F8D, 287 0x1F8E, 288 0x1F8F, 289 0x1F98, 290 0x1F99, 291 0x1F9A, 292 0x1F9B, 293 0x1F9C, 294 0x1F9D, 295 0x1F9E, 296 0x1F9F, 297 0x1F98, 298 0x1F99, 299 0x1F9A, 300 0x1F9B, 301 0x1F9C, 302 0x1F9D, 303 0x1F9E, 304 0x1F9F, 305 0x1FA8, 306 0x1FA9, 307 0x1FAA, 308 0x1FAB, 309 0x1FAC, 310 0x1FAD, 311 0x1FAE, 312 0x1FAF, 313 0x1FA8, 314 0x1FA9, 315 0x1FAA, 316 0x1FAB, 317 0x1FAC, 318 0x1FAD, 319 0x1FAE, 320 0x1FAF, 321 0x1FBC, 322 0x1FBC, 323 0x1FCC, 324 0x1FCC, 325 0x1FFC, 326 0x1FFC, 327 }; 328 int32_t num = UPRV_LENGTHOF(expected); 329 for(i=0; i<num; i++){ 330 if(!u_istitle(expected[i])){ 331 log_err("u_istitle failed for 0x%4X. Expected TRUE, got FALSE\n",expected[i]); 332 } 333 } 334 335 } 336 } 337 338 /* compare two sets and verify that their difference or intersection is empty */ 339 static UBool 340 showADiffB(const USet *a, const USet *b, 341 const char *a_name, const char *b_name, 342 UBool expect, UBool diffIsError) { 343 USet *aa; 344 int32_t i, start, end, length; 345 UErrorCode errorCode; 346 347 /* 348 * expect: 349 * TRUE -> a-b should be empty, that is, b should contain all of a 350 * FALSE -> a&b should be empty, that is, a should contain none of b (and vice versa) 351 */ 352 if(expect ? uset_containsAll(b, a) : uset_containsNone(a, b)) { 353 return TRUE; 354 } 355 356 /* clone a to aa because a is const */ 357 aa=uset_open(1, 0); 358 if(aa==NULL) { 359 /* unusual problem - out of memory? */ 360 return FALSE; 361 } 362 uset_addAll(aa, a); 363 364 /* compute the set in question */ 365 if(expect) { 366 /* a-b */ 367 uset_removeAll(aa, b); 368 } else { 369 /* a&b */ 370 uset_retainAll(aa, b); 371 } 372 373 /* aa is not empty because of the initial tests above; show its contents */ 374 errorCode=U_ZERO_ERROR; 375 i=0; 376 for(;;) { 377 length=uset_getItem(aa, i, &start, &end, NULL, 0, &errorCode); 378 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 379 break; /* done */ 380 } 381 if(U_FAILURE(errorCode)) { 382 log_err("error comparing %s with %s at difference item %d: %s\n", 383 a_name, b_name, i, u_errorName(errorCode)); 384 break; 385 } 386 if(length!=0) { 387 break; /* done with code points, got a string or -1 */ 388 } 389 390 if(diffIsError) { 391 if(expect) { 392 log_err("error: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name); 393 } else { 394 log_err("error: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end); 395 } 396 } else { 397 if(expect) { 398 log_verbose("info: %s contains U+%04x..U+%04x but %s does not\n", a_name, start, end, b_name); 399 } else { 400 log_verbose("info: %s and %s both contain U+%04x..U+%04x but should not intersect\n", a_name, b_name, start, end); 401 } 402 } 403 404 ++i; 405 } 406 407 uset_close(aa); 408 return FALSE; 409 } 410 411 static UBool 412 showAMinusB(const USet *a, const USet *b, 413 const char *a_name, const char *b_name, 414 UBool diffIsError) { 415 return showADiffB(a, b, a_name, b_name, TRUE, diffIsError); 416 } 417 418 static UBool 419 showAIntersectB(const USet *a, const USet *b, 420 const char *a_name, const char *b_name, 421 UBool diffIsError) { 422 return showADiffB(a, b, a_name, b_name, FALSE, diffIsError); 423 } 424 425 static UBool 426 compareUSets(const USet *a, const USet *b, 427 const char *a_name, const char *b_name, 428 UBool diffIsError) { 429 /* 430 * Use an arithmetic & not a logical && so that both branches 431 * are always taken and all differences are shown. 432 */ 433 return 434 showAMinusB(a, b, a_name, b_name, diffIsError) & 435 showAMinusB(b, a, b_name, a_name, diffIsError); 436 } 437 438 /* test isLetter(u_isapha()) and isDigit(u_isdigit()) */ 439 static void TestLetterNumber() 440 { 441 UChar i = 0x0000; 442 443 log_verbose("Testing for isalpha\n"); 444 for (i = 0x0041; i < 0x005B; i++) { 445 if (!u_isalpha(i)) 446 { 447 log_err("Failed isLetter test at %.4X\n", i); 448 } 449 } 450 for (i = 0x0660; i < 0x066A; i++) { 451 if (u_isalpha(i)) 452 { 453 log_err("Failed isLetter test with numbers at %.4X\n", i); 454 } 455 } 456 457 log_verbose("Testing for isdigit\n"); 458 for (i = 0x0660; i < 0x066A; i++) { 459 if (!u_isdigit(i)) 460 { 461 log_verbose("Failed isNumber test at %.4X\n", i); 462 } 463 } 464 465 log_verbose("Testing for isalnum\n"); 466 for (i = 0x0041; i < 0x005B; i++) { 467 if (!u_isalnum(i)) 468 { 469 log_err("Failed isAlNum test at %.4X\n", i); 470 } 471 } 472 for (i = 0x0660; i < 0x066A; i++) { 473 if (!u_isalnum(i)) 474 { 475 log_err("Failed isAlNum test at %.4X\n", i); 476 } 477 } 478 479 { 480 /* 481 * The following checks work only starting from Unicode 4.0. 482 * Check the version number here. 483 */ 484 static UVersionInfo u401={ 4, 0, 1, 0 }; 485 UVersionInfo version; 486 u_getUnicodeVersion(version); 487 if(version[0]<4 || 0==memcmp(version, u401, 4)) { 488 return; 489 } 490 } 491 492 { 493 /* 494 * Sanity check: 495 * Verify that exactly the digit characters have decimal digit values. 496 * This assumption is used in the implementation of u_digit() 497 * (which checks nt=de) 498 * compared with the parallel java.lang.Character.digit() 499 * (which checks Nd). 500 * 501 * This was not true in Unicode 3.2 and earlier. 502 * Unicode 4.0 fixed discrepancies. 503 * Unicode 4.0.1 re-introduced problems in this area due to an 504 * unintentionally incomplete last-minute change. 505 */ 506 U_STRING_DECL(digitsPattern, "[:Nd:]", 6); 507 U_STRING_DECL(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24); 508 509 USet *digits, *decimalValues; 510 UErrorCode errorCode; 511 512 U_STRING_INIT(digitsPattern, "[:Nd:]", 6); 513 U_STRING_INIT(decimalValuesPattern, "[:Numeric_Type=Decimal:]", 24); 514 errorCode=U_ZERO_ERROR; 515 digits=uset_openPattern(digitsPattern, 6, &errorCode); 516 decimalValues=uset_openPattern(decimalValuesPattern, 24, &errorCode); 517 518 if(U_SUCCESS(errorCode)) { 519 compareUSets(digits, decimalValues, "[:Nd:]", "[:Numeric_Type=Decimal:]", TRUE); 520 } 521 522 uset_close(digits); 523 uset_close(decimalValues); 524 } 525 } 526 527 static void testSampleCharProps(UBool propFn(UChar32), const char *propName, 528 const UChar32 *sampleChars, int32_t sampleCharsLength, 529 UBool expected) { 530 int32_t i; 531 for (i = 0; i < sampleCharsLength; ++i) { 532 UBool result = propFn(sampleChars[i]); 533 if (result != expected) { 534 log_err("error: character property function %s(U+%04x)=%d is wrong\n", 535 propName, sampleChars[i], result); 536 } 537 } 538 } 539 540 /* Tests for isDefined(u_isdefined)(, isBaseForm(u_isbase()), isSpaceChar(u_isspace()), isWhiteSpace(), u_CharDigitValue() */ 541 static void TestMisc() 542 { 543 static const UChar32 sampleSpaces[] = {0x0020, 0x00a0, 0x2000, 0x2001, 0x2005}; 544 static const UChar32 sampleNonSpaces[] = {0x61, 0x62, 0x63, 0x64, 0x74}; 545 static const UChar32 sampleUndefined[] = {0xfff1, 0xfff7, 0xfa6e}; 546 static const UChar32 sampleDefined[] = {0x523E, 0x4f88, 0xfffd}; 547 static const UChar32 sampleBase[] = {0x0061, 0x0031, 0x03d2}; 548 static const UChar32 sampleNonBase[] = {0x002B, 0x0020, 0x203B}; 549 /* static const UChar sampleChars[] = {0x000a, 0x0045, 0x4e00, 0xDC00, 0xFFE8, 0xFFF0};*/ 550 static const UChar32 sampleDigits[]= {0x0030, 0x0662, 0x0F23, 0x0ED5}; 551 static const UChar32 sampleNonDigits[] = {0x0010, 0x0041, 0x0122, 0x68FE}; 552 static const UChar32 sampleWhiteSpaces[] = {0x2008, 0x2009, 0x200a, 0x001c, 0x000c}; 553 static const UChar32 sampleNonWhiteSpaces[] = {0x61, 0x62, 0x3c, 0x28, 0x3f, 0x85, 0x2007, 0xffef}; 554 555 static const int32_t sampleDigitValues[] = {0, 2, 3, 5}; 556 557 uint32_t mask; 558 559 int32_t i; 560 char icuVersion[U_MAX_VERSION_STRING_LENGTH]; 561 UVersionInfo realVersion; 562 563 memset(icuVersion, 0, U_MAX_VERSION_STRING_LENGTH); 564 565 testSampleCharProps(u_isspace, "u_isspace", sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE); 566 testSampleCharProps(u_isspace, "u_isspace", sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE); 567 568 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar", 569 sampleSpaces, UPRV_LENGTHOF(sampleSpaces), TRUE); 570 testSampleCharProps(u_isJavaSpaceChar, "u_isJavaSpaceChar", 571 sampleNonSpaces, UPRV_LENGTHOF(sampleNonSpaces), FALSE); 572 573 testSampleCharProps(u_isWhitespace, "u_isWhitespace", 574 sampleWhiteSpaces, UPRV_LENGTHOF(sampleWhiteSpaces), TRUE); 575 testSampleCharProps(u_isWhitespace, "u_isWhitespace", 576 sampleNonWhiteSpaces, UPRV_LENGTHOF(sampleNonWhiteSpaces), FALSE); 577 578 testSampleCharProps(u_isdefined, "u_isdefined", 579 sampleDefined, UPRV_LENGTHOF(sampleDefined), TRUE); 580 testSampleCharProps(u_isdefined, "u_isdefined", 581 sampleUndefined, UPRV_LENGTHOF(sampleUndefined), FALSE); 582 583 testSampleCharProps(u_isbase, "u_isbase", sampleBase, UPRV_LENGTHOF(sampleBase), TRUE); 584 testSampleCharProps(u_isbase, "u_isbase", sampleNonBase, UPRV_LENGTHOF(sampleNonBase), FALSE); 585 586 testSampleCharProps(u_isdigit, "u_isdigit", sampleDigits, UPRV_LENGTHOF(sampleDigits), TRUE); 587 testSampleCharProps(u_isdigit, "u_isdigit", sampleNonDigits, UPRV_LENGTHOF(sampleNonDigits), FALSE); 588 589 for (i = 0; i < UPRV_LENGTHOF(sampleDigits); i++) { 590 if (u_charDigitValue(sampleDigits[i]) != sampleDigitValues[i]) { 591 log_err("error: u_charDigitValue(U+04x)=%d != %d\n", 592 sampleDigits[i], u_charDigitValue(sampleDigits[i]), sampleDigitValues[i]); 593 } 594 } 595 596 /* Tests the ICU version #*/ 597 u_getVersion(realVersion); 598 u_versionToString(realVersion, icuVersion); 599 if (strncmp(icuVersion, U_ICU_VERSION, uprv_min((int32_t)strlen(icuVersion), (int32_t)strlen(U_ICU_VERSION))) != 0) 600 { 601 log_err("ICU version test failed. Header says=%s, got=%s \n", U_ICU_VERSION, icuVersion); 602 } 603 #if defined(ICU_VERSION) 604 /* test only happens where we have configure.in with VERSION - sanity check. */ 605 if(strcmp(U_ICU_VERSION, ICU_VERSION)) 606 { 607 log_err("ICU version mismatch: Header says %s, build environment says %s.\n", U_ICU_VERSION, ICU_VERSION); 608 } 609 #endif 610 611 /* test U_GC_... */ 612 if( 613 U_GET_GC_MASK(0x41)!=U_GC_LU_MASK || 614 U_GET_GC_MASK(0x662)!=U_GC_ND_MASK || 615 U_GET_GC_MASK(0xa0)!=U_GC_ZS_MASK || 616 U_GET_GC_MASK(0x28)!=U_GC_PS_MASK || 617 U_GET_GC_MASK(0x2044)!=U_GC_SM_MASK || 618 U_GET_GC_MASK(0xe0063)!=U_GC_CF_MASK 619 ) { 620 log_err("error: U_GET_GC_MASK does not work properly\n"); 621 } 622 623 mask=0; 624 mask=(mask&~U_GC_CN_MASK)|U_GC_CN_MASK; 625 626 mask=(mask&~U_GC_LU_MASK)|U_GC_LU_MASK; 627 mask=(mask&~U_GC_LL_MASK)|U_GC_LL_MASK; 628 mask=(mask&~U_GC_LT_MASK)|U_GC_LT_MASK; 629 mask=(mask&~U_GC_LM_MASK)|U_GC_LM_MASK; 630 mask=(mask&~U_GC_LO_MASK)|U_GC_LO_MASK; 631 632 mask=(mask&~U_GC_MN_MASK)|U_GC_MN_MASK; 633 mask=(mask&~U_GC_ME_MASK)|U_GC_ME_MASK; 634 mask=(mask&~U_GC_MC_MASK)|U_GC_MC_MASK; 635 636 mask=(mask&~U_GC_ND_MASK)|U_GC_ND_MASK; 637 mask=(mask&~U_GC_NL_MASK)|U_GC_NL_MASK; 638 mask=(mask&~U_GC_NO_MASK)|U_GC_NO_MASK; 639 640 mask=(mask&~U_GC_ZS_MASK)|U_GC_ZS_MASK; 641 mask=(mask&~U_GC_ZL_MASK)|U_GC_ZL_MASK; 642 mask=(mask&~U_GC_ZP_MASK)|U_GC_ZP_MASK; 643 644 mask=(mask&~U_GC_CC_MASK)|U_GC_CC_MASK; 645 mask=(mask&~U_GC_CF_MASK)|U_GC_CF_MASK; 646 mask=(mask&~U_GC_CO_MASK)|U_GC_CO_MASK; 647 mask=(mask&~U_GC_CS_MASK)|U_GC_CS_MASK; 648 649 mask=(mask&~U_GC_PD_MASK)|U_GC_PD_MASK; 650 mask=(mask&~U_GC_PS_MASK)|U_GC_PS_MASK; 651 mask=(mask&~U_GC_PE_MASK)|U_GC_PE_MASK; 652 mask=(mask&~U_GC_PC_MASK)|U_GC_PC_MASK; 653 mask=(mask&~U_GC_PO_MASK)|U_GC_PO_MASK; 654 655 mask=(mask&~U_GC_SM_MASK)|U_GC_SM_MASK; 656 mask=(mask&~U_GC_SC_MASK)|U_GC_SC_MASK; 657 mask=(mask&~U_GC_SK_MASK)|U_GC_SK_MASK; 658 mask=(mask&~U_GC_SO_MASK)|U_GC_SO_MASK; 659 660 mask=(mask&~U_GC_PI_MASK)|U_GC_PI_MASK; 661 mask=(mask&~U_GC_PF_MASK)|U_GC_PF_MASK; 662 663 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) { 664 log_err("error: problems with U_GC_XX_MASK constants\n"); 665 } 666 667 mask=0; 668 mask=(mask&~U_GC_C_MASK)|U_GC_C_MASK; 669 mask=(mask&~U_GC_L_MASK)|U_GC_L_MASK; 670 mask=(mask&~U_GC_M_MASK)|U_GC_M_MASK; 671 mask=(mask&~U_GC_N_MASK)|U_GC_N_MASK; 672 mask=(mask&~U_GC_Z_MASK)|U_GC_Z_MASK; 673 mask=(mask&~U_GC_P_MASK)|U_GC_P_MASK; 674 mask=(mask&~U_GC_S_MASK)|U_GC_S_MASK; 675 676 if(mask!=(U_CHAR_CATEGORY_COUNT<32 ? U_MASK(U_CHAR_CATEGORY_COUNT)-1: 0xffffffff)) { 677 log_err("error: problems with U_GC_Y_MASK constants\n"); 678 } 679 { 680 static const UChar32 digit[10]={ 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039 }; 681 for(i=0; i<10; i++){ 682 if(digit[i]!=u_forDigit(i,10)){ 683 log_err("u_forDigit failed for %i. Expected: 0x%4X Got: 0x%4X\n",i,digit[i],u_forDigit(i,10)); 684 } 685 } 686 } 687 688 /* test u_digit() */ 689 { 690 static const struct { 691 UChar32 c; 692 int8_t radix, value; 693 } data[]={ 694 /* base 16 */ 695 { 0x0031, 16, 1 }, 696 { 0x0038, 16, 8 }, 697 { 0x0043, 16, 12 }, 698 { 0x0066, 16, 15 }, 699 { 0x00e4, 16, -1 }, 700 { 0x0662, 16, 2 }, 701 { 0x06f5, 16, 5 }, 702 { 0xff13, 16, 3 }, 703 { 0xff41, 16, 10 }, 704 705 /* base 8 */ 706 { 0x0031, 8, 1 }, 707 { 0x0038, 8, -1 }, 708 { 0x0043, 8, -1 }, 709 { 0x0066, 8, -1 }, 710 { 0x00e4, 8, -1 }, 711 { 0x0662, 8, 2 }, 712 { 0x06f5, 8, 5 }, 713 { 0xff13, 8, 3 }, 714 { 0xff41, 8, -1 }, 715 716 /* base 36 */ 717 { 0x5a, 36, 35 }, 718 { 0x7a, 36, 35 }, 719 { 0xff3a, 36, 35 }, 720 { 0xff5a, 36, 35 }, 721 722 /* wrong radix values */ 723 { 0x0031, 1, -1 }, 724 { 0xff3a, 37, -1 } 725 }; 726 727 for(i=0; i<UPRV_LENGTHOF(data); ++i) { 728 if(u_digit(data[i].c, data[i].radix)!=data[i].value) { 729 log_err("u_digit(U+%04x, %d)=%d expected %d\n", 730 data[i].c, 731 data[i].radix, 732 u_digit(data[i].c, data[i].radix), 733 data[i].value); 734 } 735 } 736 } 737 } 738 739 /* test C/POSIX-style functions --------------------------------------------- */ 740 741 /* bit flags */ 742 #define ISAL 1 743 #define ISLO 2 744 #define ISUP 4 745 746 #define ISDI 8 747 #define ISXD 0x10 748 749 #define ISAN 0x20 750 751 #define ISPU 0x40 752 #define ISGR 0x80 753 #define ISPR 0x100 754 755 #define ISSP 0x200 756 #define ISBL 0x400 757 #define ISCN 0x800 758 759 /* C/POSIX-style functions, in the same order as the bit flags */ 760 typedef UBool U_EXPORT2 IsPOSIXClass(UChar32 c); 761 762 static const struct { 763 IsPOSIXClass *fn; 764 const char *name; 765 } posixClasses[]={ 766 { u_isalpha, "isalpha" }, 767 { u_islower, "islower" }, 768 { u_isupper, "isupper" }, 769 { u_isdigit, "isdigit" }, 770 { u_isxdigit, "isxdigit" }, 771 { u_isalnum, "isalnum" }, 772 { u_ispunct, "ispunct" }, 773 { u_isgraph, "isgraph" }, 774 { u_isprint, "isprint" }, 775 { u_isspace, "isspace" }, 776 { u_isblank, "isblank" }, 777 { u_iscntrl, "iscntrl" } 778 }; 779 780 static const struct { 781 UChar32 c; 782 uint32_t posixResults; 783 } posixData[]={ 784 { 0x0008, ISCN }, /* backspace */ 785 { 0x0009, ISSP|ISBL|ISCN }, /* TAB */ 786 { 0x000a, ISSP| ISCN }, /* LF */ 787 { 0x000c, ISSP| ISCN }, /* FF */ 788 { 0x000d, ISSP| ISCN }, /* CR */ 789 { 0x0020, ISPR|ISSP|ISBL }, /* space */ 790 { 0x0021, ISPU|ISGR|ISPR }, /* ! */ 791 { 0x0033, ISDI|ISXD|ISAN| ISGR|ISPR }, /* 3 */ 792 { 0x0040, ISPU|ISGR|ISPR }, /* @ */ 793 { 0x0041, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* A */ 794 { 0x007a, ISAL|ISLO| ISAN| ISGR|ISPR }, /* z */ 795 { 0x007b, ISPU|ISGR|ISPR }, /* { */ 796 { 0x0085, ISSP| ISCN }, /* NEL */ 797 { 0x00a0, ISPR|ISSP|ISBL }, /* NBSP */ 798 { 0x00a4, ISGR|ISPR }, /* currency sign */ 799 { 0x00e4, ISAL|ISLO| ISAN| ISGR|ISPR }, /* a-umlaut */ 800 { 0x0300, ISGR|ISPR }, /* combining grave */ 801 { 0x0600, ISCN }, /* arabic number sign */ 802 { 0x0627, ISAL| ISAN| ISGR|ISPR }, /* alef */ 803 { 0x0663, ISDI|ISXD|ISAN| ISGR|ISPR }, /* arabic 3 */ 804 { 0x2002, ISPR|ISSP|ISBL }, /* en space */ 805 { 0x2007, ISPR|ISSP|ISBL }, /* figure space */ 806 { 0x2009, ISPR|ISSP|ISBL }, /* thin space */ 807 { 0x200b, ISCN }, /* ZWSP */ 808 /*{ 0x200b, ISPR|ISSP },*/ /* ZWSP */ /* ZWSP became a control char in 4.0.1*/ 809 { 0x200e, ISCN }, /* LRM */ 810 { 0x2028, ISPR|ISSP| ISCN }, /* LS */ 811 { 0x2029, ISPR|ISSP| ISCN }, /* PS */ 812 { 0x20ac, ISGR|ISPR }, /* Euro */ 813 { 0xff15, ISDI|ISXD|ISAN| ISGR|ISPR }, /* fullwidth 5 */ 814 { 0xff25, ISAL| ISUP| ISXD|ISAN| ISGR|ISPR }, /* fullwidth E */ 815 { 0xff35, ISAL| ISUP| ISAN| ISGR|ISPR }, /* fullwidth U */ 816 { 0xff45, ISAL|ISLO| ISXD|ISAN| ISGR|ISPR }, /* fullwidth e */ 817 { 0xff55, ISAL|ISLO| ISAN| ISGR|ISPR } /* fullwidth u */ 818 }; 819 820 static void 821 TestPOSIX() { 822 uint32_t mask; 823 int32_t cl, i; 824 UBool expect; 825 826 mask=1; 827 for(cl=0; cl<12; ++cl) { 828 for(i=0; i<UPRV_LENGTHOF(posixData); ++i) { 829 expect=(UBool)((posixData[i].posixResults&mask)!=0); 830 if(posixClasses[cl].fn(posixData[i].c)!=expect) { 831 log_err("u_%s(U+%04x)=%s is wrong\n", 832 posixClasses[cl].name, posixData[i].c, expect ? "FALSE" : "TRUE"); 833 } 834 } 835 mask<<=1; 836 } 837 } 838 839 /* Tests for isControl(u_iscntrl()) and isPrintable(u_isprint()) */ 840 static void TestControlPrint() 841 { 842 const UChar32 sampleControl[] = {0x1b, 0x97, 0x82, 0x2028, 0x2029, 0x200c, 0x202b}; 843 const UChar32 sampleNonControl[] = {0x61, 0x0031, 0x00e2}; 844 const UChar32 samplePrintable[] = {0x0042, 0x005f, 0x2014}; 845 const UChar32 sampleNonPrintable[] = {0x200c, 0x009f, 0x001b}; 846 UChar32 c; 847 848 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleControl, UPRV_LENGTHOF(sampleControl), TRUE); 849 testSampleCharProps(u_iscntrl, "u_iscntrl", sampleNonControl, UPRV_LENGTHOF(sampleNonControl), FALSE); 850 851 testSampleCharProps(u_isprint, "u_isprint", 852 samplePrintable, UPRV_LENGTHOF(samplePrintable), TRUE); 853 testSampleCharProps(u_isprint, "u_isprint", 854 sampleNonPrintable, UPRV_LENGTHOF(sampleNonPrintable), FALSE); 855 856 /* test all ISO 8 controls */ 857 for(c=0; c<=0x9f; ++c) { 858 if(c==0x20) { 859 /* skip ASCII graphic characters and continue with DEL */ 860 c=0x7f; 861 } 862 if(!u_iscntrl(c)) { 863 log_err("error: u_iscntrl(ISO 8 control U+%04x)=FALSE\n", c); 864 } 865 if(!u_isISOControl(c)) { 866 log_err("error: u_isISOControl(ISO 8 control U+%04x)=FALSE\n", c); 867 } 868 if(u_isprint(c)) { 869 log_err("error: u_isprint(ISO 8 control U+%04x)=TRUE\n", c); 870 } 871 } 872 873 /* test all Latin-1 graphic characters */ 874 for(c=0x20; c<=0xff; ++c) { 875 if(c==0x7f) { 876 c=0xa0; 877 } else if(c==0xad) { 878 /* Unicode 4 changes 00AD Soft Hyphen to Cf (and it is in fact not printable) */ 879 ++c; 880 } 881 if(!u_isprint(c)) { 882 log_err("error: u_isprint(Latin-1 graphic character U+%04x)=FALSE\n", c); 883 } 884 } 885 } 886 887 /* u_isJavaIDStart, u_isJavaIDPart, u_isIDStart(), u_isIDPart(), u_isIDIgnorable()*/ 888 static void TestIdentifier() 889 { 890 const UChar32 sampleJavaIDStart[] = {0x0071, 0x00e4, 0x005f}; 891 const UChar32 sampleNonJavaIDStart[] = {0x0020, 0x2030, 0x0082}; 892 const UChar32 sampleJavaIDPart[] = {0x005f, 0x0032, 0x0045}; 893 const UChar32 sampleNonJavaIDPart[] = {0x2030, 0x2020, 0x0020}; 894 const UChar32 sampleUnicodeIDStart[] = {0x0250, 0x00e2, 0x0061}; 895 const UChar32 sampleNonUnicodeIDStart[] = {0x2000, 0x000a, 0x2019}; 896 const UChar32 sampleUnicodeIDPart[] = {0x005f, 0x0032, 0x0045}; 897 const UChar32 sampleNonUnicodeIDPart[] = {0x2030, 0x00a3, 0x0020}; 898 const UChar32 sampleIDIgnore[] = {0x0006, 0x0010, 0x206b, 0x85}; 899 const UChar32 sampleNonIDIgnore[] = {0x0075, 0x00a3, 0x0061}; 900 901 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart", 902 sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE); 903 testSampleCharProps(u_isJavaIDStart, "u_isJavaIDStart", 904 sampleNonJavaIDStart, UPRV_LENGTHOF(sampleNonJavaIDStart), FALSE); 905 906 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart", 907 sampleJavaIDPart, UPRV_LENGTHOF(sampleJavaIDPart), TRUE); 908 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart", 909 sampleNonJavaIDPart, UPRV_LENGTHOF(sampleNonJavaIDPart), FALSE); 910 911 /* IDPart should imply IDStart */ 912 testSampleCharProps(u_isJavaIDPart, "u_isJavaIDPart", 913 sampleJavaIDStart, UPRV_LENGTHOF(sampleJavaIDStart), TRUE); 914 915 testSampleCharProps(u_isIDStart, "u_isIDStart", 916 sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE); 917 testSampleCharProps(u_isIDStart, "u_isIDStart", 918 sampleNonUnicodeIDStart, UPRV_LENGTHOF(sampleNonUnicodeIDStart), FALSE); 919 920 testSampleCharProps(u_isIDPart, "u_isIDPart", 921 sampleUnicodeIDPart, UPRV_LENGTHOF(sampleUnicodeIDPart), TRUE); 922 testSampleCharProps(u_isIDPart, "u_isIDPart", 923 sampleNonUnicodeIDPart, UPRV_LENGTHOF(sampleNonUnicodeIDPart), FALSE); 924 925 /* IDPart should imply IDStart */ 926 testSampleCharProps(u_isIDPart, "u_isIDPart", 927 sampleUnicodeIDStart, UPRV_LENGTHOF(sampleUnicodeIDStart), TRUE); 928 929 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable", 930 sampleIDIgnore, UPRV_LENGTHOF(sampleIDIgnore), TRUE); 931 testSampleCharProps(u_isIDIgnorable, "u_isIDIgnorable", 932 sampleNonIDIgnore, UPRV_LENGTHOF(sampleNonIDIgnore), FALSE); 933 } 934 935 /* for each line of UnicodeData.txt, check some of the properties */ 936 typedef struct UnicodeDataContext { 937 #if UCONFIG_NO_NORMALIZATION 938 const void *dummy; 939 #else 940 const UNormalizer2 *nfc; 941 const UNormalizer2 *nfkc; 942 #endif 943 } UnicodeDataContext; 944 945 /* 946 * ### TODO 947 * This test fails incorrectly if the First or Last code point of a repetitive area 948 * is overridden, which is allowed and is encouraged for the PUAs. 949 * Currently, this means that both area First/Last and override lines are 950 * tested against the properties from the API, 951 * and the area boundary will not match and cause an error. 952 * 953 * This function should detect area boundaries and skip them for the test of individual 954 * code points' properties. 955 * Then it should check that the areas contain all the same properties except where overridden. 956 * For this, it would have had to set a flag for which code points were listed explicitly. 957 */ 958 static void U_CALLCONV 959 unicodeDataLineFn(void *context, 960 char *fields[][2], int32_t fieldCount, 961 UErrorCode *pErrorCode) 962 { 963 char buffer[100]; 964 const char *d; 965 char *end; 966 uint32_t value; 967 UChar32 c; 968 int32_t i; 969 int8_t type; 970 int32_t dt; 971 UChar dm[32], s[32]; 972 int32_t dmLength, length; 973 974 #if !UCONFIG_NO_NORMALIZATION 975 const UNormalizer2 *nfc, *nfkc; 976 #endif 977 978 /* get the character code, field 0 */ 979 c=strtoul(fields[0][0], &end, 16); 980 if(end<=fields[0][0] || end!=fields[0][1]) { 981 log_err("error: syntax error in field 0 at %s\n", fields[0][0]); 982 return; 983 } 984 if((uint32_t)c>=UCHAR_MAX_VALUE + 1) { 985 log_err("error in UnicodeData.txt: code point %lu out of range\n", c); 986 return; 987 } 988 989 /* get general category, field 2 */ 990 *fields[2][1]=0; 991 type = (int8_t)tagValues[MakeProp(fields[2][0])]; 992 if(u_charType(c)!=type) { 993 log_err("error: u_charType(U+%04lx)==%u instead of %u\n", c, u_charType(c), type); 994 } 995 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) { 996 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c); 997 } 998 999 /* get canonical combining class, field 3 */ 1000 value=strtoul(fields[3][0], &end, 10); 1001 if(end<=fields[3][0] || end!=fields[3][1]) { 1002 log_err("error: syntax error in field 3 at code 0x%lx\n", c); 1003 return; 1004 } 1005 if(value>255) { 1006 log_err("error in UnicodeData.txt: combining class %lu out of range\n", value); 1007 return; 1008 } 1009 #if !UCONFIG_NO_NORMALIZATION 1010 if(value!=u_getCombiningClass(c) || value!=(uint32_t)u_getIntPropertyValue(c, UCHAR_CANONICAL_COMBINING_CLASS)) { 1011 log_err("error: u_getCombiningClass(U+%04lx)==%hu instead of %lu\n", c, u_getCombiningClass(c), value); 1012 } 1013 nfkc=((UnicodeDataContext *)context)->nfkc; 1014 if(value!=unorm2_getCombiningClass(nfkc, c)) { 1015 log_err("error: unorm2_getCombiningClass(nfkc, U+%04lx)==%hu instead of %lu\n", c, unorm2_getCombiningClass(nfkc, c), value); 1016 } 1017 #endif 1018 1019 /* get BiDi category, field 4 */ 1020 *fields[4][1]=0; 1021 i=MakeDir(fields[4][0]); 1022 if(i!=u_charDirection(c) || i!=u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)) { 1023 log_err("error: u_charDirection(U+%04lx)==%u instead of %u (%s)\n", c, u_charDirection(c), MakeDir(fields[4][0]), fields[4][0]); 1024 } 1025 1026 /* get Decomposition_Type & Decomposition_Mapping, field 5 */ 1027 d=NULL; 1028 if(fields[5][0]==fields[5][1]) { 1029 /* no decomposition, except UnicodeData.txt omits Hangul syllable decompositions */ 1030 if(c==0xac00 || c==0xd7a3) { 1031 dt=U_DT_CANONICAL; 1032 } else { 1033 dt=U_DT_NONE; 1034 } 1035 } else { 1036 d=fields[5][0]; 1037 *fields[5][1]=0; 1038 dt=UCHAR_INVALID_CODE; 1039 if(*d=='<') { 1040 end=strchr(++d, '>'); 1041 if(end!=NULL) { 1042 *end=0; 1043 dt=u_getPropertyValueEnum(UCHAR_DECOMPOSITION_TYPE, d); 1044 d=u_skipWhitespace(end+1); 1045 } 1046 } else { 1047 dt=U_DT_CANONICAL; 1048 } 1049 } 1050 if(dt>U_DT_NONE) { 1051 if(c==0xac00) { 1052 dm[0]=0x1100; 1053 dm[1]=0x1161; 1054 dm[2]=0; 1055 dmLength=2; 1056 } else if(c==0xd7a3) { 1057 dm[0]=0xd788; 1058 dm[1]=0x11c2; 1059 dm[2]=0; 1060 dmLength=2; 1061 } else { 1062 dmLength=u_parseString(d, dm, 32, NULL, pErrorCode); 1063 } 1064 } else { 1065 dmLength=-1; 1066 } 1067 if(dt<0 || U_FAILURE(*pErrorCode)) { 1068 log_err("error in UnicodeData.txt: syntax error in U+%04lX decomposition field\n", (long)c); 1069 return; 1070 } 1071 #if !UCONFIG_NO_NORMALIZATION 1072 i=u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE); 1073 if(i!=dt) { 1074 log_err("error: u_getIntPropertyValue(U+%04lx, UCHAR_DECOMPOSITION_TYPE)==%d instead of %d\n", c, i, dt); 1075 } 1076 /* Expect Decomposition_Mapping=nfkc.getRawDecomposition(c). */ 1077 length=unorm2_getRawDecomposition(nfkc, c, s, 32, pErrorCode); 1078 if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) { 1079 log_err("error: unorm2_getRawDecomposition(nfkc, U+%04lx)==%d instead of %d " 1080 "or the Decomposition_Mapping is different (%s)\n", 1081 c, length, dmLength, u_errorName(*pErrorCode)); 1082 return; 1083 } 1084 /* For canonical decompositions only, expect Decomposition_Mapping=nfc.getRawDecomposition(c). */ 1085 if(dt!=U_DT_CANONICAL) { 1086 dmLength=-1; 1087 } 1088 nfc=((UnicodeDataContext *)context)->nfc; 1089 length=unorm2_getRawDecomposition(nfc, c, s, 32, pErrorCode); 1090 if(U_FAILURE(*pErrorCode) || length!=dmLength || (length>0 && 0!=u_strcmp(s, dm))) { 1091 log_err("error: unorm2_getRawDecomposition(nfc, U+%04lx)==%d instead of %d " 1092 "or the Decomposition_Mapping is different (%s)\n", 1093 c, length, dmLength, u_errorName(*pErrorCode)); 1094 return; 1095 } 1096 /* recompose */ 1097 if(dt==U_DT_CANONICAL && !u_hasBinaryProperty(c, UCHAR_FULL_COMPOSITION_EXCLUSION)) { 1098 UChar32 a, b, composite; 1099 i=0; 1100 U16_NEXT(dm, i, dmLength, a); 1101 U16_NEXT(dm, i, dmLength, b); 1102 /* i==dmLength */ 1103 composite=unorm2_composePair(nfc, a, b); 1104 if(composite!=c) { 1105 log_err("error: nfc U+%04lX decomposes to U+%04lX+U+%04lX but does not compose back (instead U+%04lX)\n", 1106 (long)c, (long)a, (long)b, (long)composite); 1107 } 1108 /* 1109 * Note: NFKC has fewer round-trip mappings than NFC, 1110 * so we can't just test unorm2_composePair(nfkc, a, b) here without further data. 1111 */ 1112 } 1113 #endif 1114 1115 /* get ISO Comment, field 11 */ 1116 *fields[11][1]=0; 1117 i=u_getISOComment(c, buffer, sizeof(buffer), pErrorCode); 1118 if(U_FAILURE(*pErrorCode) || 0!=strcmp(fields[11][0], buffer)) { 1119 log_err_status(*pErrorCode, "error: u_getISOComment(U+%04lx) wrong (%s): \"%s\" should be \"%s\"\n", 1120 c, u_errorName(*pErrorCode), 1121 U_FAILURE(*pErrorCode) ? buffer : "[error]", 1122 fields[11][0]); 1123 } 1124 1125 /* get uppercase mapping, field 12 */ 1126 if(fields[12][0]!=fields[12][1]) { 1127 value=strtoul(fields[12][0], &end, 16); 1128 if(end!=fields[12][1]) { 1129 log_err("error: syntax error in field 12 at code 0x%lx\n", c); 1130 return; 1131 } 1132 if((UChar32)value!=u_toupper(c)) { 1133 log_err("error: u_toupper(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_toupper(c), value); 1134 } 1135 } else { 1136 /* no case mapping: the API must map the code point to itself */ 1137 if(c!=u_toupper(c)) { 1138 log_err("error: U+%04lx does not have an uppercase mapping but u_toupper()==U+%04lx\n", c, u_toupper(c)); 1139 } 1140 } 1141 1142 /* get lowercase mapping, field 13 */ 1143 if(fields[13][0]!=fields[13][1]) { 1144 value=strtoul(fields[13][0], &end, 16); 1145 if(end!=fields[13][1]) { 1146 log_err("error: syntax error in field 13 at code 0x%lx\n", c); 1147 return; 1148 } 1149 if((UChar32)value!=u_tolower(c)) { 1150 log_err("error: u_tolower(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_tolower(c), value); 1151 } 1152 } else { 1153 /* no case mapping: the API must map the code point to itself */ 1154 if(c!=u_tolower(c)) { 1155 log_err("error: U+%04lx does not have a lowercase mapping but u_tolower()==U+%04lx\n", c, u_tolower(c)); 1156 } 1157 } 1158 1159 /* get titlecase mapping, field 14 */ 1160 if(fields[14][0]!=fields[14][1]) { 1161 value=strtoul(fields[14][0], &end, 16); 1162 if(end!=fields[14][1]) { 1163 log_err("error: syntax error in field 14 at code 0x%lx\n", c); 1164 return; 1165 } 1166 if((UChar32)value!=u_totitle(c)) { 1167 log_err("error: u_totitle(U+%04lx)==U+%04lx instead of U+%04lx\n", c, u_totitle(c), value); 1168 } 1169 } else { 1170 /* no case mapping: the API must map the code point to itself */ 1171 if(c!=u_totitle(c)) { 1172 log_err("error: U+%04lx does not have a titlecase mapping but u_totitle()==U+%04lx\n", c, u_totitle(c)); 1173 } 1174 } 1175 } 1176 1177 static UBool U_CALLCONV 1178 enumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) { 1179 static const UChar32 test[][2]={ 1180 {0x41, U_UPPERCASE_LETTER}, 1181 {0x308, U_NON_SPACING_MARK}, 1182 {0xfffe, U_GENERAL_OTHER_TYPES}, 1183 {0xe0041, U_FORMAT_CHAR}, 1184 {0xeffff, U_UNASSIGNED} 1185 }; 1186 1187 int32_t i, count; 1188 1189 if(0!=strcmp((const char *)context, "a1")) { 1190 log_err("error: u_enumCharTypes() passes on an incorrect context pointer\n"); 1191 return FALSE; 1192 } 1193 1194 count=UPRV_LENGTHOF(test); 1195 for(i=0; i<count; ++i) { 1196 if(start<=test[i][0] && test[i][0]<limit) { 1197 if(type!=(UCharCategory)test[i][1]) { 1198 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld instead of U+%04lx with %ld\n", 1199 start, limit, (long)type, test[i][0], test[i][1]); 1200 } 1201 /* stop at the range that includes the last test code point (increases code coverage for enumeration) */ 1202 return i==(count-1) ? FALSE : TRUE; 1203 } 1204 } 1205 1206 if(start>test[count-1][0]) { 1207 log_err("error: u_enumCharTypes() has range [U+%04lx, U+%04lx[ with %ld after it should have stopped\n", 1208 start, limit, (long)type); 1209 return FALSE; 1210 } 1211 1212 return TRUE; 1213 } 1214 1215 static UBool U_CALLCONV 1216 enumDefaultsRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type) { 1217 /* default Bidi classes for unassigned code points, from the DerivedBidiClass.txt header */ 1218 static const int32_t defaultBidi[][2]={ /* { limit, class } */ 1219 { 0x0590, U_LEFT_TO_RIGHT }, 1220 { 0x0600, U_RIGHT_TO_LEFT }, 1221 { 0x07C0, U_RIGHT_TO_LEFT_ARABIC }, 1222 { 0x08A0, U_RIGHT_TO_LEFT }, 1223 { 0x0900, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+08A0..U+08FF from R to AL */ 1224 { 0x20A0, U_LEFT_TO_RIGHT }, 1225 { 0x20D0, U_EUROPEAN_NUMBER_TERMINATOR }, /* Unicode 6.3 changes the currency symbols block U+20A0..U+20CF to default to ET not L */ 1226 { 0xFB1D, U_LEFT_TO_RIGHT }, 1227 { 0xFB50, U_RIGHT_TO_LEFT }, 1228 { 0xFE00, U_RIGHT_TO_LEFT_ARABIC }, 1229 { 0xFE70, U_LEFT_TO_RIGHT }, 1230 { 0xFF00, U_RIGHT_TO_LEFT_ARABIC }, 1231 { 0x10800, U_LEFT_TO_RIGHT }, 1232 { 0x11000, U_RIGHT_TO_LEFT }, 1233 { 0x1E800, U_LEFT_TO_RIGHT }, /* new default-R range in Unicode 5.2: U+1E800 - U+1EFFF */ 1234 { 0x1EE00, U_RIGHT_TO_LEFT }, 1235 { 0x1EF00, U_RIGHT_TO_LEFT_ARABIC }, /* Unicode 6.1 changes U+1EE00..U+1EEFF from R to AL */ 1236 { 0x1F000, U_RIGHT_TO_LEFT }, 1237 { 0x110000, U_LEFT_TO_RIGHT } 1238 }; 1239 1240 UChar32 c; 1241 int32_t i; 1242 UCharDirection shouldBeDir; 1243 1244 /* 1245 * LineBreak.txt specifies: 1246 * # - Assigned characters that are not listed explicitly are given the value 1247 * # "AL". 1248 * # - Unassigned characters are given the value "XX". 1249 * 1250 * PUA characters are listed explicitly with "XX". 1251 * Verify that no assigned character has "XX". 1252 */ 1253 if(type!=U_UNASSIGNED && type!=U_PRIVATE_USE_CHAR) { 1254 c=start; 1255 while(c<limit) { 1256 if(0==u_getIntPropertyValue(c, UCHAR_LINE_BREAK)) { 1257 log_err("error UCHAR_LINE_BREAK(assigned U+%04lx)=XX\n", c); 1258 } 1259 ++c; 1260 } 1261 } 1262 1263 /* 1264 * Verify default Bidi classes. 1265 * For recent Unicode versions, see UCD.html. 1266 * 1267 * For older Unicode versions: 1268 * See table 3-7 "Bidirectional Character Types" in UAX #9. 1269 * http://www.unicode.org/reports/tr9/ 1270 * 1271 * See also DerivedBidiClass.txt for Cn code points! 1272 * 1273 * Unicode 4.0.1/Public Review Issue #28 (http://www.unicode.org/review/resolved-pri.html) 1274 * changed some default values. 1275 * In particular, non-characters and unassigned Default Ignorable Code Points 1276 * change from L to BN. 1277 * 1278 * UCD.html version 4.0.1 does not yet reflect these changes. 1279 */ 1280 if(type==U_UNASSIGNED || type==U_PRIVATE_USE_CHAR) { 1281 /* enumerate the intersections of defaultBidi ranges with [start..limit[ */ 1282 c=start; 1283 for(i=0; i<UPRV_LENGTHOF(defaultBidi) && c<limit; ++i) { 1284 if((int32_t)c<defaultBidi[i][0]) { 1285 while(c<limit && (int32_t)c<defaultBidi[i][0]) { 1286 if(U_IS_UNICODE_NONCHAR(c) || u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) { 1287 shouldBeDir=U_BOUNDARY_NEUTRAL; 1288 } else { 1289 shouldBeDir=(UCharDirection)defaultBidi[i][1]; 1290 } 1291 1292 if( u_charDirection(c)!=shouldBeDir || 1293 u_getIntPropertyValue(c, UCHAR_BIDI_CLASS)!=shouldBeDir 1294 ) { 1295 log_err("error: u_charDirection(unassigned/PUA U+%04lx)=%s should be %s\n", 1296 c, dirStrings[u_charDirection(c)], dirStrings[shouldBeDir]); 1297 } 1298 ++c; 1299 } 1300 } 1301 } 1302 } 1303 1304 return TRUE; 1305 } 1306 1307 /* tests for several properties */ 1308 static void TestUnicodeData() 1309 { 1310 UVersionInfo expectVersionArray; 1311 UVersionInfo versionArray; 1312 char *fields[15][2]; 1313 UErrorCode errorCode; 1314 UChar32 c; 1315 int8_t type; 1316 1317 UnicodeDataContext context; 1318 1319 u_versionFromString(expectVersionArray, U_UNICODE_VERSION); 1320 u_getUnicodeVersion(versionArray); 1321 if(memcmp(versionArray, expectVersionArray, U_MAX_VERSION_LENGTH) != 0) 1322 { 1323 log_err("Testing u_getUnicodeVersion() - expected " U_UNICODE_VERSION " got %d.%d.%d.%d\n", 1324 versionArray[0], versionArray[1], versionArray[2], versionArray[3]); 1325 } 1326 1327 #if defined(ICU_UNICODE_VERSION) 1328 /* test only happens where we have configure.in with UNICODE_VERSION - sanity check. */ 1329 if(strcmp(U_UNICODE_VERSION, ICU_UNICODE_VERSION)) 1330 { 1331 log_err("Testing configure.in's ICU_UNICODE_VERSION - expected " U_UNICODE_VERSION " got " ICU_UNICODE_VERSION "\n"); 1332 } 1333 #endif 1334 1335 if (ublock_getCode((UChar)0x0041) != UBLOCK_BASIC_LATIN || u_getIntPropertyValue(0x41, UCHAR_BLOCK)!=(int32_t)UBLOCK_BASIC_LATIN) { 1336 log_err("ublock_getCode(U+0041) property failed! Expected : %i Got: %i \n", UBLOCK_BASIC_LATIN,ublock_getCode((UChar)0x0041)); 1337 } 1338 1339 errorCode=U_ZERO_ERROR; 1340 #if !UCONFIG_NO_NORMALIZATION 1341 context.nfc=unorm2_getNFCInstance(&errorCode); 1342 context.nfkc=unorm2_getNFKCInstance(&errorCode); 1343 if(U_FAILURE(errorCode)) { 1344 log_data_err("error: unable to open an NFC or NFKC UNormalizer2 - %s\n", u_errorName(errorCode)); 1345 return; 1346 } 1347 #endif 1348 parseUCDFile("UnicodeData.txt", fields, 15, unicodeDataLineFn, &context, &errorCode); 1349 if(U_FAILURE(errorCode)) { 1350 return; /* if we couldn't parse UnicodeData.txt, we should return */ 1351 } 1352 1353 /* sanity check on repeated properties */ 1354 for(c=0xfffe; c<=0x10ffff;) { 1355 type=u_charType(c); 1356 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) { 1357 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c); 1358 } 1359 if(type!=U_UNASSIGNED) { 1360 log_err("error: u_charType(U+%04lx)!=U_UNASSIGNED (returns %d)\n", c, u_charType(c)); 1361 } 1362 if((c&0xffff)==0xfffe) { 1363 ++c; 1364 } else { 1365 c+=0xffff; 1366 } 1367 } 1368 1369 /* test that PUA is not "unassigned" */ 1370 for(c=0xe000; c<=0x10fffd;) { 1371 type=u_charType(c); 1372 if((uint32_t)u_getIntPropertyValue(c, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(type)) { 1373 log_err("error: (uint32_t)u_getIntPropertyValue(U+%04lx, UCHAR_GENERAL_CATEGORY_MASK)!=U_MASK(u_charType())\n", c); 1374 } 1375 if(type==U_UNASSIGNED) { 1376 log_err("error: u_charType(U+%04lx)==U_UNASSIGNED\n", c); 1377 } else if(type!=U_PRIVATE_USE_CHAR) { 1378 log_verbose("PUA override: u_charType(U+%04lx)=%d\n", c, type); 1379 } 1380 if(c==0xf8ff) { 1381 c=0xf0000; 1382 } else if(c==0xffffd) { 1383 c=0x100000; 1384 } else { 1385 ++c; 1386 } 1387 } 1388 1389 /* test u_enumCharTypes() */ 1390 u_enumCharTypes(enumTypeRange, "a1"); 1391 1392 /* check default properties */ 1393 u_enumCharTypes(enumDefaultsRange, NULL); 1394 } 1395 1396 static void TestCodeUnit(){ 1397 const UChar codeunit[]={0x0000,0xe065,0x20ac,0xd7ff,0xd800,0xd841,0xd905,0xdbff,0xdc00,0xdc02,0xddee,0xdfff,0}; 1398 1399 int32_t i; 1400 1401 for(i=0; i<UPRV_LENGTHOF(codeunit); i++){ 1402 UChar c=codeunit[i]; 1403 if(i<4){ 1404 if(!(UTF_IS_SINGLE(c)) || (UTF_IS_LEAD(c)) || (UTF_IS_TRAIL(c)) ||(UTF_IS_SURROGATE(c))){ 1405 log_err("ERROR: U+%04x is a single", c); 1406 } 1407 1408 } 1409 if(i >= 4 && i< 8){ 1410 if(!(UTF_IS_LEAD(c)) || UTF_IS_SINGLE(c) || UTF_IS_TRAIL(c) || !(UTF_IS_SURROGATE(c))){ 1411 log_err("ERROR: U+%04x is a first surrogate", c); 1412 } 1413 } 1414 if(i >= 8 && i< 12){ 1415 if(!(UTF_IS_TRAIL(c)) || UTF_IS_SINGLE(c) || UTF_IS_LEAD(c) || !(UTF_IS_SURROGATE(c))){ 1416 log_err("ERROR: U+%04x is a second surrogate", c); 1417 } 1418 } 1419 } 1420 1421 } 1422 1423 static void TestCodePoint(){ 1424 const UChar32 codePoint[]={ 1425 /*surrogate, notvalid(codepoint), not a UnicodeChar, not Error */ 1426 0xd800, 1427 0xdbff, 1428 0xdc00, 1429 0xdfff, 1430 0xdc04, 1431 0xd821, 1432 /*not a surrogate, valid, isUnicodeChar , not Error*/ 1433 0x20ac, 1434 0xd7ff, 1435 0xe000, 1436 0xe123, 1437 0x0061, 1438 0xe065, 1439 0x20402, 1440 0x24506, 1441 0x23456, 1442 0x20402, 1443 0x10402, 1444 0x23456, 1445 /*not a surrogate, not valid, isUnicodeChar, isError */ 1446 0x0015, 1447 0x009f, 1448 /*not a surrogate, not valid, not isUnicodeChar, isError */ 1449 0xffff, 1450 0xfffe, 1451 }; 1452 int32_t i; 1453 for(i=0; i<UPRV_LENGTHOF(codePoint); i++){ 1454 UChar32 c=codePoint[i]; 1455 if(i<6){ 1456 if(!UTF_IS_SURROGATE(c) || !U_IS_SURROGATE(c) || !U16_IS_SURROGATE(c)){ 1457 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); 1458 } 1459 if(UTF_IS_VALID(c)){ 1460 log_err("ERROR: isValid() failed for U+%04x\n", c); 1461 } 1462 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){ 1463 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); 1464 } 1465 if(UTF_IS_ERROR(c)){ 1466 log_err("ERROR: isError() failed for U+%04x\n", c); 1467 } 1468 }else if(i >=6 && i<18){ 1469 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){ 1470 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); 1471 } 1472 if(!UTF_IS_VALID(c)){ 1473 log_err("ERROR: isValid() failed for U+%04x\n", c); 1474 } 1475 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){ 1476 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); 1477 } 1478 if(UTF_IS_ERROR(c)){ 1479 log_err("ERROR: isError() failed for U+%04x\n", c); 1480 } 1481 }else if(i >=18 && i<20){ 1482 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){ 1483 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); 1484 } 1485 if(UTF_IS_VALID(c)){ 1486 log_err("ERROR: isValid() failed for U+%04x\n", c); 1487 } 1488 if(!UTF_IS_UNICODE_CHAR(c) || !U_IS_UNICODE_CHAR(c)){ 1489 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); 1490 } 1491 if(!UTF_IS_ERROR(c)){ 1492 log_err("ERROR: isError() failed for U+%04x\n", c); 1493 } 1494 } 1495 else if(i >=18 && i<UPRV_LENGTHOF(codePoint)){ 1496 if(UTF_IS_SURROGATE(c) || U_IS_SURROGATE(c) || U16_IS_SURROGATE(c)){ 1497 log_err("ERROR: isSurrogate() failed for U+%04x\n", c); 1498 } 1499 if(UTF_IS_VALID(c)){ 1500 log_err("ERROR: isValid() failed for U+%04x\n", c); 1501 } 1502 if(UTF_IS_UNICODE_CHAR(c) || U_IS_UNICODE_CHAR(c)){ 1503 log_err("ERROR: isUnicodeChar() failed for U+%04x\n", c); 1504 } 1505 if(!UTF_IS_ERROR(c)){ 1506 log_err("ERROR: isError() failed for U+%04x\n", c); 1507 } 1508 } 1509 } 1510 1511 if( 1512 !U_IS_BMP(0) || !U_IS_BMP(0x61) || !U_IS_BMP(0x20ac) || 1513 !U_IS_BMP(0xd9da) || !U_IS_BMP(0xdfed) || !U_IS_BMP(0xffff) || 1514 U_IS_BMP(U_SENTINEL) || U_IS_BMP(0x10000) || U_IS_BMP(0x50005) || 1515 U_IS_BMP(0x10ffff) || U_IS_BMP(0x110000) || U_IS_BMP(0x7fffffff) 1516 ) { 1517 log_err("error with U_IS_BMP()\n"); 1518 } 1519 1520 if( 1521 U_IS_SUPPLEMENTARY(0) || U_IS_SUPPLEMENTARY(0x61) || U_IS_SUPPLEMENTARY(0x20ac) || 1522 U_IS_SUPPLEMENTARY(0xd9da) || U_IS_SUPPLEMENTARY(0xdfed) || U_IS_SUPPLEMENTARY(0xffff) || 1523 U_IS_SUPPLEMENTARY(U_SENTINEL) || !U_IS_SUPPLEMENTARY(0x10000) || !U_IS_SUPPLEMENTARY(0x50005) || 1524 !U_IS_SUPPLEMENTARY(0x10ffff) || U_IS_SUPPLEMENTARY(0x110000) || U_IS_SUPPLEMENTARY(0x7fffffff) 1525 ) { 1526 log_err("error with U_IS_SUPPLEMENTARY()\n"); 1527 } 1528 } 1529 1530 static void TestCharLength() 1531 { 1532 const int32_t codepoint[]={ 1533 1, 0x0061, 1534 1, 0xe065, 1535 1, 0x20ac, 1536 2, 0x20402, 1537 2, 0x23456, 1538 2, 0x24506, 1539 2, 0x20402, 1540 2, 0x10402, 1541 1, 0xd7ff, 1542 1, 0xe000 1543 }; 1544 1545 int32_t i; 1546 UBool multiple; 1547 for(i=0; i<UPRV_LENGTHOF(codepoint); i=(int16_t)(i+2)){ 1548 UChar32 c=codepoint[i+1]; 1549 if(UTF_CHAR_LENGTH(c) != codepoint[i] || U16_LENGTH(c) != codepoint[i]){ 1550 log_err("The no: of code units for U+%04x:- Expected: %d Got: %d\n", c, codepoint[i], U16_LENGTH(c)); 1551 } 1552 multiple=(UBool)(codepoint[i] == 1 ? FALSE : TRUE); 1553 if(UTF_NEED_MULTIPLE_UCHAR(c) != multiple){ 1554 log_err("ERROR: Unicode::needMultipleUChar() failed for U+%04x\n", c); 1555 } 1556 } 1557 } 1558 1559 /*internal functions ----*/ 1560 static int32_t MakeProp(char* str) 1561 { 1562 int32_t result = 0; 1563 char* matchPosition =0; 1564 1565 matchPosition = strstr(tagStrings, str); 1566 if (matchPosition == 0) 1567 { 1568 log_err("unrecognized type letter "); 1569 log_err(str); 1570 } 1571 else 1572 result = (int32_t)((matchPosition - tagStrings) / 2); 1573 return result; 1574 } 1575 1576 static int32_t MakeDir(char* str) 1577 { 1578 int32_t pos = 0; 1579 for (pos = 0; pos < U_CHAR_DIRECTION_COUNT; pos++) { 1580 if (strcmp(str, dirStrings[pos]) == 0) { 1581 return pos; 1582 } 1583 } 1584 return -1; 1585 } 1586 1587 /* test u_charName() -------------------------------------------------------- */ 1588 1589 static const struct { 1590 uint32_t code; 1591 const char *name, *oldName, *extName, *alias; 1592 } names[]={ 1593 {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"}, 1594 {0x01a2, "LATIN CAPITAL LETTER OI", "", 1595 "LATIN CAPITAL LETTER OI", 1596 "LATIN CAPITAL LETTER GHA"}, 1597 {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "", 1598 "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK" }, 1599 {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "", 1600 "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", 1601 "TIBETAN MARK BKA- SHOG GI MGO RGYAN"}, 1602 {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401" }, 1603 {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED" }, 1604 {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA" }, 1605 {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH" }, 1606 {0xd800, "", "", "<lead surrogate-D800>" }, 1607 {0xdc00, "", "", "<trail surrogate-DC00>" }, 1608 {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS" }, 1609 {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN" }, 1610 {0xffff, "", "", "<noncharacter-FFFF>" }, 1611 {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "", 1612 "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", 1613 "BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS"}, 1614 {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456" } 1615 }; 1616 1617 static UBool 1618 enumCharNamesFn(void *context, 1619 UChar32 code, UCharNameChoice nameChoice, 1620 const char *name, int32_t length) { 1621 int32_t *pCount=(int32_t *)context; 1622 const char *expected; 1623 int i; 1624 1625 if(length<=0 || length!=(int32_t)strlen(name)) { 1626 /* should not be called with an empty string or invalid length */ 1627 log_err("u_enumCharName(0x%lx)=%s but length=%ld\n", name, length); 1628 return TRUE; 1629 } 1630 1631 ++*pCount; 1632 for(i=0; i<UPRV_LENGTHOF(names); ++i) { 1633 if(code==(UChar32)names[i].code) { 1634 switch (nameChoice) { 1635 case U_EXTENDED_CHAR_NAME: 1636 if(0!=strcmp(name, names[i].extName)) { 1637 log_err("u_enumCharName(0x%lx - Extended)=%s instead of %s\n", code, name, names[i].extName); 1638 } 1639 break; 1640 case U_UNICODE_CHAR_NAME: 1641 if(0!=strcmp(name, names[i].name)) { 1642 log_err("u_enumCharName(0x%lx)=%s instead of %s\n", code, name, names[i].name); 1643 } 1644 break; 1645 case U_UNICODE_10_CHAR_NAME: 1646 expected=names[i].oldName; 1647 if(expected[0]==0 || 0!=strcmp(name, expected)) { 1648 log_err("u_enumCharName(0x%lx - 1.0)=%s instead of %s\n", code, name, expected); 1649 } 1650 break; 1651 case U_CHAR_NAME_ALIAS: 1652 expected=names[i].alias; 1653 if(expected==NULL || expected[0]==0 || 0!=strcmp(name, expected)) { 1654 log_err("u_enumCharName(0x%lx - alias)=%s instead of %s\n", code, name, expected); 1655 } 1656 break; 1657 case U_CHAR_NAME_CHOICE_COUNT: 1658 break; 1659 } 1660 break; 1661 } 1662 } 1663 return TRUE; 1664 } 1665 1666 struct enumExtCharNamesContext { 1667 uint32_t length; 1668 int32_t last; 1669 }; 1670 1671 static UBool 1672 enumExtCharNamesFn(void *context, 1673 UChar32 code, UCharNameChoice nameChoice, 1674 const char *name, int32_t length) { 1675 struct enumExtCharNamesContext *ecncp = (struct enumExtCharNamesContext *) context; 1676 1677 if (ecncp->last != (int32_t) code - 1) { 1678 if (ecncp->last < 0) { 1679 log_err("u_enumCharName(0x%lx - Ext) after u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x%lx - Ext)\n", code, ecncp->last, ecncp->last + 1); 1680 } else { 1681 log_err("u_enumCharName(0x%lx - Ext) instead of u_enumCharName(0x0 - Ext)\n", code); 1682 } 1683 } 1684 ecncp->last = (int32_t) code; 1685 1686 if (!*name) { 1687 log_err("u_enumCharName(0x%lx - Ext) should not be an empty string\n", code); 1688 } 1689 1690 return enumCharNamesFn(&ecncp->length, code, nameChoice, name, length); 1691 } 1692 1693 /** 1694 * This can be made more efficient by moving it into putil.c and having 1695 * it directly access the ebcdic translation tables. 1696 * TODO: If we get this method in putil.c, then delete it from here. 1697 */ 1698 static UChar 1699 u_charToUChar(char c) { 1700 UChar uc; 1701 u_charsToUChars(&c, &uc, 1); 1702 return uc; 1703 } 1704 1705 static void 1706 TestCharNames() { 1707 static char name[80]; 1708 UErrorCode errorCode=U_ZERO_ERROR; 1709 struct enumExtCharNamesContext extContext; 1710 const char *expected; 1711 int32_t length; 1712 UChar32 c; 1713 int32_t i; 1714 1715 log_verbose("Testing uprv_getMaxCharNameLength()\n"); 1716 length=uprv_getMaxCharNameLength(); 1717 if(length==0) { 1718 /* no names data available */ 1719 return; 1720 } 1721 if(length<83) { /* Unicode 3.2 max char name length */ 1722 log_err("uprv_getMaxCharNameLength()=%d is too short"); 1723 } 1724 /* ### TODO same tests for max ISO comment length as for max name length */ 1725 1726 log_verbose("Testing u_charName()\n"); 1727 for(i=0; i<UPRV_LENGTHOF(names); ++i) { 1728 /* modern Unicode character name */ 1729 length=u_charName(names[i].code, U_UNICODE_CHAR_NAME, name, sizeof(name), &errorCode); 1730 if(U_FAILURE(errorCode)) { 1731 log_err("u_charName(0x%lx) error %s\n", names[i].code, u_errorName(errorCode)); 1732 return; 1733 } 1734 if(length<0 || 0!=strcmp(name, names[i].name) || length!=(uint16_t)strlen(name)) { 1735 log_err("u_charName(0x%lx) gets: %s (length %ld) instead of: %s\n", names[i].code, name, length, names[i].name); 1736 } 1737 1738 /* find the modern name */ 1739 if (*names[i].name) { 1740 c=u_charFromName(U_UNICODE_CHAR_NAME, names[i].name, &errorCode); 1741 if(U_FAILURE(errorCode)) { 1742 log_err("u_charFromName(%s) error %s\n", names[i].name, u_errorName(errorCode)); 1743 return; 1744 } 1745 if(c!=(UChar32)names[i].code) { 1746 log_err("u_charFromName(%s) gets 0x%lx instead of 0x%lx\n", names[i].name, c, names[i].code); 1747 } 1748 } 1749 1750 /* Unicode 1.0 character name */ 1751 length=u_charName(names[i].code, U_UNICODE_10_CHAR_NAME, name, sizeof(name), &errorCode); 1752 if(U_FAILURE(errorCode)) { 1753 log_err("u_charName(0x%lx - 1.0) error %s\n", names[i].code, u_errorName(errorCode)); 1754 return; 1755 } 1756 if(length<0 || (length>0 && 0!=strcmp(name, names[i].oldName)) || length!=(uint16_t)strlen(name)) { 1757 log_err("u_charName(0x%lx - 1.0) gets %s length %ld instead of nothing or %s\n", names[i].code, name, length, names[i].oldName); 1758 } 1759 1760 /* find the Unicode 1.0 name if it is stored (length>0 means that we could read it) */ 1761 if(names[i].oldName[0]!=0 /* && length>0 */) { 1762 c=u_charFromName(U_UNICODE_10_CHAR_NAME, names[i].oldName, &errorCode); 1763 if(U_FAILURE(errorCode)) { 1764 log_err("u_charFromName(%s - 1.0) error %s\n", names[i].oldName, u_errorName(errorCode)); 1765 return; 1766 } 1767 if(c!=(UChar32)names[i].code) { 1768 log_err("u_charFromName(%s - 1.0) gets 0x%lx instead of 0x%lx\n", names[i].oldName, c, names[i].code); 1769 } 1770 } 1771 1772 /* Unicode character name alias */ 1773 length=u_charName(names[i].code, U_CHAR_NAME_ALIAS, name, sizeof(name), &errorCode); 1774 if(U_FAILURE(errorCode)) { 1775 log_err("u_charName(0x%lx - alias) error %s\n", names[i].code, u_errorName(errorCode)); 1776 return; 1777 } 1778 expected=names[i].alias; 1779 if(expected==NULL) { 1780 expected=""; 1781 } 1782 if(length<0 || (length>0 && 0!=strcmp(name, expected)) || length!=(uint16_t)strlen(name)) { 1783 log_err("u_charName(0x%lx - alias) gets %s length %ld instead of nothing or %s\n", 1784 names[i].code, name, length, expected); 1785 } 1786 1787 /* find the Unicode character name alias if it is stored (length>0 means that we could read it) */ 1788 if(expected[0]!=0 /* && length>0 */) { 1789 c=u_charFromName(U_CHAR_NAME_ALIAS, expected, &errorCode); 1790 if(U_FAILURE(errorCode)) { 1791 log_err("u_charFromName(%s - alias) error %s\n", 1792 expected, u_errorName(errorCode)); 1793 return; 1794 } 1795 if(c!=(UChar32)names[i].code) { 1796 log_err("u_charFromName(%s - alias) gets 0x%lx instead of 0x%lx\n", 1797 expected, c, names[i].code); 1798 } 1799 } 1800 } 1801 1802 /* test u_enumCharNames() */ 1803 length=0; 1804 errorCode=U_ZERO_ERROR; 1805 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumCharNamesFn, &length, U_UNICODE_CHAR_NAME, &errorCode); 1806 if(U_FAILURE(errorCode) || length<94140) { 1807 log_err("u_enumCharNames(%ld..%lx) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE, u_errorName(errorCode), length); 1808 } 1809 1810 extContext.length = 0; 1811 extContext.last = -1; 1812 errorCode=U_ZERO_ERROR; 1813 u_enumCharNames(UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, enumExtCharNamesFn, &extContext, U_EXTENDED_CHAR_NAME, &errorCode); 1814 if(U_FAILURE(errorCode) || extContext.length<UCHAR_MAX_VALUE + 1) { 1815 log_err("u_enumCharNames(%ld..0x%lx - Extended) error %s names count=%ld\n", UCHAR_MIN_VALUE, UCHAR_MAX_VALUE + 1, u_errorName(errorCode), extContext.length); 1816 } 1817 1818 /* test that u_charFromName() uppercases the input name, i.e., works with mixed-case names (new in 2.0) */ 1819 if(0x61!=u_charFromName(U_UNICODE_CHAR_NAME, "LATin smALl letTER A", &errorCode)) { 1820 log_err("u_charFromName(U_UNICODE_CHAR_NAME, \"LATin smALl letTER A\") did not find U+0061 (%s)\n", u_errorName(errorCode)); 1821 } 1822 1823 /* Test getCharNameCharacters */ 1824 if(!getTestOption(QUICK_OPTION)) { 1825 enum { BUFSIZE = 256 }; 1826 UErrorCode ec = U_ZERO_ERROR; 1827 char buf[BUFSIZE]; 1828 int32_t maxLength; 1829 UChar32 cp; 1830 UChar pat[BUFSIZE], dumbPat[BUFSIZE]; 1831 int32_t l1, l2; 1832 UBool map[256]; 1833 UBool ok; 1834 1835 USet* set = uset_open(1, 0); /* empty set */ 1836 USet* dumb = uset_open(1, 0); /* empty set */ 1837 1838 /* 1839 * uprv_getCharNameCharacters() will likely return more lowercase 1840 * letters than actual character names contain because 1841 * it includes all the characters in lowercased names of 1842 * general categories, for the full possible set of extended names. 1843 */ 1844 { 1845 USetAdder sa={ 1846 NULL, 1847 uset_add, 1848 uset_addRange, 1849 uset_addString, 1850 NULL /* don't need remove() */ 1851 }; 1852 sa.set=set; 1853 uprv_getCharNameCharacters(&sa); 1854 } 1855 1856 /* build set the dumb (but sure-fire) way */ 1857 for (i=0; i<256; ++i) { 1858 map[i] = FALSE; 1859 } 1860 1861 maxLength=0; 1862 for (cp=0; cp<0x110000; ++cp) { 1863 int32_t len = u_charName(cp, U_EXTENDED_CHAR_NAME, 1864 buf, BUFSIZE, &ec); 1865 if (U_FAILURE(ec)) { 1866 log_err("FAIL: u_charName failed when it shouldn't\n"); 1867 uset_close(set); 1868 uset_close(dumb); 1869 return; 1870 } 1871 if(len>maxLength) { 1872 maxLength=len; 1873 } 1874 1875 for (i=0; i<len; ++i) { 1876 if (!map[(uint8_t) buf[i]]) { 1877 uset_add(dumb, (UChar32)u_charToUChar(buf[i])); 1878 map[(uint8_t) buf[i]] = TRUE; 1879 } 1880 } 1881 1882 /* test for leading/trailing whitespace */ 1883 if(buf[0]==' ' || buf[0]=='\t' || buf[len-1]==' ' || buf[len-1]=='\t') { 1884 log_err("u_charName(U+%04x) returns a name with leading or trailing whitespace\n", cp); 1885 } 1886 } 1887 1888 if(map[(uint8_t)'\t']) { 1889 log_err("u_charName() returned a name with a TAB for some code point\n", cp); 1890 } 1891 1892 length=uprv_getMaxCharNameLength(); 1893 if(length!=maxLength) { 1894 log_err("uprv_getMaxCharNameLength()=%d differs from the maximum length %d of all extended names\n", 1895 length, maxLength); 1896 } 1897 1898 /* compare the sets. Where is my uset_equals?!! */ 1899 ok=TRUE; 1900 for(i=0; i<256; ++i) { 1901 if(uset_contains(set, i)!=uset_contains(dumb, i)) { 1902 if(0x61<=i && i<=0x7a /* a-z */ && uset_contains(set, i) && !uset_contains(dumb, i)) { 1903 /* ignore lowercase a-z that are in set but not in dumb */ 1904 ok=TRUE; 1905 } else { 1906 ok=FALSE; 1907 break; 1908 } 1909 } 1910 } 1911 1912 l1 = uset_toPattern(set, pat, BUFSIZE, TRUE, &ec); 1913 l2 = uset_toPattern(dumb, dumbPat, BUFSIZE, TRUE, &ec); 1914 if (U_FAILURE(ec)) { 1915 log_err("FAIL: uset_toPattern failed when it shouldn't\n"); 1916 uset_close(set); 1917 uset_close(dumb); 1918 return; 1919 } 1920 1921 if (l1 >= BUFSIZE) { 1922 l1 = BUFSIZE-1; 1923 pat[l1] = 0; 1924 } 1925 if (l2 >= BUFSIZE) { 1926 l2 = BUFSIZE-1; 1927 dumbPat[l2] = 0; 1928 } 1929 1930 if (!ok) { 1931 log_err("FAIL: uprv_getCharNameCharacters() returned %s, expected %s (too many lowercase a-z are ok)\n", 1932 aescstrdup(pat, l1), aescstrdup(dumbPat, l2)); 1933 } else if(getTestOption(VERBOSITY_OPTION)) { 1934 log_verbose("Ok: uprv_getCharNameCharacters() returned %s\n", aescstrdup(pat, l1)); 1935 } 1936 1937 uset_close(set); 1938 uset_close(dumb); 1939 } 1940 1941 /* ### TODO: test error cases and other interesting things */ 1942 } 1943 1944 static void 1945 TestUCharFromNameUnderflow() { 1946 // Ticket #10889: Underflow crash when there is no dash. 1947 UErrorCode errorCode=U_ZERO_ERROR; 1948 UChar32 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<NO BREAK SPACE>", &errorCode); 1949 if(U_SUCCESS(errorCode)) { 1950 log_err("u_charFromName(<NO BREAK SPACE>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); 1951 } 1952 1953 // Test related edge cases. 1954 errorCode=U_ZERO_ERROR; 1955 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<-00a0>", &errorCode); 1956 if(U_SUCCESS(errorCode)) { 1957 log_err("u_charFromName(<-00a0>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); 1958 } 1959 1960 errorCode=U_ZERO_ERROR; 1961 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control->", &errorCode); 1962 if(U_SUCCESS(errorCode)) { 1963 log_err("u_charFromName(<control->) = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); 1964 } 1965 1966 errorCode=U_ZERO_ERROR; 1967 c=u_charFromName(U_EXTENDED_CHAR_NAME, "<control-111111>", &errorCode); 1968 if(U_SUCCESS(errorCode)) { 1969 log_err("u_charFromName(<control-111111>) = U+%04x but should fail - %s\n", c, u_errorName(errorCode)); 1970 } 1971 } 1972 1973 /* test u_isMirrored() and u_charMirror() ----------------------------------- */ 1974 1975 static void 1976 TestMirroring() { 1977 USet *set; 1978 UErrorCode errorCode; 1979 1980 UChar32 start, end, c2, c3; 1981 int32_t i; 1982 1983 U_STRING_DECL(mirroredPattern, "[:Bidi_Mirrored:]", 17); 1984 1985 U_STRING_INIT(mirroredPattern, "[:Bidi_Mirrored:]", 17); 1986 1987 log_verbose("Testing u_isMirrored()\n"); 1988 if(!(u_isMirrored(0x28) && u_isMirrored(0xbb) && u_isMirrored(0x2045) && u_isMirrored(0x232a) && 1989 !u_isMirrored(0x27) && !u_isMirrored(0x61) && !u_isMirrored(0x284) && !u_isMirrored(0x3400) 1990 ) 1991 ) { 1992 log_err("u_isMirrored() does not work correctly\n"); 1993 } 1994 1995 log_verbose("Testing u_charMirror()\n"); 1996 if(!(u_charMirror(0x3c)==0x3e && u_charMirror(0x5d)==0x5b && u_charMirror(0x208d)==0x208e && u_charMirror(0x3017)==0x3016 && 1997 u_charMirror(0xbb)==0xab && u_charMirror(0x2215)==0x29F5 && u_charMirror(0x29F5)==0x2215 && /* large delta between the code points */ 1998 u_charMirror(0x2e)==0x2e && u_charMirror(0x6f3)==0x6f3 && u_charMirror(0x301c)==0x301c && u_charMirror(0xa4ab)==0xa4ab && 1999 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 2000 u_charMirror(0x2018)==0x2018 && u_charMirror(0x201b)==0x201b && u_charMirror(0x301d)==0x301d 2001 ) 2002 ) { 2003 log_err("u_charMirror() does not work correctly\n"); 2004 } 2005 2006 /* verify that Bidi_Mirroring_Glyph roundtrips */ 2007 errorCode=U_ZERO_ERROR; 2008 set=uset_openPattern(mirroredPattern, 17, &errorCode); 2009 2010 if (U_FAILURE(errorCode)) { 2011 log_data_err("uset_openPattern(mirroredPattern, 17, &errorCode) failed!\n"); 2012 } else { 2013 for(i=0; 0==uset_getItem(set, i, &start, &end, NULL, 0, &errorCode); ++i) { 2014 do { 2015 c2=u_charMirror(start); 2016 c3=u_charMirror(c2); 2017 if(c3!=start) { 2018 log_err("u_charMirror() does not roundtrip: U+%04lx->U+%04lx->U+%04lx\n", (long)start, (long)c2, (long)c3); 2019 } 2020 c3=u_getBidiPairedBracket(start); 2021 if(u_getIntPropertyValue(start, UCHAR_BIDI_PAIRED_BRACKET_TYPE)==U_BPT_NONE) { 2022 if(c3!=start) { 2023 log_err("u_getBidiPairedBracket(U+%04lx) != self for bpt(c)==None\n", 2024 (long)start); 2025 } 2026 } else { 2027 if(c3!=c2) { 2028 log_err("u_getBidiPairedBracket(U+%04lx) != U+%04lx = bmg(c)'\n", 2029 (long)start, (long)c2); 2030 } 2031 } 2032 } while(++start<=end); 2033 } 2034 } 2035 2036 uset_close(set); 2037 } 2038 2039 2040 struct RunTestData 2041 { 2042 const char *runText; 2043 UScriptCode runCode; 2044 }; 2045 2046 typedef struct RunTestData RunTestData; 2047 2048 static void 2049 CheckScriptRuns(UScriptRun *scriptRun, int32_t *runStarts, const RunTestData *testData, int32_t nRuns, 2050 const char *prefix) 2051 { 2052 int32_t run, runStart, runLimit; 2053 UScriptCode runCode; 2054 2055 /* iterate over all the runs */ 2056 run = 0; 2057 while (uscript_nextRun(scriptRun, &runStart, &runLimit, &runCode)) { 2058 if (runStart != runStarts[run]) { 2059 log_err("%s: incorrect start offset for run %d: expected %d, got %d\n", 2060 prefix, run, runStarts[run], runStart); 2061 } 2062 2063 if (runLimit != runStarts[run + 1]) { 2064 log_err("%s: incorrect limit offset for run %d: expected %d, got %d\n", 2065 prefix, run, runStarts[run + 1], runLimit); 2066 } 2067 2068 if (runCode != testData[run].runCode) { 2069 log_err("%s: incorrect script for run %d: expected \"%s\", got \"%s\"\n", 2070 prefix, run, uscript_getName(testData[run].runCode), uscript_getName(runCode)); 2071 } 2072 2073 run += 1; 2074 2075 /* stop when we've seen all the runs we expect to see */ 2076 if (run >= nRuns) { 2077 break; 2078 } 2079 } 2080 2081 /* Complain if we didn't see then number of runs we expected */ 2082 if (run != nRuns) { 2083 log_err("%s: incorrect number of runs: expected %d, got %d\n", prefix, run, nRuns); 2084 } 2085 } 2086 2087 static void 2088 TestUScriptRunAPI() 2089 { 2090 static const RunTestData testData1[] = { 2091 {"\\u0020\\u0946\\u0939\\u093F\\u0928\\u094D\\u0926\\u0940\\u0020", USCRIPT_DEVANAGARI}, 2092 {"\\u0627\\u0644\\u0639\\u0631\\u0628\\u064A\\u0629\\u0020", USCRIPT_ARABIC}, 2093 {"\\u0420\\u0443\\u0441\\u0441\\u043A\\u0438\\u0439\\u0020", USCRIPT_CYRILLIC}, 2094 {"English (", USCRIPT_LATIN}, 2095 {"\\u0E44\\u0E17\\u0E22", USCRIPT_THAI}, 2096 {") ", USCRIPT_LATIN}, 2097 {"\\u6F22\\u5B75", USCRIPT_HAN}, 2098 {"\\u3068\\u3072\\u3089\\u304C\\u306A\\u3068", USCRIPT_HIRAGANA}, 2099 {"\\u30AB\\u30BF\\u30AB\\u30CA", USCRIPT_KATAKANA}, 2100 {"\\U00010400\\U00010401\\U00010402\\U00010403", USCRIPT_DESERET} 2101 }; 2102 2103 static const RunTestData testData2[] = { 2104 {"((((((((((abc))))))))))", USCRIPT_LATIN} 2105 }; 2106 2107 static const struct { 2108 const RunTestData *testData; 2109 int32_t nRuns; 2110 } testDataEntries[] = { 2111 {testData1, UPRV_LENGTHOF(testData1)}, 2112 {testData2, UPRV_LENGTHOF(testData2)} 2113 }; 2114 2115 static const int32_t nTestEntries = UPRV_LENGTHOF(testDataEntries); 2116 int32_t testEntry; 2117 2118 for (testEntry = 0; testEntry < nTestEntries; testEntry += 1) { 2119 UChar testString[1024]; 2120 int32_t runStarts[256]; 2121 int32_t nTestRuns = testDataEntries[testEntry].nRuns; 2122 const RunTestData *testData = testDataEntries[testEntry].testData; 2123 2124 int32_t run, stringLimit; 2125 UScriptRun *scriptRun = NULL; 2126 UErrorCode err; 2127 2128 /* 2129 * Fill in the test string and the runStarts array. 2130 */ 2131 stringLimit = 0; 2132 for (run = 0; run < nTestRuns; run += 1) { 2133 runStarts[run] = stringLimit; 2134 stringLimit += u_unescape(testData[run].runText, &testString[stringLimit], 1024 - stringLimit); 2135 /*stringLimit -= 1;*/ 2136 } 2137 2138 /* The limit of the last run */ 2139 runStarts[nTestRuns] = stringLimit; 2140 2141 /* 2142 * Make sure that calling uscript_OpenRun with a NULL text pointer 2143 * and a non-zero text length returns the correct error. 2144 */ 2145 err = U_ZERO_ERROR; 2146 scriptRun = uscript_openRun(NULL, stringLimit, &err); 2147 2148 if (err != U_ILLEGAL_ARGUMENT_ERROR) { 2149 log_err("uscript_openRun(NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); 2150 } 2151 2152 if (scriptRun != NULL) { 2153 log_err("uscript_openRun(NULL, stringLimit, &err) returned a non-NULL result.\n"); 2154 uscript_closeRun(scriptRun); 2155 } 2156 2157 /* 2158 * Make sure that calling uscript_OpenRun with a non-NULL text pointer 2159 * and a zero text length returns the correct error. 2160 */ 2161 err = U_ZERO_ERROR; 2162 scriptRun = uscript_openRun(testString, 0, &err); 2163 2164 if (err != U_ILLEGAL_ARGUMENT_ERROR) { 2165 log_err("uscript_openRun(testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); 2166 } 2167 2168 if (scriptRun != NULL) { 2169 log_err("uscript_openRun(testString, 0, &err) returned a non-NULL result.\n"); 2170 uscript_closeRun(scriptRun); 2171 } 2172 2173 /* 2174 * Make sure that calling uscript_openRun with a NULL text pointer 2175 * and a zero text length doesn't return an error. 2176 */ 2177 err = U_ZERO_ERROR; 2178 scriptRun = uscript_openRun(NULL, 0, &err); 2179 2180 if (U_FAILURE(err)) { 2181 log_err("Got error %s from uscript_openRun(NULL, 0, &err)\n", u_errorName(err)); 2182 } 2183 2184 /* Make sure that the empty iterator doesn't find any runs */ 2185 if (uscript_nextRun(scriptRun, NULL, NULL, NULL)) { 2186 log_err("uscript_nextRun(...) returned TRUE for an empty iterator.\n"); 2187 } 2188 2189 /* 2190 * Make sure that calling uscript_setRunText with a NULL text pointer 2191 * and a non-zero text length returns the correct error. 2192 */ 2193 err = U_ZERO_ERROR; 2194 uscript_setRunText(scriptRun, NULL, stringLimit, &err); 2195 2196 if (err != U_ILLEGAL_ARGUMENT_ERROR) { 2197 log_err("uscript_setRunText(scriptRun, NULL, stringLimit, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); 2198 } 2199 2200 /* 2201 * Make sure that calling uscript_OpenRun with a non-NULL text pointer 2202 * and a zero text length returns the correct error. 2203 */ 2204 err = U_ZERO_ERROR; 2205 uscript_setRunText(scriptRun, testString, 0, &err); 2206 2207 if (err != U_ILLEGAL_ARGUMENT_ERROR) { 2208 log_err("uscript_setRunText(scriptRun, testString, 0, &err) returned %s instead of U_ILLEGAL_ARGUMENT_ERROR.\n", u_errorName(err)); 2209 } 2210 2211 /* 2212 * Now call uscript_setRunText on the empty iterator 2213 * and make sure that it works. 2214 */ 2215 err = U_ZERO_ERROR; 2216 uscript_setRunText(scriptRun, testString, stringLimit, &err); 2217 2218 if (U_FAILURE(err)) { 2219 log_err("Got error %s from uscript_setRunText(...)\n", u_errorName(err)); 2220 } else { 2221 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_setRunText"); 2222 } 2223 2224 uscript_closeRun(scriptRun); 2225 2226 /* 2227 * Now open an interator over the testString 2228 * using uscript_openRun and make sure that it works 2229 */ 2230 scriptRun = uscript_openRun(testString, stringLimit, &err); 2231 2232 if (U_FAILURE(err)) { 2233 log_err("Got error %s from uscript_openRun(...)\n", u_errorName(err)); 2234 } else { 2235 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_openRun"); 2236 } 2237 2238 /* Now reset the iterator, and make sure 2239 * that it still works. 2240 */ 2241 uscript_resetRun(scriptRun); 2242 2243 CheckScriptRuns(scriptRun, runStarts, testData, nTestRuns, "uscript_resetRun"); 2244 2245 /* Close the iterator */ 2246 uscript_closeRun(scriptRun); 2247 } 2248 } 2249 2250 /* test additional, non-core properties */ 2251 static void 2252 TestAdditionalProperties() { 2253 /* test data for u_charAge() */ 2254 static const struct { 2255 UChar32 c; 2256 UVersionInfo version; 2257 } charAges[]={ 2258 {0x41, { 1, 1, 0, 0 }}, 2259 {0xffff, { 1, 1, 0, 0 }}, 2260 {0x20ab, { 2, 0, 0, 0 }}, 2261 {0x2fffe, { 2, 0, 0, 0 }}, 2262 {0x20ac, { 2, 1, 0, 0 }}, 2263 {0xfb1d, { 3, 0, 0, 0 }}, 2264 {0x3f4, { 3, 1, 0, 0 }}, 2265 {0x10300, { 3, 1, 0, 0 }}, 2266 {0x220, { 3, 2, 0, 0 }}, 2267 {0xff60, { 3, 2, 0, 0 }} 2268 }; 2269 2270 /* test data for u_hasBinaryProperty() */ 2271 static const int32_t 2272 props[][3]={ /* code point, property, value */ 2273 { 0x0627, UCHAR_ALPHABETIC, TRUE }, 2274 { 0x1034a, UCHAR_ALPHABETIC, TRUE }, 2275 { 0x2028, UCHAR_ALPHABETIC, FALSE }, 2276 2277 { 0x0066, UCHAR_ASCII_HEX_DIGIT, TRUE }, 2278 { 0x0067, UCHAR_ASCII_HEX_DIGIT, FALSE }, 2279 2280 { 0x202c, UCHAR_BIDI_CONTROL, TRUE }, 2281 { 0x202f, UCHAR_BIDI_CONTROL, FALSE }, 2282 2283 { 0x003c, UCHAR_BIDI_MIRRORED, TRUE }, 2284 { 0x003d, UCHAR_BIDI_MIRRORED, FALSE }, 2285 2286 /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */ 2287 { 0x2018, UCHAR_BIDI_MIRRORED, FALSE }, 2288 { 0x201d, UCHAR_BIDI_MIRRORED, FALSE }, 2289 { 0x201f, UCHAR_BIDI_MIRRORED, FALSE }, 2290 { 0x301e, UCHAR_BIDI_MIRRORED, FALSE }, 2291 2292 { 0x058a, UCHAR_DASH, TRUE }, 2293 { 0x007e, UCHAR_DASH, FALSE }, 2294 2295 { 0x0c4d, UCHAR_DIACRITIC, TRUE }, 2296 { 0x3000, UCHAR_DIACRITIC, FALSE }, 2297 2298 { 0x0e46, UCHAR_EXTENDER, TRUE }, 2299 { 0x0020, UCHAR_EXTENDER, FALSE }, 2300 2301 #if !UCONFIG_NO_NORMALIZATION 2302 { 0xfb1d, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE }, 2303 { 0x1d15f, UCHAR_FULL_COMPOSITION_EXCLUSION, TRUE }, 2304 { 0xfb1e, UCHAR_FULL_COMPOSITION_EXCLUSION, FALSE }, 2305 2306 { 0x110a, UCHAR_NFD_INERT, TRUE }, /* Jamo L */ 2307 { 0x0308, UCHAR_NFD_INERT, FALSE }, 2308 2309 { 0x1164, UCHAR_NFKD_INERT, TRUE }, /* Jamo V */ 2310 { 0x1d79d, UCHAR_NFKD_INERT, FALSE }, /* math compat version of xi */ 2311 2312 { 0x0021, UCHAR_NFC_INERT, TRUE }, /* ! */ 2313 { 0x0061, UCHAR_NFC_INERT, FALSE }, /* a */ 2314 { 0x00e4, UCHAR_NFC_INERT, FALSE }, /* a-umlaut */ 2315 { 0x0102, UCHAR_NFC_INERT, FALSE }, /* a-breve */ 2316 { 0xac1c, UCHAR_NFC_INERT, FALSE }, /* Hangul LV */ 2317 { 0xac1d, UCHAR_NFC_INERT, TRUE }, /* Hangul LVT */ 2318 2319 { 0x1d79d, UCHAR_NFKC_INERT, FALSE }, /* math compat version of xi */ 2320 { 0x2a6d6, UCHAR_NFKC_INERT, TRUE }, /* Han, last of CJK ext. B */ 2321 2322 { 0x00e4, UCHAR_SEGMENT_STARTER, TRUE }, 2323 { 0x0308, UCHAR_SEGMENT_STARTER, FALSE }, 2324 { 0x110a, UCHAR_SEGMENT_STARTER, TRUE }, /* Jamo L */ 2325 { 0x1164, UCHAR_SEGMENT_STARTER, FALSE },/* Jamo V */ 2326 { 0xac1c, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LV */ 2327 { 0xac1d, UCHAR_SEGMENT_STARTER, TRUE }, /* Hangul LVT */ 2328 #endif 2329 2330 { 0x0044, UCHAR_HEX_DIGIT, TRUE }, 2331 { 0xff46, UCHAR_HEX_DIGIT, TRUE }, 2332 { 0x0047, UCHAR_HEX_DIGIT, FALSE }, 2333 2334 { 0x30fb, UCHAR_HYPHEN, TRUE }, 2335 { 0xfe58, UCHAR_HYPHEN, FALSE }, 2336 2337 { 0x2172, UCHAR_ID_CONTINUE, TRUE }, 2338 { 0x0307, UCHAR_ID_CONTINUE, TRUE }, 2339 { 0x005c, UCHAR_ID_CONTINUE, FALSE }, 2340 2341 { 0x2172, UCHAR_ID_START, TRUE }, 2342 { 0x007a, UCHAR_ID_START, TRUE }, 2343 { 0x0039, UCHAR_ID_START, FALSE }, 2344 2345 { 0x4db5, UCHAR_IDEOGRAPHIC, TRUE }, 2346 { 0x2f999, UCHAR_IDEOGRAPHIC, TRUE }, 2347 { 0x2f99, UCHAR_IDEOGRAPHIC, FALSE }, 2348 2349 { 0x200c, UCHAR_JOIN_CONTROL, TRUE }, 2350 { 0x2029, UCHAR_JOIN_CONTROL, FALSE }, 2351 2352 { 0x1d7bc, UCHAR_LOWERCASE, TRUE }, 2353 { 0x0345, UCHAR_LOWERCASE, TRUE }, 2354 { 0x0030, UCHAR_LOWERCASE, FALSE }, 2355 2356 { 0x1d7a9, UCHAR_MATH, TRUE }, 2357 { 0x2135, UCHAR_MATH, TRUE }, 2358 { 0x0062, UCHAR_MATH, FALSE }, 2359 2360 { 0xfde1, UCHAR_NONCHARACTER_CODE_POINT, TRUE }, 2361 { 0x10ffff, UCHAR_NONCHARACTER_CODE_POINT, TRUE }, 2362 { 0x10fffd, UCHAR_NONCHARACTER_CODE_POINT, FALSE }, 2363 2364 { 0x0022, UCHAR_QUOTATION_MARK, TRUE }, 2365 { 0xff62, UCHAR_QUOTATION_MARK, TRUE }, 2366 { 0xd840, UCHAR_QUOTATION_MARK, FALSE }, 2367 2368 { 0x061f, UCHAR_TERMINAL_PUNCTUATION, TRUE }, 2369 { 0xe003f, UCHAR_TERMINAL_PUNCTUATION, FALSE }, 2370 2371 { 0x1d44a, UCHAR_UPPERCASE, TRUE }, 2372 { 0x2162, UCHAR_UPPERCASE, TRUE }, 2373 { 0x0345, UCHAR_UPPERCASE, FALSE }, 2374 2375 { 0x0020, UCHAR_WHITE_SPACE, TRUE }, 2376 { 0x202f, UCHAR_WHITE_SPACE, TRUE }, 2377 { 0x3001, UCHAR_WHITE_SPACE, FALSE }, 2378 2379 { 0x0711, UCHAR_XID_CONTINUE, TRUE }, 2380 { 0x1d1aa, UCHAR_XID_CONTINUE, TRUE }, 2381 { 0x007c, UCHAR_XID_CONTINUE, FALSE }, 2382 2383 { 0x16ee, UCHAR_XID_START, TRUE }, 2384 { 0x23456, UCHAR_XID_START, TRUE }, 2385 { 0x1d1aa, UCHAR_XID_START, FALSE }, 2386 2387 /* 2388 * Version break: 2389 * The following properties are only supported starting with the 2390 * Unicode version indicated in the second field. 2391 */ 2392 { -1, 0x320, 0 }, 2393 2394 { 0x180c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE }, 2395 { 0xfe02, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, TRUE }, 2396 { 0x1801, UCHAR_DEFAULT_IGNORABLE_CODE_POINT, FALSE }, 2397 2398 { 0x0149, UCHAR_DEPRECATED, TRUE }, /* changed in Unicode 5.2 */ 2399 { 0x0341, UCHAR_DEPRECATED, FALSE }, /* changed in Unicode 5.2 */ 2400 { 0xe0001, UCHAR_DEPRECATED, TRUE }, /* changed from Unicode 5 to 5.1 */ 2401 { 0xe0100, UCHAR_DEPRECATED, FALSE }, 2402 2403 { 0x00a0, UCHAR_GRAPHEME_BASE, TRUE }, 2404 { 0x0a4d, UCHAR_GRAPHEME_BASE, FALSE }, 2405 { 0xff9d, UCHAR_GRAPHEME_BASE, TRUE }, 2406 { 0xff9f, UCHAR_GRAPHEME_BASE, FALSE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */ 2407 2408 { 0x0300, UCHAR_GRAPHEME_EXTEND, TRUE }, 2409 { 0xff9d, UCHAR_GRAPHEME_EXTEND, FALSE }, 2410 { 0xff9f, UCHAR_GRAPHEME_EXTEND, TRUE }, /* changed from Unicode 3.2 to 4 and again from 5 to 5.1 */ 2411 { 0x0603, UCHAR_GRAPHEME_EXTEND, FALSE }, 2412 2413 { 0x0a4d, UCHAR_GRAPHEME_LINK, TRUE }, 2414 { 0xff9f, UCHAR_GRAPHEME_LINK, FALSE }, 2415 2416 { 0x2ff7, UCHAR_IDS_BINARY_OPERATOR, TRUE }, 2417 { 0x2ff3, UCHAR_IDS_BINARY_OPERATOR, FALSE }, 2418 2419 { 0x2ff3, UCHAR_IDS_TRINARY_OPERATOR, TRUE }, 2420 { 0x2f03, UCHAR_IDS_TRINARY_OPERATOR, FALSE }, 2421 2422 { 0x0ec1, UCHAR_LOGICAL_ORDER_EXCEPTION, TRUE }, 2423 { 0xdcba, UCHAR_LOGICAL_ORDER_EXCEPTION, FALSE }, 2424 2425 { 0x2e9b, UCHAR_RADICAL, TRUE }, 2426 { 0x4e00, UCHAR_RADICAL, FALSE }, 2427 2428 { 0x012f, UCHAR_SOFT_DOTTED, TRUE }, 2429 { 0x0049, UCHAR_SOFT_DOTTED, FALSE }, 2430 2431 { 0xfa11, UCHAR_UNIFIED_IDEOGRAPH, TRUE }, 2432 { 0xfa12, UCHAR_UNIFIED_IDEOGRAPH, FALSE }, 2433 2434 { -1, 0x401, 0 }, /* version break for Unicode 4.0.1 */ 2435 2436 { 0x002e, UCHAR_S_TERM, TRUE }, 2437 { 0x0061, UCHAR_S_TERM, FALSE }, 2438 2439 { 0x180c, UCHAR_VARIATION_SELECTOR, TRUE }, 2440 { 0xfe03, UCHAR_VARIATION_SELECTOR, TRUE }, 2441 { 0xe01ef, UCHAR_VARIATION_SELECTOR, TRUE }, 2442 { 0xe0200, UCHAR_VARIATION_SELECTOR, FALSE }, 2443 2444 /* enum/integer type properties */ 2445 2446 /* UCHAR_BIDI_CLASS tested for assigned characters in TestUnicodeData() */ 2447 /* test default Bidi classes for unassigned code points */ 2448 { 0x0590, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2449 { 0x05cf, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2450 { 0x05ed, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2451 { 0x07f2, UCHAR_BIDI_CLASS, U_DIR_NON_SPACING_MARK }, /* Nko, new in Unicode 5.0 */ 2452 { 0x07fe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, /* unassigned R */ 2453 { 0x089f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2454 { 0xfb37, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2455 { 0xfb42, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2456 { 0x10806, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2457 { 0x10909, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2458 { 0x10fe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2459 2460 { 0x061d, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2461 { 0x063f, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2462 { 0x070e, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2463 { 0x0775, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2464 { 0xfbc2, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2465 { 0xfd90, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2466 { 0xfefe, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2467 2468 { 0x02AF, UCHAR_BLOCK, UBLOCK_IPA_EXTENSIONS }, 2469 { 0x0C4E, UCHAR_BLOCK, UBLOCK_TELUGU }, 2470 { 0x155A, UCHAR_BLOCK, UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS }, 2471 { 0x1717, UCHAR_BLOCK, UBLOCK_TAGALOG }, 2472 { 0x1900, UCHAR_BLOCK, UBLOCK_LIMBU }, 2473 { 0x1CBF, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, 2474 { 0x3040, UCHAR_BLOCK, UBLOCK_HIRAGANA }, 2475 { 0x1D0FF, UCHAR_BLOCK, UBLOCK_BYZANTINE_MUSICAL_SYMBOLS }, 2476 { 0x50000, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, 2477 { 0xEFFFF, UCHAR_BLOCK, UBLOCK_NO_BLOCK }, 2478 { 0x10D0FF, UCHAR_BLOCK, UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B }, 2479 2480 /* UCHAR_CANONICAL_COMBINING_CLASS tested for assigned characters in TestUnicodeData() */ 2481 { 0xd7d7, UCHAR_CANONICAL_COMBINING_CLASS, 0 }, 2482 2483 { 0x00A0, UCHAR_DECOMPOSITION_TYPE, U_DT_NOBREAK }, 2484 { 0x00A8, UCHAR_DECOMPOSITION_TYPE, U_DT_COMPAT }, 2485 { 0x00bf, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE }, 2486 { 0x00c0, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL }, 2487 { 0x1E9B, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL }, 2488 { 0xBCDE, UCHAR_DECOMPOSITION_TYPE, U_DT_CANONICAL }, 2489 { 0xFB5D, UCHAR_DECOMPOSITION_TYPE, U_DT_MEDIAL }, 2490 { 0x1D736, UCHAR_DECOMPOSITION_TYPE, U_DT_FONT }, 2491 { 0xe0033, UCHAR_DECOMPOSITION_TYPE, U_DT_NONE }, 2492 2493 { 0x0009, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, 2494 { 0x0020, UCHAR_EAST_ASIAN_WIDTH, U_EA_NARROW }, 2495 { 0x00B1, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, 2496 { 0x20A9, UCHAR_EAST_ASIAN_WIDTH, U_EA_HALFWIDTH }, 2497 { 0x2FFB, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2498 { 0x3000, UCHAR_EAST_ASIAN_WIDTH, U_EA_FULLWIDTH }, 2499 { 0x35bb, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2500 { 0x58bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2501 { 0xD7A3, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2502 { 0xEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, 2503 { 0x1D198, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, 2504 { 0x20000, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2505 { 0x2F8C7, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, 2506 { 0x3a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_WIDE }, /* plane 3 got default W values in Unicode 4 */ 2507 { 0x5a5bd, UCHAR_EAST_ASIAN_WIDTH, U_EA_NEUTRAL }, 2508 { 0xFEEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, 2509 { 0x10EEEE, UCHAR_EAST_ASIAN_WIDTH, U_EA_AMBIGUOUS }, 2510 2511 /* UCHAR_GENERAL_CATEGORY tested for assigned characters in TestUnicodeData() */ 2512 { 0xd7c7, UCHAR_GENERAL_CATEGORY, 0 }, 2513 { 0xd7d7, UCHAR_GENERAL_CATEGORY, U_OTHER_LETTER }, /* changed in Unicode 5.2 */ 2514 2515 { 0x0444, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, 2516 { 0x0639, UCHAR_JOINING_GROUP, U_JG_AIN }, 2517 { 0x072A, UCHAR_JOINING_GROUP, U_JG_DALATH_RISH }, 2518 { 0x0647, UCHAR_JOINING_GROUP, U_JG_HEH }, 2519 { 0x06C1, UCHAR_JOINING_GROUP, U_JG_HEH_GOAL }, 2520 2521 { 0x200C, UCHAR_JOINING_TYPE, U_JT_NON_JOINING }, 2522 { 0x200D, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING }, 2523 { 0x0639, UCHAR_JOINING_TYPE, U_JT_DUAL_JOINING }, 2524 { 0x0640, UCHAR_JOINING_TYPE, U_JT_JOIN_CAUSING }, 2525 { 0x06C3, UCHAR_JOINING_TYPE, U_JT_RIGHT_JOINING }, 2526 { 0x0300, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT }, 2527 { 0x070F, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT }, 2528 { 0xe0033, UCHAR_JOINING_TYPE, U_JT_TRANSPARENT }, 2529 2530 /* TestUnicodeData() verifies that no assigned character has "XX" (unknown) */ 2531 { 0xe7e7, UCHAR_LINE_BREAK, U_LB_UNKNOWN }, 2532 { 0x10fffd, UCHAR_LINE_BREAK, U_LB_UNKNOWN }, 2533 { 0x0028, UCHAR_LINE_BREAK, U_LB_OPEN_PUNCTUATION }, 2534 { 0x232A, UCHAR_LINE_BREAK, U_LB_CLOSE_PUNCTUATION }, 2535 { 0x3401, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, 2536 { 0x4e02, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, 2537 { 0x20004, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, 2538 { 0xf905, UCHAR_LINE_BREAK, U_LB_IDEOGRAPHIC }, 2539 { 0xdb7e, UCHAR_LINE_BREAK, U_LB_SURROGATE }, 2540 { 0xdbfd, UCHAR_LINE_BREAK, U_LB_SURROGATE }, 2541 { 0xdffc, UCHAR_LINE_BREAK, U_LB_SURROGATE }, 2542 { 0x2762, UCHAR_LINE_BREAK, U_LB_EXCLAMATION }, 2543 { 0x002F, UCHAR_LINE_BREAK, U_LB_BREAK_SYMBOLS }, 2544 { 0x1D49C, UCHAR_LINE_BREAK, U_LB_ALPHABETIC }, 2545 { 0x1731, UCHAR_LINE_BREAK, U_LB_ALPHABETIC }, 2546 2547 /* UCHAR_NUMERIC_TYPE tested in TestNumericProperties() */ 2548 2549 /* UCHAR_SCRIPT tested in TestUScriptCodeAPI() */ 2550 2551 { 0x10ff, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2552 { 0x1100, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, 2553 { 0x1111, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, 2554 { 0x1159, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, 2555 { 0x115a, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ 2556 { 0x115e, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ 2557 { 0x115f, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, 2558 2559 { 0xa95f, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2560 { 0xa960, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ 2561 { 0xa97c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LEADING_JAMO }, /* changed in Unicode 5.2 */ 2562 { 0xa97d, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2563 2564 { 0x1160, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, 2565 { 0x1161, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, 2566 { 0x1172, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, 2567 { 0x11a2, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, 2568 { 0x11a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2569 { 0x11a7, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2570 2571 { 0xd7af, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2572 { 0xd7b0, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2573 { 0xd7c6, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_VOWEL_JAMO }, /* changed in Unicode 5.2 */ 2574 { 0xd7c7, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2575 2576 { 0x11a8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, 2577 { 0x11b8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, 2578 { 0x11c8, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, 2579 { 0x11f9, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, 2580 { 0x11fa, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2581 { 0x11ff, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2582 { 0x1200, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2583 2584 { 0xd7ca, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2585 { 0xd7cb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2586 { 0xd7fb, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_TRAILING_JAMO }, /* changed in Unicode 5.2 */ 2587 { 0xd7fc, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2588 2589 { 0xac00, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, 2590 { 0xac1c, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, 2591 { 0xc5ec, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, 2592 { 0xd788, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LV_SYLLABLE }, 2593 2594 { 0xac01, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2595 { 0xac1b, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2596 { 0xac1d, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2597 { 0xc5ee, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2598 { 0xd7a3, UCHAR_HANGUL_SYLLABLE_TYPE, U_HST_LVT_SYLLABLE }, 2599 2600 { 0xd7a4, UCHAR_HANGUL_SYLLABLE_TYPE, 0 }, 2601 2602 { -1, 0x410, 0 }, /* version break for Unicode 4.1 */ 2603 2604 { 0x00d7, UCHAR_PATTERN_SYNTAX, TRUE }, 2605 { 0xfe45, UCHAR_PATTERN_SYNTAX, TRUE }, 2606 { 0x0061, UCHAR_PATTERN_SYNTAX, FALSE }, 2607 2608 { 0x0020, UCHAR_PATTERN_WHITE_SPACE, TRUE }, 2609 { 0x0085, UCHAR_PATTERN_WHITE_SPACE, TRUE }, 2610 { 0x200f, UCHAR_PATTERN_WHITE_SPACE, TRUE }, 2611 { 0x00a0, UCHAR_PATTERN_WHITE_SPACE, FALSE }, 2612 { 0x3000, UCHAR_PATTERN_WHITE_SPACE, FALSE }, 2613 2614 { 0x1d200, UCHAR_BLOCK, UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION }, 2615 { 0x2c8e, UCHAR_BLOCK, UBLOCK_COPTIC }, 2616 { 0xfe17, UCHAR_BLOCK, UBLOCK_VERTICAL_FORMS }, 2617 2618 { 0x1a00, UCHAR_SCRIPT, USCRIPT_BUGINESE }, 2619 { 0x2cea, UCHAR_SCRIPT, USCRIPT_COPTIC }, 2620 { 0xa82b, UCHAR_SCRIPT, USCRIPT_SYLOTI_NAGRI }, 2621 { 0x103d0, UCHAR_SCRIPT, USCRIPT_OLD_PERSIAN }, 2622 2623 { 0xcc28, UCHAR_LINE_BREAK, U_LB_H2 }, 2624 { 0xcc29, UCHAR_LINE_BREAK, U_LB_H3 }, 2625 { 0xac03, UCHAR_LINE_BREAK, U_LB_H3 }, 2626 { 0x115f, UCHAR_LINE_BREAK, U_LB_JL }, 2627 { 0x11aa, UCHAR_LINE_BREAK, U_LB_JT }, 2628 { 0x11a1, UCHAR_LINE_BREAK, U_LB_JV }, 2629 2630 { 0xb2c9, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_LVT }, 2631 { 0x036f, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_EXTEND }, 2632 { 0x0000, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_CONTROL }, 2633 { 0x1160, UCHAR_GRAPHEME_CLUSTER_BREAK, U_GCB_V }, 2634 2635 { 0x05f4, UCHAR_WORD_BREAK, U_WB_MIDLETTER }, 2636 { 0x4ef0, UCHAR_WORD_BREAK, U_WB_OTHER }, 2637 { 0x19d9, UCHAR_WORD_BREAK, U_WB_NUMERIC }, 2638 { 0x2044, UCHAR_WORD_BREAK, U_WB_MIDNUM }, 2639 2640 { 0xfffd, UCHAR_SENTENCE_BREAK, U_SB_OTHER }, 2641 { 0x1ffc, UCHAR_SENTENCE_BREAK, U_SB_UPPER }, 2642 { 0xff63, UCHAR_SENTENCE_BREAK, U_SB_CLOSE }, 2643 { 0x2028, UCHAR_SENTENCE_BREAK, U_SB_SEP }, 2644 2645 { -1, 0x520, 0 }, /* version break for Unicode 5.2 */ 2646 2647 /* unassigned code points in new default Bidi R blocks */ 2648 { 0x1ede4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2649 { 0x1efe4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT }, 2650 2651 /* test some script codes >127 */ 2652 { 0xa6e6, UCHAR_SCRIPT, USCRIPT_BAMUM }, 2653 { 0xa4d0, UCHAR_SCRIPT, USCRIPT_LISU }, 2654 { 0x10a7f, UCHAR_SCRIPT, USCRIPT_OLD_SOUTH_ARABIAN }, 2655 2656 { -1, 0x600, 0 }, /* version break for Unicode 6.0 */ 2657 2658 /* value changed in Unicode 6.0 */ 2659 { 0x06C3, UCHAR_JOINING_GROUP, U_JG_TEH_MARBUTA_GOAL }, 2660 2661 { -1, 0x610, 0 }, /* version break for Unicode 6.1 */ 2662 2663 /* unassigned code points in new/changed default Bidi AL blocks */ 2664 { 0x08ba, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2665 { 0x1eee4, UCHAR_BIDI_CLASS, U_RIGHT_TO_LEFT_ARABIC }, 2666 2667 { -1, 0x630, 0 }, /* version break for Unicode 6.3 */ 2668 2669 /* unassigned code points in the currency symbols block now default to ET */ 2670 { 0x20C0, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR }, 2671 { 0x20CF, UCHAR_BIDI_CLASS, U_EUROPEAN_NUMBER_TERMINATOR }, 2672 2673 /* new property in Unicode 6.3 */ 2674 { 0x0027, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE }, 2675 { 0x0028, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN }, 2676 { 0x0029, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE }, 2677 { 0xFF5C, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_NONE }, 2678 { 0xFF5B, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_OPEN }, 2679 { 0xFF5D, UCHAR_BIDI_PAIRED_BRACKET_TYPE, U_BPT_CLOSE }, 2680 2681 { -1, 0x700, 0 }, /* version break for Unicode 7.0 */ 2682 2683 /* new character range with Joining_Group values */ 2684 { 0x10ABF, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, 2685 { 0x10AC0, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_ALEPH }, 2686 { 0x10AC1, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_BETH }, 2687 { 0x10AEF, UCHAR_JOINING_GROUP, U_JG_MANICHAEAN_HUNDRED }, 2688 { 0x10AF0, UCHAR_JOINING_GROUP, U_JG_NO_JOINING_GROUP }, 2689 2690 /* undefined UProperty values */ 2691 { 0x61, 0x4a7, 0 }, 2692 { 0x234bc, 0x15ed, 0 } 2693 }; 2694 2695 UVersionInfo version; 2696 UChar32 c; 2697 int32_t i, result, uVersion; 2698 UProperty which; 2699 2700 /* what is our Unicode version? */ 2701 u_getUnicodeVersion(version); 2702 uVersion=((int32_t)version[0]<<8)|(version[1]<<4)|version[2]; /* major/minor/update version numbers */ 2703 2704 u_charAge(0x20, version); 2705 if(version[0]==0) { 2706 /* no additional properties available */ 2707 log_err("TestAdditionalProperties: no additional properties available, not tested\n"); 2708 return; 2709 } 2710 2711 /* test u_charAge() */ 2712 for(i=0; i<UPRV_LENGTHOF(charAges); ++i) { 2713 u_charAge(charAges[i].c, version); 2714 if(0!=memcmp(version, charAges[i].version, sizeof(UVersionInfo))) { 2715 log_err("error: u_charAge(U+%04lx)={ %u, %u, %u, %u } instead of { %u, %u, %u, %u }\n", 2716 charAges[i].c, 2717 version[0], version[1], version[2], version[3], 2718 charAges[i].version[0], charAges[i].version[1], charAges[i].version[2], charAges[i].version[3]); 2719 } 2720 } 2721 2722 if( u_getIntPropertyMinValue(UCHAR_DASH)!=0 || 2723 u_getIntPropertyMinValue(UCHAR_BIDI_CLASS)!=0 || 2724 u_getIntPropertyMinValue(UCHAR_BLOCK)!=0 || /* j2478 */ 2725 u_getIntPropertyMinValue(UCHAR_SCRIPT)!=0 || /*JB#2410*/ 2726 u_getIntPropertyMinValue(0x2345)!=0 2727 ) { 2728 log_err("error: u_getIntPropertyMinValue() wrong\n"); 2729 } 2730 if( u_getIntPropertyMaxValue(UCHAR_DASH)!=1) { 2731 log_err("error: u_getIntPropertyMaxValue(UCHAR_DASH) wrong\n"); 2732 } 2733 if( u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE)!=1) { 2734 log_err("error: u_getIntPropertyMaxValue(UCHAR_ID_CONTINUE) wrong\n"); 2735 } 2736 if( u_getIntPropertyMaxValue((UProperty)(UCHAR_BINARY_LIMIT-1))!=1) { 2737 log_err("error: u_getIntPropertyMaxValue(UCHAR_BINARY_LIMIT-1) wrong\n"); 2738 } 2739 if( u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)!=(int32_t)U_CHAR_DIRECTION_COUNT-1 ) { 2740 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS) wrong\n"); 2741 } 2742 if( u_getIntPropertyMaxValue(UCHAR_BLOCK)!=(int32_t)UBLOCK_COUNT-1 ) { 2743 log_err("error: u_getIntPropertyMaxValue(UCHAR_BLOCK) wrong\n"); 2744 } 2745 if(u_getIntPropertyMaxValue(UCHAR_LINE_BREAK)!=(int32_t)U_LB_COUNT-1) { 2746 log_err("error: u_getIntPropertyMaxValue(UCHAR_LINE_BREAK) wrong\n"); 2747 } 2748 if(u_getIntPropertyMaxValue(UCHAR_SCRIPT)!=(int32_t)USCRIPT_CODE_LIMIT-1) { 2749 log_err("error: u_getIntPropertyMaxValue(UCHAR_SCRIPT) wrong\n"); 2750 } 2751 if(u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE)!=(int32_t)U_NT_COUNT-1) { 2752 log_err("error: u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE) wrong\n"); 2753 } 2754 if(u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY)!=(int32_t)U_CHAR_CATEGORY_COUNT-1) { 2755 log_err("error: u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY) wrong\n"); 2756 } 2757 if(u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE)!=(int32_t)U_HST_COUNT-1) { 2758 log_err("error: u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE) wrong\n"); 2759 } 2760 if(u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK)!=(int32_t)U_GCB_COUNT-1) { 2761 log_err("error: u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK) wrong\n"); 2762 } 2763 if(u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK)!=(int32_t)U_SB_COUNT-1) { 2764 log_err("error: u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK) wrong\n"); 2765 } 2766 if(u_getIntPropertyMaxValue(UCHAR_WORD_BREAK)!=(int32_t)U_WB_COUNT-1) { 2767 log_err("error: u_getIntPropertyMaxValue(UCHAR_WORD_BREAK) wrong\n"); 2768 } 2769 if(u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE)!=(int32_t)U_BPT_COUNT-1) { 2770 log_err("error: u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE) wrong\n"); 2771 } 2772 /*JB#2410*/ 2773 if( u_getIntPropertyMaxValue(0x2345)!=-1) { 2774 log_err("error: u_getIntPropertyMaxValue(0x2345) wrong\n"); 2775 } 2776 if( u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) != (int32_t) (U_DT_COUNT - 1)) { 2777 log_err("error: u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE) wrong\n"); 2778 } 2779 if( u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) != (int32_t) (U_JG_COUNT -1)) { 2780 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP) wrong\n"); 2781 } 2782 if( u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) != (int32_t) (U_JT_COUNT -1)) { 2783 log_err("error: u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE) wrong\n"); 2784 } 2785 if( u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) != (int32_t) (U_EA_COUNT -1)) { 2786 log_err("error: u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH) wrong\n"); 2787 } 2788 2789 /* test u_hasBinaryProperty() and u_getIntPropertyValue() */ 2790 for(i=0; i<UPRV_LENGTHOF(props); ++i) { 2791 const char *whichName; 2792 2793 if(props[i][0]<0) { 2794 /* Unicode version break */ 2795 if(uVersion<props[i][1]) { 2796 break; /* do not test properties that are not yet supported */ 2797 } else { 2798 continue; /* skip this row */ 2799 } 2800 } 2801 2802 c=(UChar32)props[i][0]; 2803 which=(UProperty)props[i][1]; 2804 whichName=u_getPropertyName(which, U_LONG_PROPERTY_NAME); 2805 2806 if(which<UCHAR_INT_START) { 2807 result=u_hasBinaryProperty(c, which); 2808 if(result!=props[i][2]) { 2809 log_data_err("error: u_hasBinaryProperty(U+%04lx, %s)=%d is wrong (props[%d]) - (Are you missing data?)\n", 2810 c, whichName, result, i); 2811 } 2812 } 2813 2814 result=u_getIntPropertyValue(c, which); 2815 if(result!=props[i][2]) { 2816 log_data_err("error: u_getIntPropertyValue(U+%04lx, %s)=%d is wrong, should be %d (props[%d]) - (Are you missing data?)\n", 2817 c, whichName, result, props[i][2], i); 2818 } 2819 2820 /* test separate functions, too */ 2821 switch((UProperty)props[i][1]) { 2822 case UCHAR_ALPHABETIC: 2823 if(u_isUAlphabetic((UChar32)props[i][0])!=(UBool)props[i][2]) { 2824 log_err("error: u_isUAlphabetic(U+%04lx)=%d is wrong (props[%d])\n", 2825 props[i][0], result, i); 2826 } 2827 break; 2828 case UCHAR_LOWERCASE: 2829 if(u_isULowercase((UChar32)props[i][0])!=(UBool)props[i][2]) { 2830 log_err("error: u_isULowercase(U+%04lx)=%d is wrong (props[%d])\n", 2831 props[i][0], result, i); 2832 } 2833 break; 2834 case UCHAR_UPPERCASE: 2835 if(u_isUUppercase((UChar32)props[i][0])!=(UBool)props[i][2]) { 2836 log_err("error: u_isUUppercase(U+%04lx)=%d is wrong (props[%d])\n", 2837 props[i][0], result, i); 2838 } 2839 break; 2840 case UCHAR_WHITE_SPACE: 2841 if(u_isUWhiteSpace((UChar32)props[i][0])!=(UBool)props[i][2]) { 2842 log_err("error: u_isUWhiteSpace(U+%04lx)=%d is wrong (props[%d])\n", 2843 props[i][0], result, i); 2844 } 2845 break; 2846 default: 2847 break; 2848 } 2849 } 2850 } 2851 2852 static void 2853 TestNumericProperties(void) { 2854 /* see UnicodeData.txt, DerivedNumericValues.txt */ 2855 static const struct { 2856 UChar32 c; 2857 int32_t type; 2858 double numValue; 2859 } values[]={ 2860 { 0x0F33, U_NT_NUMERIC, -1./2. }, 2861 { 0x0C66, U_NT_DECIMAL, 0 }, 2862 { 0x96f6, U_NT_NUMERIC, 0 }, 2863 { 0xa833, U_NT_NUMERIC, 1./16. }, 2864 { 0x2152, U_NT_NUMERIC, 1./10. }, 2865 { 0x2151, U_NT_NUMERIC, 1./9. }, 2866 { 0x1245f, U_NT_NUMERIC, 1./8. }, 2867 { 0x2150, U_NT_NUMERIC, 1./7. }, 2868 { 0x2159, U_NT_NUMERIC, 1./6. }, 2869 { 0x09f6, U_NT_NUMERIC, 3./16. }, 2870 { 0x2155, U_NT_NUMERIC, 1./5. }, 2871 { 0x00BD, U_NT_NUMERIC, 1./2. }, 2872 { 0x0031, U_NT_DECIMAL, 1. }, 2873 { 0x4e00, U_NT_NUMERIC, 1. }, 2874 { 0x58f1, U_NT_NUMERIC, 1. }, 2875 { 0x10320, U_NT_NUMERIC, 1. }, 2876 { 0x0F2B, U_NT_NUMERIC, 3./2. }, 2877 { 0x00B2, U_NT_DIGIT, 2. }, 2878 { 0x5f10, U_NT_NUMERIC, 2. }, 2879 { 0x1813, U_NT_DECIMAL, 3. }, 2880 { 0x5f0e, U_NT_NUMERIC, 3. }, 2881 { 0x2173, U_NT_NUMERIC, 4. }, 2882 { 0x8086, U_NT_NUMERIC, 4. }, 2883 { 0x278E, U_NT_DIGIT, 5. }, 2884 { 0x1D7F2, U_NT_DECIMAL, 6. }, 2885 { 0x247A, U_NT_DIGIT, 7. }, 2886 { 0x7396, U_NT_NUMERIC, 9. }, 2887 { 0x1372, U_NT_NUMERIC, 10. }, 2888 { 0x216B, U_NT_NUMERIC, 12. }, 2889 { 0x16EE, U_NT_NUMERIC, 17. }, 2890 { 0x249A, U_NT_NUMERIC, 19. }, 2891 { 0x303A, U_NT_NUMERIC, 30. }, 2892 { 0x5345, U_NT_NUMERIC, 30. }, 2893 { 0x32B2, U_NT_NUMERIC, 37. }, 2894 { 0x1375, U_NT_NUMERIC, 40. }, 2895 { 0x10323, U_NT_NUMERIC, 50. }, 2896 { 0x0BF1, U_NT_NUMERIC, 100. }, 2897 { 0x964c, U_NT_NUMERIC, 100. }, 2898 { 0x217E, U_NT_NUMERIC, 500. }, 2899 { 0x2180, U_NT_NUMERIC, 1000. }, 2900 { 0x4edf, U_NT_NUMERIC, 1000. }, 2901 { 0x2181, U_NT_NUMERIC, 5000. }, 2902 { 0x137C, U_NT_NUMERIC, 10000. }, 2903 { 0x4e07, U_NT_NUMERIC, 10000. }, 2904 { 0x12432, U_NT_NUMERIC, 216000. }, 2905 { 0x12433, U_NT_NUMERIC, 432000. }, 2906 { 0x4ebf, U_NT_NUMERIC, 100000000. }, 2907 { 0x5146, U_NT_NUMERIC, 1000000000000. }, 2908 { -1, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2909 { 0x61, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2910 { 0x3000, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2911 { 0xfffe, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2912 { 0x10301, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2913 { 0xe0033, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2914 { 0x10ffff, U_NT_NONE, U_NO_NUMERIC_VALUE }, 2915 { 0x110000, U_NT_NONE, U_NO_NUMERIC_VALUE } 2916 }; 2917 2918 double nv; 2919 UChar32 c; 2920 int32_t i, type; 2921 2922 for(i=0; i<UPRV_LENGTHOF(values); ++i) { 2923 c=values[i].c; 2924 type=u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE); 2925 nv=u_getNumericValue(c); 2926 2927 if(type!=values[i].type) { 2928 log_err("UCHAR_NUMERIC_TYPE(U+%04lx)=%d should be %d\n", c, type, values[i].type); 2929 } 2930 if(0.000001 <= fabs(nv - values[i].numValue)) { 2931 log_err("u_getNumericValue(U+%04lx)=%g should be %g\n", c, nv, values[i].numValue); 2932 } 2933 } 2934 } 2935 2936 /** 2937 * Test the property names and property value names API. 2938 */ 2939 static void 2940 TestPropertyNames(void) { 2941 int32_t p, v, choice=0, rev; 2942 UBool atLeastSomething = FALSE; 2943 2944 for (p=0; ; ++p) { 2945 UProperty propEnum = (UProperty)p; 2946 UBool sawProp = FALSE; 2947 if(p > 10 && !atLeastSomething) { 2948 log_data_err("Never got anything after 10 tries.\nYour data is probably fried. Quitting this test\n", p, choice); 2949 return; 2950 } 2951 2952 for (choice=0; ; ++choice) { 2953 const char* name = u_getPropertyName(propEnum, (UPropertyNameChoice)choice); 2954 if (name) { 2955 if (!sawProp) 2956 log_verbose("prop 0x%04x+%2d:", p&~0xfff, p&0xfff); 2957 log_verbose("%d=\"%s\"", choice, name); 2958 sawProp = TRUE; 2959 atLeastSomething = TRUE; 2960 2961 /* test reverse mapping */ 2962 rev = u_getPropertyEnum(name); 2963 if (rev != p) { 2964 log_err("Property round-trip failure: %d -> %s -> %d\n", 2965 p, name, rev); 2966 } 2967 } 2968 if (!name && choice>0) break; 2969 } 2970 if (sawProp) { 2971 /* looks like a valid property; check the values */ 2972 const char* pname = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME); 2973 int32_t max = 0; 2974 if (p == UCHAR_CANONICAL_COMBINING_CLASS) { 2975 max = 255; 2976 } else if (p == UCHAR_GENERAL_CATEGORY_MASK) { 2977 /* it's far too slow to iterate all the way up to 2978 the real max, U_GC_P_MASK */ 2979 max = U_GC_NL_MASK; 2980 } else if (p == UCHAR_BLOCK) { 2981 /* UBlockCodes, unlike other values, start at 1 */ 2982 max = 1; 2983 } 2984 log_verbose("\n"); 2985 for (v=-1; ; ++v) { 2986 UBool sawValue = FALSE; 2987 for (choice=0; ; ++choice) { 2988 const char* vname = u_getPropertyValueName(propEnum, v, (UPropertyNameChoice)choice); 2989 if (vname) { 2990 if (!sawValue) log_verbose(" %s, value %d:", pname, v); 2991 log_verbose("%d=\"%s\"", choice, vname); 2992 sawValue = TRUE; 2993 2994 /* test reverse mapping */ 2995 rev = u_getPropertyValueEnum(propEnum, vname); 2996 if (rev != v) { 2997 log_err("Value round-trip failure (%s): %d -> %s -> %d\n", 2998 pname, v, vname, rev); 2999 } 3000 } 3001 if (!vname && choice>0) break; 3002 } 3003 if (sawValue) { 3004 log_verbose("\n"); 3005 } 3006 if (!sawValue && v>=max) break; 3007 } 3008 } 3009 if (!sawProp) { 3010 if (p>=UCHAR_STRING_LIMIT) { 3011 break; 3012 } else if (p>=UCHAR_DOUBLE_LIMIT) { 3013 p = UCHAR_STRING_START - 1; 3014 } else if (p>=UCHAR_MASK_LIMIT) { 3015 p = UCHAR_DOUBLE_START - 1; 3016 } else if (p>=UCHAR_INT_LIMIT) { 3017 p = UCHAR_MASK_START - 1; 3018 } else if (p>=UCHAR_BINARY_LIMIT) { 3019 p = UCHAR_INT_START - 1; 3020 } 3021 } 3022 } 3023 } 3024 3025 /** 3026 * Test the property values API. See JB#2410. 3027 */ 3028 static void 3029 TestPropertyValues(void) { 3030 int32_t i, p, min, max; 3031 UErrorCode ec; 3032 3033 /* Min should be 0 for everything. */ 3034 /* Until JB#2478 is fixed, the one exception is UCHAR_BLOCK. */ 3035 for (p=UCHAR_INT_START; p<UCHAR_INT_LIMIT; ++p) { 3036 UProperty propEnum = (UProperty)p; 3037 min = u_getIntPropertyMinValue(propEnum); 3038 if (min != 0) { 3039 if (p == UCHAR_BLOCK) { 3040 /* This is okay...for now. See JB#2487. 3041 TODO Update this for JB#2487. */ 3042 } else { 3043 const char* name; 3044 name = u_getPropertyName(propEnum, U_LONG_PROPERTY_NAME); 3045 if (name == NULL) 3046 name = "<ERROR>"; 3047 log_err("FAIL: u_getIntPropertyMinValue(%s) = %d, exp. 0\n", 3048 name, min); 3049 } 3050 } 3051 } 3052 3053 if( u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY_MASK)!=0 || 3054 u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY_MASK)!=-1) { 3055 log_err("error: u_getIntPropertyMin/MaxValue(UCHAR_GENERAL_CATEGORY_MASK) is wrong\n"); 3056 } 3057 3058 /* Max should be -1 for invalid properties. */ 3059 max = u_getIntPropertyMaxValue(UCHAR_INVALID_CODE); 3060 if (max != -1) { 3061 log_err("FAIL: u_getIntPropertyMaxValue(-1) = %d, exp. -1\n", 3062 max); 3063 } 3064 3065 /* Script should return USCRIPT_INVALID_CODE for an invalid code point. */ 3066 for (i=0; i<2; ++i) { 3067 int32_t script; 3068 const char* desc; 3069 ec = U_ZERO_ERROR; 3070 switch (i) { 3071 case 0: 3072 script = uscript_getScript(-1, &ec); 3073 desc = "uscript_getScript(-1)"; 3074 break; 3075 case 1: 3076 script = u_getIntPropertyValue(-1, UCHAR_SCRIPT); 3077 desc = "u_getIntPropertyValue(-1, UCHAR_SCRIPT)"; 3078 break; 3079 default: 3080 log_err("Internal test error. Too many scripts\n"); 3081 return; 3082 } 3083 /* We don't explicitly test ec. It should be U_FAILURE but it 3084 isn't documented as such. */ 3085 if (script != (int32_t)USCRIPT_INVALID_CODE) { 3086 log_err("FAIL: %s = %d, exp. 0\n", 3087 desc, script); 3088 } 3089 } 3090 } 3091 3092 /* various tests for consistency of UCD data and API behavior */ 3093 static void 3094 TestConsistency() { 3095 char buffer[300]; 3096 USet *set1, *set2, *set3, *set4; 3097 UErrorCode errorCode; 3098 3099 UChar32 start, end; 3100 int32_t i, length; 3101 3102 U_STRING_DECL(hyphenPattern, "[:Hyphen:]", 10); 3103 U_STRING_DECL(dashPattern, "[:Dash:]", 8); 3104 U_STRING_DECL(lowerPattern, "[:Lowercase:]", 13); 3105 U_STRING_DECL(formatPattern, "[:Cf:]", 6); 3106 U_STRING_DECL(alphaPattern, "[:Alphabetic:]", 14); 3107 3108 U_STRING_DECL(mathBlocksPattern, 3109 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]", 3110 214); 3111 U_STRING_DECL(mathPattern, "[:Math:]", 8); 3112 U_STRING_DECL(unassignedPattern, "[:Cn:]", 6); 3113 U_STRING_DECL(unknownPattern, "[:sc=Unknown:]", 14); 3114 U_STRING_DECL(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20); 3115 3116 U_STRING_INIT(hyphenPattern, "[:Hyphen:]", 10); 3117 U_STRING_INIT(dashPattern, "[:Dash:]", 8); 3118 U_STRING_INIT(lowerPattern, "[:Lowercase:]", 13); 3119 U_STRING_INIT(formatPattern, "[:Cf:]", 6); 3120 U_STRING_INIT(alphaPattern, "[:Alphabetic:]", 14); 3121 3122 U_STRING_INIT(mathBlocksPattern, 3123 "[[:block=Mathematical Operators:][:block=Miscellaneous Mathematical Symbols-A:][:block=Miscellaneous Mathematical Symbols-B:][:block=Supplemental Mathematical Operators:][:block=Mathematical Alphanumeric Symbols:]]", 3124 214); 3125 U_STRING_INIT(mathPattern, "[:Math:]", 8); 3126 U_STRING_INIT(unassignedPattern, "[:Cn:]", 6); 3127 U_STRING_INIT(unknownPattern, "[:sc=Unknown:]", 14); 3128 U_STRING_INIT(reservedPattern, "[[:Cn:][:Co:][:Cs:]]", 20); 3129 3130 /* 3131 * It used to be that UCD.html and its precursors said 3132 * "Those dashes used to mark connections between pieces of words, 3133 * plus the Katakana middle dot." 3134 * 3135 * Unicode 4 changed 00AD Soft Hyphen to Cf and removed it from Dash 3136 * but not from Hyphen. 3137 * UTC 94 (2003mar) decided to leave it that way and to change UCD.html. 3138 * Therefore, do not show errors when testing the Hyphen property. 3139 */ 3140 log_verbose("Starting with Unicode 4, inconsistencies with [:Hyphen:] are\n" 3141 "known to the UTC and not considered errors.\n"); 3142 3143 errorCode=U_ZERO_ERROR; 3144 set1=uset_openPattern(hyphenPattern, 10, &errorCode); 3145 set2=uset_openPattern(dashPattern, 8, &errorCode); 3146 if(U_SUCCESS(errorCode)) { 3147 /* remove the Katakana middle dot(s) from set1 */ 3148 uset_remove(set1, 0x30fb); 3149 uset_remove(set1, 0xff65); /* halfwidth variant */ 3150 showAMinusB(set1, set2, "[:Hyphen:]", "[:Dash:]", FALSE); 3151 } else { 3152 log_data_err("error opening [:Hyphen:] or [:Dash:] - %s (Are you missing data?)\n", u_errorName(errorCode)); 3153 } 3154 3155 /* check that Cf is neither Hyphen nor Dash nor Alphabetic */ 3156 set3=uset_openPattern(formatPattern, 6, &errorCode); 3157 set4=uset_openPattern(alphaPattern, 14, &errorCode); 3158 if(U_SUCCESS(errorCode)) { 3159 showAIntersectB(set3, set1, "[:Cf:]", "[:Hyphen:]", FALSE); 3160 showAIntersectB(set3, set2, "[:Cf:]", "[:Dash:]", TRUE); 3161 showAIntersectB(set3, set4, "[:Cf:]", "[:Alphabetic:]", TRUE); 3162 } else { 3163 log_data_err("error opening [:Cf:] or [:Alpbabetic:] - %s (Are you missing data?)\n", u_errorName(errorCode)); 3164 } 3165 3166 uset_close(set1); 3167 uset_close(set2); 3168 uset_close(set3); 3169 uset_close(set4); 3170 3171 /* 3172 * Check that each lowercase character has "small" in its name 3173 * and not "capital". 3174 * There are some such characters, some of which seem odd. 3175 * Use the verbose flag to see these notices. 3176 */ 3177 errorCode=U_ZERO_ERROR; 3178 set1=uset_openPattern(lowerPattern, 13, &errorCode); 3179 if(U_SUCCESS(errorCode)) { 3180 for(i=0;; ++i) { 3181 length=uset_getItem(set1, i, &start, &end, NULL, 0, &errorCode); 3182 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) { 3183 break; /* done */ 3184 } 3185 if(U_FAILURE(errorCode)) { 3186 log_err("error iterating over [:Lowercase:] at item %d: %s\n", 3187 i, u_errorName(errorCode)); 3188 break; 3189 } 3190 if(length!=0) { 3191 break; /* done with code points, got a string or -1 */ 3192 } 3193 3194 while(start<=end) { 3195 length=u_charName(start, U_UNICODE_CHAR_NAME, buffer, sizeof(buffer), &errorCode); 3196 if(U_FAILURE(errorCode)) { 3197 log_data_err("error getting the name of U+%04x - %s\n", start, u_errorName(errorCode)); 3198 errorCode=U_ZERO_ERROR; 3199 } 3200 if( (strstr(buffer, "SMALL")==NULL || strstr(buffer, "CAPITAL")!=NULL) && 3201 strstr(buffer, "SMALL CAPITAL")==NULL 3202 ) { 3203 log_verbose("info: [:Lowercase:] contains U+%04x whose name does not suggest lowercase: %s\n", start, buffer); 3204 } 3205 ++start; 3206 } 3207 } 3208 } else { 3209 log_data_err("error opening [:Lowercase:] - %s (Are you missing data?)\n", u_errorName(errorCode)); 3210 } 3211 uset_close(set1); 3212 3213 /* verify that all assigned characters in Math blocks are exactly Math characters */ 3214 errorCode=U_ZERO_ERROR; 3215 set1=uset_openPattern(mathBlocksPattern, -1, &errorCode); 3216 set2=uset_openPattern(mathPattern, 8, &errorCode); 3217 set3=uset_openPattern(unassignedPattern, 6, &errorCode); 3218 if(U_SUCCESS(errorCode)) { 3219 uset_retainAll(set2, set1); /* [math blocks]&[:Math:] */ 3220 uset_complement(set3); /* assigned characters */ 3221 uset_retainAll(set1, set3); /* [math blocks]&[assigned] */ 3222 compareUSets(set1, set2, 3223 "[assigned Math block chars]", "[math blocks]&[:Math:]", 3224 TRUE); 3225 } else { 3226 log_data_err("error opening [math blocks] or [:Math:] or [:Cn:] - %s (Are you missing data?)\n", u_errorName(errorCode)); 3227 } 3228 uset_close(set1); 3229 uset_close(set2); 3230 uset_close(set3); 3231 3232 /* new in Unicode 5.0: exactly all unassigned+PUA+surrogate code points have script=Unknown */ 3233 errorCode=U_ZERO_ERROR; 3234 set1=uset_openPattern(unknownPattern, 14, &errorCode); 3235 set2=uset_openPattern(reservedPattern, 20, &errorCode); 3236 if(U_SUCCESS(errorCode)) { 3237 compareUSets(set1, set2, 3238 "[:sc=Unknown:]", "[[:Cn:][:Co:][:Cs:]]", 3239 TRUE); 3240 } else { 3241 log_data_err("error opening [:sc=Unknown:] or [[:Cn:][:Co:][:Cs:]] - %s (Are you missing data?)\n", u_errorName(errorCode)); 3242 } 3243 uset_close(set1); 3244 uset_close(set2); 3245 } 3246 3247 /* 3248 * Starting with ICU4C 3.4, the core Unicode properties files 3249 * (uprops.icu, ucase.icu, ubidi.icu, unorm.icu) 3250 * are hardcoded in the common DLL and therefore not included 3251 * in the data package any more. 3252 * Test requiring these files are disabled so that 3253 * we need not jump through hoops (like adding snapshots of these files 3254 * to testdata). 3255 * See Jitterbug 4497. 3256 */ 3257 #define HARDCODED_DATA_4497 1 3258 3259 /* API coverage for ucase.c */ 3260 static void TestUCase() { 3261 #if !HARDCODED_DATA_4497 3262 UDataMemory *pData; 3263 UCaseProps *csp; 3264 const UCaseProps *ccsp; 3265 UErrorCode errorCode; 3266 3267 /* coverage for ucase_openBinary() */ 3268 errorCode=U_ZERO_ERROR; 3269 pData=udata_open(NULL, UCASE_DATA_TYPE, UCASE_DATA_NAME, &errorCode); 3270 if(U_FAILURE(errorCode)) { 3271 log_data_err("unable to open " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n", 3272 u_errorName(errorCode)); 3273 return; 3274 } 3275 3276 csp=ucase_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode); 3277 if(U_FAILURE(errorCode)) { 3278 log_err("ucase_openBinary() fails for the contents of " UCASE_DATA_NAME "." UCASE_DATA_TYPE ": %s\n", 3279 u_errorName(errorCode)); 3280 udata_close(pData); 3281 return; 3282 } 3283 3284 if(UCASE_LOWER!=ucase_getType(csp, 0xdf)) { /* verify islower(sharp s) */ 3285 log_err("ucase_openBinary() does not seem to return working UCaseProps\n"); 3286 } 3287 3288 ucase_close(csp); 3289 udata_close(pData); 3290 3291 /* coverage for ucase_getDummy() */ 3292 errorCode=U_ZERO_ERROR; 3293 ccsp=ucase_getDummy(&errorCode); 3294 if(ucase_tolower(ccsp, 0x41)!=0x41) { 3295 log_err("ucase_tolower(dummy, A)!=A\n"); 3296 } 3297 #endif 3298 } 3299 3300 /* API coverage for ubidi_props.c */ 3301 static void TestUBiDiProps() { 3302 #if !HARDCODED_DATA_4497 3303 UDataMemory *pData; 3304 UBiDiProps *bdp; 3305 const UBiDiProps *cbdp; 3306 UErrorCode errorCode; 3307 3308 /* coverage for ubidi_openBinary() */ 3309 errorCode=U_ZERO_ERROR; 3310 pData=udata_open(NULL, UBIDI_DATA_TYPE, UBIDI_DATA_NAME, &errorCode); 3311 if(U_FAILURE(errorCode)) { 3312 log_data_err("unable to open " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n", 3313 u_errorName(errorCode)); 3314 return; 3315 } 3316 3317 bdp=ubidi_openBinary((const uint8_t *)pData->pHeader, -1, &errorCode); 3318 if(U_FAILURE(errorCode)) { 3319 log_err("ubidi_openBinary() fails for the contents of " UBIDI_DATA_NAME "." UBIDI_DATA_TYPE ": %s\n", 3320 u_errorName(errorCode)); 3321 udata_close(pData); 3322 return; 3323 } 3324 3325 if(0x2215!=ubidi_getMirror(bdp, 0x29F5)) { /* verify some data */ 3326 log_err("ubidi_openBinary() does not seem to return working UBiDiProps\n"); 3327 } 3328 3329 ubidi_closeProps(bdp); 3330 udata_close(pData); 3331 3332 /* coverage for ubidi_getDummy() */ 3333 errorCode=U_ZERO_ERROR; 3334 cbdp=ubidi_getDummy(&errorCode); 3335 if(ubidi_getClass(cbdp, 0x20)!=0) { 3336 log_err("ubidi_getClass(dummy, space)!=0\n"); 3337 } 3338 #endif 3339 } 3340 3341 /* test case folding, compare return values with CaseFolding.txt ------------ */ 3342 3343 /* bit set for which case foldings for a character have been tested already */ 3344 enum { 3345 CF_SIMPLE=1, 3346 CF_FULL=2, 3347 CF_TURKIC=4, 3348 CF_ALL=7 3349 }; 3350 3351 static void 3352 testFold(UChar32 c, int which, 3353 UChar32 simple, UChar32 turkic, 3354 const UChar *full, int32_t fullLength, 3355 const UChar *turkicFull, int32_t turkicFullLength) { 3356 UChar s[2], t[32]; 3357 UChar32 c2; 3358 int32_t length, length2; 3359 3360 UErrorCode errorCode=U_ZERO_ERROR; 3361 3362 length=0; 3363 U16_APPEND_UNSAFE(s, length, c); 3364 3365 if((which&CF_SIMPLE)!=0 && (c2=u_foldCase(c, 0))!=simple) { 3366 log_err("u_foldCase(U+%04lx, default)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple); 3367 } 3368 if((which&CF_FULL)!=0) { 3369 length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, 0, &errorCode); 3370 if(length2!=fullLength || 0!=u_memcmp(t, full, fullLength)) { 3371 log_err("u_strFoldCase(U+%04lx, default) does not fold properly\n", (long)c); 3372 } 3373 } 3374 if((which&CF_TURKIC)!=0) { 3375 if((c2=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I))!=turkic) { 3376 log_err("u_foldCase(U+%04lx, turkic)=U+%04lx != U+%04lx\n", (long)c, (long)c2, (long)simple); 3377 } 3378 3379 length2=u_strFoldCase(t, UPRV_LENGTHOF(t), s, length, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode); 3380 if(length2!=turkicFullLength || 0!=u_memcmp(t, turkicFull, length2)) { 3381 log_err("u_strFoldCase(U+%04lx, turkic) does not fold properly\n", (long)c); 3382 } 3383 } 3384 } 3385 3386 /* test that c case-folds to itself */ 3387 static void 3388 testFoldToSelf(UChar32 c, int which) { 3389 UChar s[2]; 3390 int32_t length; 3391 3392 length=0; 3393 U16_APPEND_UNSAFE(s, length, c); 3394 testFold(c, which, c, c, s, length, s, length); 3395 } 3396 3397 struct CaseFoldingData { 3398 USet *notSeen; 3399 UChar32 prev, prevSimple; 3400 UChar prevFull[32]; 3401 int32_t prevFullLength; 3402 int which; 3403 }; 3404 typedef struct CaseFoldingData CaseFoldingData; 3405 3406 static void U_CALLCONV 3407 caseFoldingLineFn(void *context, 3408 char *fields[][2], int32_t fieldCount, 3409 UErrorCode *pErrorCode) { 3410 CaseFoldingData *pData=(CaseFoldingData *)context; 3411 char *end; 3412 UChar full[32]; 3413 UChar32 c, prev, simple; 3414 int32_t count; 3415 int which; 3416 char status; 3417 3418 /* get code point */ 3419 const char *s=u_skipWhitespace(fields[0][0]); 3420 if(0==strncmp(s, "0000..10FFFF", 12)) { 3421 /* 3422 * Ignore the line 3423 * # @missing: 0000..10FFFF; C; <code point> 3424 * because maps-to-self is already our default, and this line breaks this parser. 3425 */ 3426 return; 3427 } 3428 c=(UChar32)strtoul(s, &end, 16); 3429 end=(char *)u_skipWhitespace(end); 3430 if(end<=fields[0][0] || end!=fields[0][1]) { 3431 log_err("syntax error in CaseFolding.txt field 0 at %s\n", fields[0][0]); 3432 *pErrorCode=U_PARSE_ERROR; 3433 return; 3434 } 3435 3436 /* get the status of this mapping */ 3437 status=*u_skipWhitespace(fields[1][0]); 3438 if(status!='C' && status!='S' && status!='F' && status!='T') { 3439 log_err("unrecognized status field in CaseFolding.txt at %s\n", fields[0][0]); 3440 *pErrorCode=U_PARSE_ERROR; 3441 return; 3442 } 3443 3444 /* get the mapping */ 3445 count=u_parseString(fields[2][0], full, 32, (uint32_t *)&simple, pErrorCode); 3446 if(U_FAILURE(*pErrorCode)) { 3447 log_err("error parsing CaseFolding.txt mapping at %s\n", fields[0][0]); 3448 return; 3449 } 3450 3451 /* there is a simple mapping only if there is exactly one code point (count is in UChars) */ 3452 if(count==0 || count>2 || (count==2 && U16_IS_SINGLE(full[1]))) { 3453 simple=c; 3454 } 3455 3456 if(c!=(prev=pData->prev)) { 3457 /* 3458 * Test remaining mappings for the previous code point. 3459 * If a turkic folding was not mentioned, then it should fold the same 3460 * as the regular simple case folding. 3461 */ 3462 UChar prevString[2]; 3463 int32_t length; 3464 3465 length=0; 3466 U16_APPEND_UNSAFE(prevString, length, prev); 3467 testFold(prev, (~pData->which)&CF_ALL, 3468 prev, pData->prevSimple, 3469 prevString, length, 3470 pData->prevFull, pData->prevFullLength); 3471 pData->prev=pData->prevSimple=c; 3472 length=0; 3473 U16_APPEND_UNSAFE(pData->prevFull, length, c); 3474 pData->prevFullLength=length; 3475 pData->which=0; 3476 } 3477 3478 /* 3479 * Turn the status into a bit set of case foldings to test. 3480 * Remember non-Turkic case foldings as defaults for Turkic mode. 3481 */ 3482 switch(status) { 3483 case 'C': 3484 which=CF_SIMPLE|CF_FULL; 3485 pData->prevSimple=simple; 3486 u_memcpy(pData->prevFull, full, count); 3487 pData->prevFullLength=count; 3488 break; 3489 case 'S': 3490 which=CF_SIMPLE; 3491 pData->prevSimple=simple; 3492 break; 3493 case 'F': 3494 which=CF_FULL; 3495 u_memcpy(pData->prevFull, full, count); 3496 pData->prevFullLength=count; 3497 break; 3498 case 'T': 3499 which=CF_TURKIC; 3500 break; 3501 default: 3502 which=0; 3503 break; /* won't happen because of test above */ 3504 } 3505 3506 testFold(c, which, simple, simple, full, count, full, count); 3507 3508 /* remember which case foldings of c have been tested */ 3509 pData->which|=which; 3510 3511 /* remove c from the set of ones not mentioned in CaseFolding.txt */ 3512 uset_remove(pData->notSeen, c); 3513 } 3514 3515 static void 3516 TestCaseFolding() { 3517 CaseFoldingData data={ NULL }; 3518 char *fields[3][2]; 3519 UErrorCode errorCode; 3520 3521 static char *lastLine= (char *)"10FFFF; C; 10FFFF;"; 3522 3523 errorCode=U_ZERO_ERROR; 3524 /* test BMP & plane 1 - nothing interesting above */ 3525 data.notSeen=uset_open(0, 0x1ffff); 3526 data.prevFullLength=1; /* length of full case folding of U+0000 */ 3527 3528 parseUCDFile("CaseFolding.txt", fields, 3, caseFoldingLineFn, &data, &errorCode); 3529 if(U_SUCCESS(errorCode)) { 3530 int32_t i, start, end; 3531 3532 /* add a pseudo-last line to finish testing of the actual last one */ 3533 fields[0][0]=lastLine; 3534 fields[0][1]=lastLine+6; 3535 fields[1][0]=lastLine+7; 3536 fields[1][1]=lastLine+9; 3537 fields[2][0]=lastLine+10; 3538 fields[2][1]=lastLine+17; 3539 caseFoldingLineFn(&data, fields, 3, &errorCode); 3540 3541 /* verify that all code points that are not mentioned in CaseFolding.txt fold to themselves */ 3542 for(i=0; 3543 0==uset_getItem(data.notSeen, i, &start, &end, NULL, 0, &errorCode) && 3544 U_SUCCESS(errorCode); 3545 ++i 3546 ) { 3547 do { 3548 testFoldToSelf(start, CF_ALL); 3549 } while(++start<=end); 3550 } 3551 } 3552 3553 uset_close(data.notSeen); 3554 } 3555