1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /******************************************************************** 4 * Copyright (c) 1997-2016, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ********************************************************************/ 7 8 #include "unicode/ustring.h" 9 #include "unicode/uchar.h" 10 #include "unicode/uniset.h" 11 #include "unicode/putil.h" 12 #include "unicode/uscript.h" 13 #include "cstring.h" 14 #include "hash.h" 15 #include "patternprops.h" 16 #include "normalizer2impl.h" 17 #include "uparse.h" 18 #include "ucdtest.h" 19 20 static const char *ignorePropNames[]={ 21 "FC_NFKC", 22 "NFD_QC", 23 "NFC_QC", 24 "NFKD_QC", 25 "NFKC_QC", 26 "Expands_On_NFD", 27 "Expands_On_NFC", 28 "Expands_On_NFKD", 29 "Expands_On_NFKC", 30 "NFKC_CF" 31 }; 32 33 UnicodeTest::UnicodeTest() 34 { 35 UErrorCode errorCode=U_ZERO_ERROR; 36 unknownPropertyNames=new U_NAMESPACE_QUALIFIER Hashtable(errorCode); 37 if(U_FAILURE(errorCode)) { 38 delete unknownPropertyNames; 39 unknownPropertyNames=NULL; 40 } 41 // Ignore some property names altogether. 42 for(int32_t i=0; i<UPRV_LENGTHOF(ignorePropNames); ++i) { 43 unknownPropertyNames->puti(UnicodeString(ignorePropNames[i], -1, US_INV), 1, errorCode); 44 } 45 } 46 47 UnicodeTest::~UnicodeTest() 48 { 49 delete unknownPropertyNames; 50 } 51 52 void UnicodeTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) 53 { 54 if(exec) { 55 logln("TestSuite UnicodeTest: "); 56 } 57 TESTCASE_AUTO_BEGIN; 58 TESTCASE_AUTO(TestAdditionalProperties); 59 TESTCASE_AUTO(TestBinaryValues); 60 TESTCASE_AUTO(TestConsistency); 61 TESTCASE_AUTO(TestPatternProperties); 62 TESTCASE_AUTO(TestScriptMetadata); 63 TESTCASE_AUTO(TestBidiPairedBracketType); 64 TESTCASE_AUTO(TestEmojiProperties); 65 TESTCASE_AUTO(TestDefaultScriptExtensions); 66 TESTCASE_AUTO_END; 67 } 68 69 //==================================================== 70 // private data used by the tests 71 //==================================================== 72 73 // test DerivedCoreProperties.txt ------------------------------------------- 74 75 // copied from genprops.c 76 static int32_t 77 getTokenIndex(const char *const tokens[], int32_t countTokens, const char *s) { 78 const char *t, *z; 79 int32_t i, j; 80 81 s=u_skipWhitespace(s); 82 for(i=0; i<countTokens; ++i) { 83 t=tokens[i]; 84 if(t!=NULL) { 85 for(j=0;; ++j) { 86 if(t[j]!=0) { 87 if(s[j]!=t[j]) { 88 break; 89 } 90 } else { 91 z=u_skipWhitespace(s+j); 92 if(*z==';' || *z==0) { 93 return i; 94 } else { 95 break; 96 } 97 } 98 } 99 } 100 } 101 return -1; 102 } 103 104 static const char *const 105 derivedPropsNames[]={ 106 "Math", 107 "Alphabetic", 108 "Lowercase", 109 "Uppercase", 110 "ID_Start", 111 "ID_Continue", 112 "XID_Start", 113 "XID_Continue", 114 "Default_Ignorable_Code_Point", 115 "Full_Composition_Exclusion", 116 "Grapheme_Extend", 117 "Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */ 118 "Grapheme_Base", 119 "Cased", 120 "Case_Ignorable", 121 "Changes_When_Lowercased", 122 "Changes_When_Uppercased", 123 "Changes_When_Titlecased", 124 "Changes_When_Casefolded", 125 "Changes_When_Casemapped", 126 "Changes_When_NFKC_Casefolded" 127 }; 128 129 static const UProperty 130 derivedPropsIndex[]={ 131 UCHAR_MATH, 132 UCHAR_ALPHABETIC, 133 UCHAR_LOWERCASE, 134 UCHAR_UPPERCASE, 135 UCHAR_ID_START, 136 UCHAR_ID_CONTINUE, 137 UCHAR_XID_START, 138 UCHAR_XID_CONTINUE, 139 UCHAR_DEFAULT_IGNORABLE_CODE_POINT, 140 UCHAR_FULL_COMPOSITION_EXCLUSION, 141 UCHAR_GRAPHEME_EXTEND, 142 UCHAR_GRAPHEME_LINK, 143 UCHAR_GRAPHEME_BASE, 144 UCHAR_CASED, 145 UCHAR_CASE_IGNORABLE, 146 UCHAR_CHANGES_WHEN_LOWERCASED, 147 UCHAR_CHANGES_WHEN_UPPERCASED, 148 UCHAR_CHANGES_WHEN_TITLECASED, 149 UCHAR_CHANGES_WHEN_CASEFOLDED, 150 UCHAR_CHANGES_WHEN_CASEMAPPED, 151 UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED 152 }; 153 154 static int32_t numErrors[UPRV_LENGTHOF(derivedPropsIndex)]={ 0 }; 155 156 enum { MAX_ERRORS=50 }; 157 158 U_CFUNC void U_CALLCONV 159 derivedPropsLineFn(void *context, 160 char *fields[][2], int32_t /* fieldCount */, 161 UErrorCode *pErrorCode) 162 { 163 UnicodeTest *me=(UnicodeTest *)context; 164 uint32_t start, end; 165 int32_t i; 166 167 u_parseCodePointRange(fields[0][0], &start, &end, pErrorCode); 168 if(U_FAILURE(*pErrorCode)) { 169 me->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt or DerivedNormalizationProps.txt field 0 at %s\n", fields[0][0]); 170 return; 171 } 172 173 /* parse derived binary property name, ignore unknown names */ 174 i=getTokenIndex(derivedPropsNames, UPRV_LENGTHOF(derivedPropsNames), fields[1][0]); 175 if(i<0) { 176 UnicodeString propName(fields[1][0], (int32_t)(fields[1][1]-fields[1][0])); 177 propName.trim(); 178 if(me->unknownPropertyNames->find(propName)==NULL) { 179 UErrorCode errorCode=U_ZERO_ERROR; 180 me->unknownPropertyNames->puti(propName, 1, errorCode); 181 me->errln("UnicodeTest warning: unknown property name '%s' in DerivedCoreProperties.txt or DerivedNormalizationProps.txt\n", fields[1][0]); 182 } 183 return; 184 } 185 186 me->derivedProps[i].add(start, end); 187 } 188 189 void UnicodeTest::TestAdditionalProperties() { 190 #if !UCONFIG_NO_NORMALIZATION 191 // test DerivedCoreProperties.txt and DerivedNormalizationProps.txt 192 if(UPRV_LENGTHOF(derivedProps)<UPRV_LENGTHOF(derivedPropsNames)) { 193 errln("error: UnicodeTest::derivedProps[] too short, need at least %d UnicodeSets\n", 194 UPRV_LENGTHOF(derivedPropsNames)); 195 return; 196 } 197 if(UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)) { 198 errln("error in ucdtest.cpp: UPRV_LENGTHOF(derivedPropsIndex)!=UPRV_LENGTHOF(derivedPropsNames)\n"); 199 return; 200 } 201 202 char path[500]; 203 if(getUnidataPath(path) == NULL) { 204 errln("unable to find path to source/data/unidata/"); 205 return; 206 } 207 char *basename=strchr(path, 0); 208 strcpy(basename, "DerivedCoreProperties.txt"); 209 210 char *fields[2][2]; 211 UErrorCode errorCode=U_ZERO_ERROR; 212 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode); 213 if(U_FAILURE(errorCode)) { 214 errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode)); 215 return; 216 } 217 218 strcpy(basename, "DerivedNormalizationProps.txt"); 219 u_parseDelimitedFile(path, ';', fields, 2, derivedPropsLineFn, this, &errorCode); 220 if(U_FAILURE(errorCode)) { 221 errln("error parsing DerivedNormalizationProps.txt: %s\n", u_errorName(errorCode)); 222 return; 223 } 224 225 // now we have all derived core properties in the UnicodeSets 226 // run them all through the API 227 int32_t rangeCount, range; 228 uint32_t i; 229 UChar32 start, end; 230 231 // test all TRUE properties 232 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { 233 rangeCount=derivedProps[i].getRangeCount(); 234 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { 235 start=derivedProps[i].getRangeStart(range); 236 end=derivedProps[i].getRangeEnd(range); 237 for(; start<=end; ++start) { 238 if(!u_hasBinaryProperty(start, derivedPropsIndex[i])) { 239 dataerrln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong", start, derivedPropsNames[i]); 240 if(++numErrors[i]>=MAX_ERRORS) { 241 dataerrln("Too many errors, moving to the next test"); 242 break; 243 } 244 } 245 } 246 } 247 } 248 249 // invert all properties 250 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { 251 derivedProps[i].complement(); 252 } 253 254 // test all FALSE properties 255 for(i=0; i<UPRV_LENGTHOF(derivedPropsNames); ++i) { 256 rangeCount=derivedProps[i].getRangeCount(); 257 for(range=0; range<rangeCount && numErrors[i]<MAX_ERRORS; ++range) { 258 start=derivedProps[i].getRangeStart(range); 259 end=derivedProps[i].getRangeEnd(range); 260 for(; start<=end; ++start) { 261 if(u_hasBinaryProperty(start, derivedPropsIndex[i])) { 262 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start, derivedPropsNames[i]); 263 if(++numErrors[i]>=MAX_ERRORS) { 264 errln("Too many errors, moving to the next test"); 265 break; 266 } 267 } 268 } 269 } 270 } 271 #endif /* !UCONFIG_NO_NORMALIZATION */ 272 } 273 274 void UnicodeTest::TestBinaryValues() { 275 /* 276 * Unicode 5.1 explicitly defines binary property value aliases. 277 * Verify that they are all recognized. 278 */ 279 UErrorCode errorCode=U_ZERO_ERROR; 280 UnicodeSet alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode); 281 if(U_FAILURE(errorCode)) { 282 dataerrln("UnicodeSet([:Alphabetic:]) failed - %s", u_errorName(errorCode)); 283 return; 284 } 285 286 static const char *const falseValues[]={ "N", "No", "F", "False" }; 287 static const char *const trueValues[]={ "Y", "Yes", "T", "True" }; 288 int32_t i; 289 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) { 290 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); 291 pattern.insert(pattern.length()-2, UnicodeString(falseValues[i], -1, US_INV)); 292 errorCode=U_ZERO_ERROR; 293 UnicodeSet set(pattern, errorCode); 294 if(U_FAILURE(errorCode)) { 295 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues[i], u_errorName(errorCode)); 296 continue; 297 } 298 set.complement(); 299 if(set!=alpha) { 300 errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alphabetic:])\n", falseValues[i]); 301 } 302 } 303 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) { 304 UnicodeString pattern=UNICODE_STRING_SIMPLE("[:Alphabetic=:]"); 305 pattern.insert(pattern.length()-2, UnicodeString(trueValues[i], -1, US_INV)); 306 errorCode=U_ZERO_ERROR; 307 UnicodeSet set(pattern, errorCode); 308 if(U_FAILURE(errorCode)) { 309 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues[i], u_errorName(errorCode)); 310 continue; 311 } 312 if(set!=alpha) { 313 errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n", trueValues[i]); 314 } 315 } 316 } 317 318 void UnicodeTest::TestConsistency() { 319 #if !UCONFIG_NO_NORMALIZATION 320 /* 321 * Test for an example that getCanonStartSet() delivers 322 * all characters that compose from the input one, 323 * even in multiple steps. 324 * For example, the set for "I" (0049) should contain both 325 * I-diaeresis (00CF) and I-diaeresis-acute (1E2E). 326 * In general, the set for the middle such character should be a subset 327 * of the set for the first. 328 */ 329 IcuTestErrorCode errorCode(*this, "TestConsistency"); 330 const Normalizer2 *nfd=Normalizer2::getNFDInstance(errorCode); 331 const Normalizer2Impl *nfcImpl=Normalizer2Factory::getNFCImpl(errorCode); 332 if(!nfcImpl->ensureCanonIterData(errorCode) || errorCode.isFailure()) { 333 dataerrln("Normalizer2::getInstance(NFD) or Normalizer2Factory::getNFCImpl() failed - %s\n", 334 errorCode.errorName()); 335 errorCode.reset(); 336 return; 337 } 338 339 UnicodeSet set1, set2; 340 if (nfcImpl->getCanonStartSet(0x49, set1)) { 341 /* enumerate all characters that are plausible to be latin letters */ 342 for(UChar start=0xa0; start<0x2000; ++start) { 343 UnicodeString decomp=nfd->normalize(UnicodeString(start), errorCode); 344 if(decomp.length()>1 && decomp[0]==0x49) { 345 set2.add(start); 346 } 347 } 348 349 if (set1!=set2) { 350 errln("[canon start set of 0049] != [all c with canon decomp with 0049]"); 351 } 352 // This was available in cucdtst.c but the test had to move to intltest 353 // because the new internal normalization functions are in C++. 354 //compareUSets(set1, set2, 355 // "[canon start set of 0049]", "[all c with canon decomp with 0049]", 356 // TRUE); 357 } else { 358 errln("NFC.getCanonStartSet() returned FALSE"); 359 } 360 #endif 361 } 362 363 /** 364 * Test various implementations of Pattern_Syntax & Pattern_White_Space. 365 */ 366 void UnicodeTest::TestPatternProperties() { 367 IcuTestErrorCode errorCode(*this, "TestPatternProperties()"); 368 UnicodeSet syn_pp; 369 UnicodeSet syn_prop(UNICODE_STRING_SIMPLE("[:Pattern_Syntax:]"), errorCode); 370 UnicodeSet syn_list( 371 "[!-/\\:-@\\[-\\^`\\{-~" 372 "\\u00A1-\\u00A7\\u00A9\\u00AB\\u00AC\\u00AE\\u00B0\\u00B1\\u00B6\\u00BB\\u00BF\\u00D7\\u00F7" 373 "\\u2010-\\u2027\\u2030-\\u203E\\u2041-\\u2053\\u2055-\\u205E\\u2190-\\u245F\\u2500-\\u2775" 374 "\\u2794-\\u2BFF\\u2E00-\\u2E7F\\u3001-\\u3003\\u3008-\\u3020\\u3030\\uFD3E\\uFD3F\\uFE45\\uFE46]", errorCode); 375 UnicodeSet ws_pp; 376 UnicodeSet ws_prop(UNICODE_STRING_SIMPLE("[:Pattern_White_Space:]"), errorCode); 377 UnicodeSet ws_list(UNICODE_STRING_SIMPLE("[\\u0009-\\u000D\\ \\u0085\\u200E\\u200F\\u2028\\u2029]"), errorCode); 378 UnicodeSet syn_ws_pp; 379 UnicodeSet syn_ws_prop(syn_prop); 380 syn_ws_prop.addAll(ws_prop); 381 for(UChar32 c=0; c<=0xffff; ++c) { 382 if(PatternProps::isSyntax(c)) { 383 syn_pp.add(c); 384 } 385 if(PatternProps::isWhiteSpace(c)) { 386 ws_pp.add(c); 387 } 388 if(PatternProps::isSyntaxOrWhiteSpace(c)) { 389 syn_ws_pp.add(c); 390 } 391 } 392 compareUSets(syn_pp, syn_prop, 393 "PatternProps.isSyntax()", "[:Pattern_Syntax:]", TRUE); 394 compareUSets(syn_pp, syn_list, 395 "PatternProps.isSyntax()", "[Pattern_Syntax ranges]", TRUE); 396 compareUSets(ws_pp, ws_prop, 397 "PatternProps.isWhiteSpace()", "[:Pattern_White_Space:]", TRUE); 398 compareUSets(ws_pp, ws_list, 399 "PatternProps.isWhiteSpace()", "[Pattern_White_Space ranges]", TRUE); 400 compareUSets(syn_ws_pp, syn_ws_prop, 401 "PatternProps.isSyntaxOrWhiteSpace()", 402 "[[:Pattern_Syntax:][:Pattern_White_Space:]]", TRUE); 403 } 404 405 // So far only minimal port of Java & cucdtst.c compareUSets(). 406 UBool 407 UnicodeTest::compareUSets(const UnicodeSet &a, const UnicodeSet &b, 408 const char *a_name, const char *b_name, 409 UBool diffIsError) { 410 UBool same= a==b; 411 if(!same && diffIsError) { 412 errln("Sets are different: %s vs. %s\n", a_name, b_name); 413 } 414 return same; 415 } 416 417 namespace { 418 419 /** 420 * Maps a special script code to the most common script of its encoded characters. 421 */ 422 UScriptCode getCharScript(UScriptCode script) { 423 switch(script) { 424 case USCRIPT_HAN_WITH_BOPOMOFO: 425 case USCRIPT_SIMPLIFIED_HAN: 426 case USCRIPT_TRADITIONAL_HAN: 427 return USCRIPT_HAN; 428 case USCRIPT_JAPANESE: 429 return USCRIPT_HIRAGANA; 430 case USCRIPT_JAMO: 431 case USCRIPT_KOREAN: 432 return USCRIPT_HANGUL; 433 case USCRIPT_SYMBOLS_EMOJI: 434 return USCRIPT_SYMBOLS; 435 default: 436 return script; 437 } 438 } 439 440 } // namespace 441 442 void UnicodeTest::TestScriptMetadata() { 443 IcuTestErrorCode errorCode(*this, "TestScriptMetadata()"); 444 UnicodeSet rtl("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]", errorCode); 445 // So far, sample characters are uppercase. 446 // Georgian is special. 447 UnicodeSet cased("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]", errorCode); 448 for(int32_t sci = 0; sci < USCRIPT_CODE_LIMIT; ++sci) { 449 UScriptCode sc = (UScriptCode)sci; 450 // Run the test with -v to see which script has failures: 451 // .../intltest$ make && ./intltest utility/UnicodeTest/TestScriptMetadata -v | grep -C 6 FAIL 452 logln(uscript_getShortName(sc)); 453 UScriptUsage usage = uscript_getUsage(sc); 454 UnicodeString sample = uscript_getSampleUnicodeString(sc); 455 UnicodeSet scriptSet; 456 scriptSet.applyIntPropertyValue(UCHAR_SCRIPT, sc, errorCode); 457 if(usage == USCRIPT_USAGE_NOT_ENCODED) { 458 assertTrue("not encoded, no sample", sample.isEmpty()); 459 assertFalse("not encoded, not RTL", uscript_isRightToLeft(sc)); 460 assertFalse("not encoded, not LB letters", uscript_breaksBetweenLetters(sc)); 461 assertFalse("not encoded, not cased", uscript_isCased(sc)); 462 assertTrue("not encoded, no characters", scriptSet.isEmpty()); 463 } else { 464 assertFalse("encoded, has a sample character", sample.isEmpty()); 465 UChar32 firstChar = sample.char32At(0); 466 UScriptCode charScript = getCharScript(sc); 467 assertEquals("script(sample(script))", 468 (int32_t)charScript, (int32_t)uscript_getScript(firstChar, errorCode)); 469 assertEquals("RTL vs. set", (UBool)rtl.contains(firstChar), (UBool)uscript_isRightToLeft(sc)); 470 assertEquals("cased vs. set", (UBool)cased.contains(firstChar), (UBool)uscript_isCased(sc)); 471 assertEquals("encoded, has characters", (UBool)(sc == charScript), (UBool)(!scriptSet.isEmpty())); 472 if(uscript_isRightToLeft(sc)) { 473 rtl.removeAll(scriptSet); 474 } 475 if(uscript_isCased(sc)) { 476 cased.removeAll(scriptSet); 477 } 478 } 479 } 480 UnicodeString pattern; 481 assertEquals("no remaining RTL characters", 482 UnicodeString("[]"), rtl.toPattern(pattern)); 483 assertEquals("no remaining cased characters", 484 UnicodeString("[]"), cased.toPattern(pattern)); 485 486 assertTrue("Hani breaks between letters", uscript_breaksBetweenLetters(USCRIPT_HAN)); 487 assertTrue("Thai breaks between letters", uscript_breaksBetweenLetters(USCRIPT_THAI)); 488 assertFalse("Latn does not break between letters", uscript_breaksBetweenLetters(USCRIPT_LATIN)); 489 } 490 491 void UnicodeTest::TestBidiPairedBracketType() { 492 // BidiBrackets-6.3.0.txt says: 493 // 494 // The set of code points listed in this file was originally derived 495 // using the character properties General_Category (gc), Bidi_Class (bc), 496 // Bidi_Mirrored (Bidi_M), and Bidi_Mirroring_Glyph (bmg), as follows: 497 // two characters, A and B, form a pair if A has gc=Ps and B has gc=Pe, 498 // both have bc=ON and Bidi_M=Y, and bmg of A is B. Bidi_Paired_Bracket 499 // maps A to B and vice versa, and their Bidi_Paired_Bracket_Type 500 // property values are Open and Close, respectively. 501 IcuTestErrorCode errorCode(*this, "TestBidiPairedBracketType()"); 502 UnicodeSet bpt("[:^bpt=n:]", errorCode); 503 assertTrue("bpt!=None is not empty", !bpt.isEmpty()); 504 // The following should always be true. 505 UnicodeSet mirrored("[:Bidi_M:]", errorCode); 506 UnicodeSet other_neutral("[:bc=ON:]", errorCode); 507 assertTrue("bpt!=None is a subset of Bidi_M", mirrored.containsAll(bpt)); 508 assertTrue("bpt!=None is a subset of bc=ON", other_neutral.containsAll(bpt)); 509 // The following are true at least initially in Unicode 6.3. 510 UnicodeSet bpt_open("[:bpt=o:]", errorCode); 511 UnicodeSet bpt_close("[:bpt=c:]", errorCode); 512 UnicodeSet ps("[:Ps:]", errorCode); 513 UnicodeSet pe("[:Pe:]", errorCode); 514 assertTrue("bpt=Open is a subset of Ps", ps.containsAll(bpt_open)); 515 assertTrue("bpt=Close is a subset of Pe", pe.containsAll(bpt_close)); 516 } 517 518 void UnicodeTest::TestEmojiProperties() { 519 assertFalse("space is not Emoji", u_hasBinaryProperty(0x20, UCHAR_EMOJI)); 520 assertTrue("shooting star is Emoji", u_hasBinaryProperty(0x1F320, UCHAR_EMOJI)); 521 IcuTestErrorCode errorCode(*this, "TestEmojiProperties()"); 522 UnicodeSet emoji("[:Emoji:]", errorCode); 523 assertTrue("lots of Emoji", emoji.size() > 700); 524 525 assertTrue("shooting star is Emoji_Presentation", 526 u_hasBinaryProperty(0x1F320, UCHAR_EMOJI_PRESENTATION)); 527 assertTrue("Fitzpatrick 6 is Emoji_Modifier", 528 u_hasBinaryProperty(0x1F3FF, UCHAR_EMOJI_MODIFIER)); 529 assertTrue("happy person is Emoji_Modifier_Base", 530 u_hasBinaryProperty(0x1F64B, UCHAR_EMOJI_MODIFIER_BASE)); 531 assertTrue("asterisk is Emoji_Component", 532 u_hasBinaryProperty(0x2A, UCHAR_EMOJI_COMPONENT)); 533 } 534 535 void UnicodeTest::TestDefaultScriptExtensions() { 536 // Block 3000..303F CJK Symbols and Punctuation defaults to scx=Bopo Hang Hani Hira Kana Yiii 537 // but some of its characters revert to scx=<script> which is usually Common. 538 IcuTestErrorCode errorCode(*this, "TestDefaultScriptExtensions()"); 539 UScriptCode scx[20]; 540 scx[0] = USCRIPT_INVALID_CODE; 541 assertEquals("U+3000 num scx", 1, // IDEOGRAPHIC SPACE 542 uscript_getScriptExtensions(0x3000, scx, UPRV_LENGTHOF(scx), errorCode)); 543 assertEquals("U+3000 num scx[0]", USCRIPT_COMMON, scx[0]); 544 scx[0] = USCRIPT_INVALID_CODE; 545 assertEquals("U+3012 num scx", 1, // POSTAL MARK 546 uscript_getScriptExtensions(0x3012, scx, UPRV_LENGTHOF(scx), errorCode)); 547 assertEquals("U+3012 num scx[0]", USCRIPT_COMMON, scx[0]); 548 } 549