1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2002-2010, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: uprops.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2002feb24 14 * created by: Markus W. Scherer 15 * 16 * Implementations for mostly non-core Unicode character properties 17 * stored in uprops.icu. 18 * 19 * With the APIs implemented here, almost all properties files and 20 * their associated implementation files are used from this file, 21 * including those for normalization and case mappings. 22 */ 23 24 #include "unicode/utypes.h" 25 #include "unicode/uchar.h" 26 #include "unicode/unorm2.h" 27 #include "unicode/uscript.h" 28 #include "unicode/ustring.h" 29 #include "cstring.h" 30 #include "normalizer2impl.h" 31 #include "ucln_cmn.h" 32 #include "umutex.h" 33 #include "ubidi_props.h" 34 #include "uprops.h" 35 #include "ucase.h" 36 #include "ustr_imp.h" 37 38 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 39 40 U_NAMESPACE_USE 41 42 #define GET_BIDI_PROPS() ubidi_getSingleton() 43 44 /* general properties API functions ----------------------------------------- */ 45 46 struct BinaryProperty; 47 48 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which); 49 50 struct BinaryProperty { 51 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 52 uint32_t mask; 53 BinaryPropertyContains *contains; 54 }; 55 56 static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) { 57 /* systematic, directly stored properties */ 58 return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0; 59 } 60 61 static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { 62 return ucase_hasBinaryProperty(c, which); 63 } 64 65 static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 66 return ubidi_isBidiControl(GET_BIDI_PROPS(), c); 67 } 68 69 static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 70 return ubidi_isMirrored(GET_BIDI_PROPS(), c); 71 } 72 73 static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 74 return ubidi_isJoinControl(GET_BIDI_PROPS(), c); 75 } 76 77 #if UCONFIG_NO_NORMALIZATION 78 static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) { 79 return FALSE; 80 } 81 #else 82 static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 83 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. 84 UErrorCode errorCode=U_ZERO_ERROR; 85 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 86 return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c)); 87 } 88 #endif 89 90 // UCHAR_NF*_INERT properties 91 #if UCONFIG_NO_NORMALIZATION 92 static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { 93 return FALSE; 94 } 95 #else 96 static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { 97 UErrorCode errorCode=U_ZERO_ERROR; 98 const Normalizer2 *norm2=Normalizer2Factory::getInstance( 99 (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); 100 return U_SUCCESS(errorCode) && norm2->isInert(c); 101 } 102 #endif 103 104 #if UCONFIG_NO_NORMALIZATION 105 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { 106 return FALSE; 107 } 108 #else 109 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 110 UnicodeString nfd; 111 UErrorCode errorCode=U_ZERO_ERROR; 112 const Normalizer2 *nfcNorm2=Normalizer2Factory::getNFCInstance(errorCode); 113 if(U_FAILURE(errorCode)) { 114 return FALSE; 115 } 116 if(nfcNorm2->getDecomposition(c, nfd)) { 117 /* c has a decomposition */ 118 if(nfd.length()==1) { 119 c=nfd[0]; /* single BMP code point */ 120 } else if(nfd.length()<=U16_MAX_LENGTH && 121 nfd.length()==U16_LENGTH(c=nfd.char32At(0)) 122 ) { 123 /* single supplementary code point */ 124 } else { 125 c=U_SENTINEL; 126 } 127 } else if(c<0) { 128 return FALSE; /* protect against bad input */ 129 } 130 if(c>=0) { 131 /* single code point */ 132 const UCaseProps *csp=ucase_getSingleton(); 133 const UChar *resultString; 134 return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0); 135 } else { 136 /* guess some large but stack-friendly capacity */ 137 UChar dest[2*UCASE_MAX_STRING_LENGTH]; 138 int32_t destLength; 139 destLength=u_strFoldCase(dest, LENGTHOF(dest), 140 nfd.getBuffer(), nfd.length(), 141 U_FOLD_CASE_DEFAULT, &errorCode); 142 return (UBool)(U_SUCCESS(errorCode) && 143 0!=u_strCompare(nfd.getBuffer(), nfd.length(), 144 dest, destLength, FALSE)); 145 } 146 } 147 #endif 148 149 #if UCONFIG_NO_NORMALIZATION 150 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) { 151 return FALSE; 152 } 153 #else 154 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 155 UErrorCode errorCode=U_ZERO_ERROR; 156 const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); 157 if(U_FAILURE(errorCode)) { 158 return FALSE; 159 } 160 UnicodeString src(c); 161 UnicodeString dest; 162 { 163 // The ReorderingBuffer must be in a block because its destructor 164 // needs to release dest's buffer before we look at its contents. 165 ReorderingBuffer buffer(*kcf, dest); 166 // Small destCapacity for NFKC_CF(c). 167 if(buffer.init(5, errorCode)) { 168 const UChar *srcArray=src.getBuffer(); 169 kcf->compose(srcArray, srcArray+src.length(), FALSE, 170 TRUE, buffer, errorCode); 171 } 172 } 173 return U_SUCCESS(errorCode) && dest!=src; 174 } 175 #endif 176 177 #if UCONFIG_NO_NORMALIZATION 178 static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) { 179 return FALSE; 180 } 181 #else 182 static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 183 UErrorCode errorCode=U_ZERO_ERROR; 184 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 185 return 186 U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) && 187 impl->isCanonSegmentStarter(c); 188 } 189 #endif 190 191 static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 192 return u_isalnumPOSIX(c); 193 } 194 195 static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 196 return u_isblank(c); 197 } 198 199 static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 200 return u_isgraphPOSIX(c); 201 } 202 203 static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 204 return u_isprintPOSIX(c); 205 } 206 207 static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 208 return u_isxdigit(c); 209 } 210 211 static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ 212 /* 213 * column and mask values for binary properties from u_getUnicodeProperties(). 214 * Must be in order of corresponding UProperty, 215 * and there must be exactly one entry per binary UProperty. 216 * 217 * Properties with mask==0 and contains==NULL are handled in code. 218 * For them, column is the UPropertySource value. 219 */ 220 { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains }, 221 { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains }, 222 { UPROPS_SRC_BIDI, 0, isBidiControl }, 223 { UPROPS_SRC_BIDI, 0, isMirrored }, 224 { 1, U_MASK(UPROPS_DASH), defaultContains }, 225 { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains }, 226 { 1, U_MASK(UPROPS_DEPRECATED), defaultContains }, 227 { 1, U_MASK(UPROPS_DIACRITIC), defaultContains }, 228 { 1, U_MASK(UPROPS_EXTENDER), defaultContains }, 229 { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion }, 230 { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains }, 231 { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains }, 232 { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains }, 233 { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains }, 234 { 1, U_MASK(UPROPS_HYPHEN), defaultContains }, 235 { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains }, 236 { 1, U_MASK(UPROPS_ID_START), defaultContains }, 237 { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains }, 238 { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains }, 239 { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains }, 240 { UPROPS_SRC_BIDI, 0, isJoinControl }, 241 { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains }, 242 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE 243 { 1, U_MASK(UPROPS_MATH), defaultContains }, 244 { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains }, 245 { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains }, 246 { 1, U_MASK(UPROPS_RADICAL), defaultContains }, 247 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED 248 { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains }, 249 { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains }, 250 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE 251 { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains }, 252 { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains }, 253 { 1, U_MASK(UPROPS_XID_START), defaultContains }, 254 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE 255 { 1, U_MASK(UPROPS_S_TERM), defaultContains }, 256 { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains }, 257 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT 258 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT 259 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT 260 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT 261 { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter }, 262 { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains }, 263 { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains }, 264 { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum }, 265 { UPROPS_SRC_CHAR, 0, isPOSIX_blank }, 266 { UPROPS_SRC_CHAR, 0, isPOSIX_graph }, 267 { UPROPS_SRC_CHAR, 0, isPOSIX_print }, 268 { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit }, 269 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED 270 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE 271 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED 272 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED 273 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED 274 { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded }, 275 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED 276 { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded } 277 }; 278 279 U_CAPI UBool U_EXPORT2 280 u_hasBinaryProperty(UChar32 c, UProperty which) { 281 /* c is range-checked in the functions that are called from here */ 282 if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) { 283 /* not a known binary property */ 284 return FALSE; 285 } else { 286 const BinaryProperty &prop=binProps[which]; 287 return prop.contains(prop, c, which); 288 } 289 } 290 291 #if !UCONFIG_NO_NORMALIZATION 292 293 U_CAPI uint8_t U_EXPORT2 294 u_getCombiningClass(UChar32 c) { 295 UErrorCode errorCode=U_ZERO_ERROR; 296 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 297 if(U_SUCCESS(errorCode)) { 298 return impl->getCC(impl->getNorm16(c)); 299 } else { 300 return 0; 301 } 302 } 303 304 static uint16_t 305 getFCD16(UChar32 c) { 306 UErrorCode errorCode=U_ZERO_ERROR; 307 const UTrie2 *trie=Normalizer2Factory::getFCDTrie(errorCode); 308 if(U_SUCCESS(errorCode)) { 309 return UTRIE2_GET16(trie, c); 310 } else { 311 return 0; 312 } 313 } 314 315 #endif 316 317 struct IntProperty; 318 319 typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which); 320 typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which); 321 322 struct IntProperty { 323 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 324 uint32_t mask; 325 int32_t shift; // =maxValue if getMaxValueFromShift() is used 326 IntPropertyGetValue *getValue; 327 IntPropertyGetMaxValue *getMaxValue; 328 }; 329 330 static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) { 331 /* systematic, directly stored properties */ 332 return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift; 333 } 334 335 static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) { 336 return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift; 337 } 338 339 static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) { 340 return prop.shift; 341 } 342 343 static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 344 return (int32_t)u_charDirection(c); 345 } 346 347 static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { 348 return ubidi_getMaxValue(GET_BIDI_PROPS(), which); 349 } 350 351 #if UCONFIG_NO_NORMALIZATION 352 static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { 353 return 0; 354 } 355 #else 356 static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 357 return u_getCombiningClass(c); 358 } 359 #endif 360 361 static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 362 return (int32_t)u_charType(c); 363 } 364 365 static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 366 return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); 367 } 368 369 static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 370 return ubidi_getJoiningType(GET_BIDI_PROPS(), c); 371 } 372 373 static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 374 int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getUnicodeProperties(c, -1)); 375 return UPROPS_NTV_GET_TYPE(ntv); 376 } 377 378 static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 379 UErrorCode errorCode=U_ZERO_ERROR; 380 return (int32_t)uscript_getScript(c, &errorCode); 381 } 382 383 /* 384 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. 385 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. 386 */ 387 static const UHangulSyllableType gcbToHst[]={ 388 U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ 389 U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */ 390 U_HST_NOT_APPLICABLE, /* U_GCB_CR */ 391 U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */ 392 U_HST_LEADING_JAMO, /* U_GCB_L */ 393 U_HST_NOT_APPLICABLE, /* U_GCB_LF */ 394 U_HST_LV_SYLLABLE, /* U_GCB_LV */ 395 U_HST_LVT_SYLLABLE, /* U_GCB_LVT */ 396 U_HST_TRAILING_JAMO, /* U_GCB_T */ 397 U_HST_VOWEL_JAMO /* U_GCB_V */ 398 /* 399 * Omit GCB values beyond what we need for hst. 400 * The code below checks for the array length. 401 */ 402 }; 403 404 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 405 /* see comments on gcbToHst[] above */ 406 int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; 407 if(gcb<LENGTHOF(gcbToHst)) { 408 return gcbToHst[gcb]; 409 } else { 410 return U_HST_NOT_APPLICABLE; 411 } 412 } 413 414 #if UCONFIG_NO_NORMALIZATION 415 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { 416 return 0; 417 } 418 #else 419 static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) { 420 return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); 421 } 422 #endif 423 424 #if UCONFIG_NO_NORMALIZATION 425 static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) { 426 return 0; 427 } 428 #else 429 static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 430 return getFCD16(c)>>8; 431 } 432 #endif 433 434 #if UCONFIG_NO_NORMALIZATION 435 static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { 436 return 0; 437 } 438 #else 439 static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 440 return getFCD16(c)&0xff; 441 } 442 #endif 443 444 static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ 445 /* 446 * column, mask and shift values for int-value properties from u_getUnicodeProperties(). 447 * Must be in order of corresponding UProperty, 448 * and there must be exactly one entry per int UProperty. 449 * 450 * Properties with mask==0 and getValue==NULL are handled in code. 451 * For them, column is the UPropertySource value. 452 */ 453 { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue }, 454 { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue }, 455 { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift }, 456 { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue }, 457 { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue }, 458 { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift }, 459 { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue }, 460 { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue }, 461 { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue }, 462 { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift }, 463 { 0, UPROPS_SCRIPT_MASK, 0, getScript, defaultGetMaxValue }, 464 { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift }, 465 // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" 466 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, 467 // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" 468 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, 469 // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE 470 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, 471 // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE 472 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, 473 { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift }, 474 { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift }, 475 { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue }, 476 { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue }, 477 { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue } 478 }; 479 480 U_CAPI int32_t U_EXPORT2 481 u_getIntPropertyValue(UChar32 c, UProperty which) { 482 if(which<UCHAR_INT_START) { 483 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { 484 const BinaryProperty &prop=binProps[which]; 485 return prop.contains(prop, c, which); 486 } 487 } else if(which<UCHAR_INT_LIMIT) { 488 const IntProperty &prop=intProps[which-UCHAR_INT_START]; 489 return prop.getValue(prop, c, which); 490 } else if(which==UCHAR_GENERAL_CATEGORY_MASK) { 491 return U_MASK(u_charType(c)); 492 } 493 return 0; // undefined 494 } 495 496 U_CAPI int32_t U_EXPORT2 497 u_getIntPropertyMinValue(UProperty /*which*/) { 498 return 0; /* all binary/enum/int properties have a minimum value of 0 */ 499 } 500 501 U_CAPI int32_t U_EXPORT2 502 u_getIntPropertyMaxValue(UProperty which) { 503 if(which<UCHAR_INT_START) { 504 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { 505 return 1; // maximum TRUE for all binary properties 506 } 507 } else if(which<UCHAR_INT_LIMIT) { 508 const IntProperty &prop=intProps[which-UCHAR_INT_START]; 509 return prop.getMaxValue(prop, which); 510 } 511 return -1; // undefined 512 } 513 514 U_CFUNC UPropertySource U_EXPORT2 515 uprops_getSource(UProperty which) { 516 if(which<UCHAR_BINARY_START) { 517 return UPROPS_SRC_NONE; /* undefined */ 518 } else if(which<UCHAR_BINARY_LIMIT) { 519 const BinaryProperty &prop=binProps[which]; 520 if(prop.mask!=0) { 521 return UPROPS_SRC_PROPSVEC; 522 } else { 523 return (UPropertySource)prop.column; 524 } 525 } else if(which<UCHAR_INT_START) { 526 return UPROPS_SRC_NONE; /* undefined */ 527 } else if(which<UCHAR_INT_LIMIT) { 528 const IntProperty &prop=intProps[which-UCHAR_INT_START]; 529 if(prop.mask!=0) { 530 return UPROPS_SRC_PROPSVEC; 531 } else { 532 return (UPropertySource)prop.column; 533 } 534 } else if(which<UCHAR_STRING_START) { 535 switch(which) { 536 case UCHAR_GENERAL_CATEGORY_MASK: 537 case UCHAR_NUMERIC_VALUE: 538 return UPROPS_SRC_CHAR; 539 540 default: 541 return UPROPS_SRC_NONE; 542 } 543 } else if(which<UCHAR_STRING_LIMIT) { 544 switch(which) { 545 case UCHAR_AGE: 546 return UPROPS_SRC_PROPSVEC; 547 548 case UCHAR_BIDI_MIRRORING_GLYPH: 549 return UPROPS_SRC_BIDI; 550 551 case UCHAR_CASE_FOLDING: 552 case UCHAR_LOWERCASE_MAPPING: 553 case UCHAR_SIMPLE_CASE_FOLDING: 554 case UCHAR_SIMPLE_LOWERCASE_MAPPING: 555 case UCHAR_SIMPLE_TITLECASE_MAPPING: 556 case UCHAR_SIMPLE_UPPERCASE_MAPPING: 557 case UCHAR_TITLECASE_MAPPING: 558 case UCHAR_UPPERCASE_MAPPING: 559 return UPROPS_SRC_CASE; 560 561 case UCHAR_ISO_COMMENT: 562 case UCHAR_NAME: 563 case UCHAR_UNICODE_1_NAME: 564 return UPROPS_SRC_NAMES; 565 566 default: 567 return UPROPS_SRC_NONE; 568 } 569 } else { 570 switch(which) { 571 case UCHAR_SCRIPT_EXTENSIONS: 572 return UPROPS_SRC_PROPSVEC; 573 default: 574 return UPROPS_SRC_NONE; /* undefined */ 575 } 576 } 577 } 578 579 #if !UCONFIG_NO_NORMALIZATION 580 581 U_CAPI int32_t U_EXPORT2 582 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { 583 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 584 return 0; 585 } 586 if(destCapacity<0 || (dest==NULL && destCapacity>0)) { 587 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 588 return 0; 589 } 590 // Compute the FC_NFKC_Closure on the fly: 591 // We have the API for complete coverage of Unicode properties, although 592 // this value by itself is not useful via API. 593 // (What could be useful is a custom normalization table that combines 594 // case folding and NFKC.) 595 // For the derivation, see Unicode's DerivedNormalizationProps.txt. 596 const Normalizer2 *nfkc=Normalizer2Factory::getNFKCInstance(*pErrorCode); 597 const UCaseProps *csp=ucase_getSingleton(); 598 if(U_FAILURE(*pErrorCode)) { 599 return 0; 600 } 601 // first: b = NFKC(Fold(a)) 602 UnicodeString folded1String; 603 const UChar *folded1; 604 int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFAULT); 605 if(folded1Length<0) { 606 const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); 607 if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { 608 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC 609 } 610 folded1String.setTo(c); 611 } else { 612 if(folded1Length>UCASE_MAX_STRING_LENGTH) { 613 folded1String.setTo(folded1Length); 614 } else { 615 folded1String.setTo(FALSE, folded1, folded1Length); 616 } 617 } 618 UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode); 619 // second: c = NFKC(Fold(b)) 620 UnicodeString folded2String(kc1); 621 UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); 622 // if (c != b) add the mapping from a to c 623 if(U_FAILURE(*pErrorCode) || kc1==kc2) { 624 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 625 } else { 626 return kc2.extract(dest, destCapacity, *pErrorCode); 627 } 628 } 629 630 #endif 631