1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2002-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: uprops.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2002feb24 16 * created by: Markus W. Scherer 17 * 18 * Implementations for mostly non-core Unicode character properties 19 * stored in uprops.icu. 20 * 21 * With the APIs implemented here, almost all properties files and 22 * their associated implementation files are used from this file, 23 * including those for normalization and case mappings. 24 */ 25 26 #include "unicode/utypes.h" 27 #include "unicode/uchar.h" 28 #include "unicode/unorm2.h" 29 #include "unicode/uscript.h" 30 #include "unicode/ustring.h" 31 #include "cstring.h" 32 #include "normalizer2impl.h" 33 #include "umutex.h" 34 #include "ubidi_props.h" 35 #include "uprops.h" 36 #include "ucase.h" 37 #include "ustr_imp.h" 38 39 U_NAMESPACE_USE 40 41 #define GET_BIDI_PROPS() ubidi_getSingleton() 42 43 /* general properties API functions ----------------------------------------- */ 44 45 struct BinaryProperty; 46 47 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which); 48 49 struct BinaryProperty { 50 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 51 uint32_t mask; 52 BinaryPropertyContains *contains; 53 }; 54 55 static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) { 56 /* systematic, directly stored properties */ 57 return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0; 58 } 59 60 static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { 61 return ucase_hasBinaryProperty(c, which); 62 } 63 64 static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 65 return ubidi_isBidiControl(GET_BIDI_PROPS(), c); 66 } 67 68 static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 69 return ubidi_isMirrored(GET_BIDI_PROPS(), c); 70 } 71 72 static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 73 return ubidi_isJoinControl(GET_BIDI_PROPS(), c); 74 } 75 76 #if UCONFIG_NO_NORMALIZATION 77 static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) { 78 return FALSE; 79 } 80 #else 81 static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 82 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. 83 UErrorCode errorCode=U_ZERO_ERROR; 84 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 85 return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c)); 86 } 87 #endif 88 89 // UCHAR_NF*_INERT properties 90 #if UCONFIG_NO_NORMALIZATION 91 static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { 92 return FALSE; 93 } 94 #else 95 static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { 96 UErrorCode errorCode=U_ZERO_ERROR; 97 const Normalizer2 *norm2=Normalizer2Factory::getInstance( 98 (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); 99 return U_SUCCESS(errorCode) && norm2->isInert(c); 100 } 101 #endif 102 103 #if UCONFIG_NO_NORMALIZATION 104 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { 105 return FALSE; 106 } 107 #else 108 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 109 UnicodeString nfd; 110 UErrorCode errorCode=U_ZERO_ERROR; 111 const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode); 112 if(U_FAILURE(errorCode)) { 113 return FALSE; 114 } 115 if(nfcNorm2->getDecomposition(c, nfd)) { 116 /* c has a decomposition */ 117 if(nfd.length()==1) { 118 c=nfd[0]; /* single BMP code point */ 119 } else if(nfd.length()<=U16_MAX_LENGTH && 120 nfd.length()==U16_LENGTH(c=nfd.char32At(0)) 121 ) { 122 /* single supplementary code point */ 123 } else { 124 c=U_SENTINEL; 125 } 126 } else if(c<0) { 127 return FALSE; /* protect against bad input */ 128 } 129 if(c>=0) { 130 /* single code point */ 131 const UChar *resultString; 132 return (UBool)(ucase_toFullFolding(c, &resultString, U_FOLD_CASE_DEFAULT)>=0); 133 } else { 134 /* guess some large but stack-friendly capacity */ 135 UChar dest[2*UCASE_MAX_STRING_LENGTH]; 136 int32_t destLength; 137 destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest), 138 nfd.getBuffer(), nfd.length(), 139 U_FOLD_CASE_DEFAULT, &errorCode); 140 return (UBool)(U_SUCCESS(errorCode) && 141 0!=u_strCompare(nfd.getBuffer(), nfd.length(), 142 dest, destLength, FALSE)); 143 } 144 } 145 #endif 146 147 #if UCONFIG_NO_NORMALIZATION 148 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) { 149 return FALSE; 150 } 151 #else 152 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 153 UErrorCode errorCode=U_ZERO_ERROR; 154 const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); 155 if(U_FAILURE(errorCode)) { 156 return FALSE; 157 } 158 UnicodeString src(c); 159 UnicodeString dest; 160 { 161 // The ReorderingBuffer must be in a block because its destructor 162 // needs to release dest's buffer before we look at its contents. 163 ReorderingBuffer buffer(*kcf, dest); 164 // Small destCapacity for NFKC_CF(c). 165 if(buffer.init(5, errorCode)) { 166 const UChar *srcArray=src.getBuffer(); 167 kcf->compose(srcArray, srcArray+src.length(), FALSE, 168 TRUE, buffer, errorCode); 169 } 170 } 171 return U_SUCCESS(errorCode) && dest!=src; 172 } 173 #endif 174 175 #if UCONFIG_NO_NORMALIZATION 176 static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) { 177 return FALSE; 178 } 179 #else 180 static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 181 UErrorCode errorCode=U_ZERO_ERROR; 182 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 183 return 184 U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) && 185 impl->isCanonSegmentStarter(c); 186 } 187 #endif 188 189 static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 190 return u_isalnumPOSIX(c); 191 } 192 193 static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 194 return u_isblank(c); 195 } 196 197 static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 198 return u_isgraphPOSIX(c); 199 } 200 201 static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 202 return u_isprintPOSIX(c); 203 } 204 205 static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 206 return u_isxdigit(c); 207 } 208 209 static UBool isRegionalIndicator(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 210 // Property starts are a subset of lb=RI etc. 211 return 0x1F1E6<=c && c<=0x1F1FF; 212 } 213 214 static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ 215 /* 216 * column and mask values for binary properties from u_getUnicodeProperties(). 217 * Must be in order of corresponding UProperty, 218 * and there must be exactly one entry per binary UProperty. 219 * 220 * Properties with mask==0 are handled in code. 221 * For them, column is the UPropertySource value. 222 */ 223 { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains }, 224 { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains }, 225 { UPROPS_SRC_BIDI, 0, isBidiControl }, 226 { UPROPS_SRC_BIDI, 0, isMirrored }, 227 { 1, U_MASK(UPROPS_DASH), defaultContains }, 228 { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains }, 229 { 1, U_MASK(UPROPS_DEPRECATED), defaultContains }, 230 { 1, U_MASK(UPROPS_DIACRITIC), defaultContains }, 231 { 1, U_MASK(UPROPS_EXTENDER), defaultContains }, 232 { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion }, 233 { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains }, 234 { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains }, 235 { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains }, 236 { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains }, 237 { 1, U_MASK(UPROPS_HYPHEN), defaultContains }, 238 { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains }, 239 { 1, U_MASK(UPROPS_ID_START), defaultContains }, 240 { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains }, 241 { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains }, 242 { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains }, 243 { UPROPS_SRC_BIDI, 0, isJoinControl }, 244 { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains }, 245 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE 246 { 1, U_MASK(UPROPS_MATH), defaultContains }, 247 { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains }, 248 { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains }, 249 { 1, U_MASK(UPROPS_RADICAL), defaultContains }, 250 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED 251 { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains }, 252 { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains }, 253 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE 254 { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains }, 255 { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains }, 256 { 1, U_MASK(UPROPS_XID_START), defaultContains }, 257 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE 258 { 1, U_MASK(UPROPS_S_TERM), defaultContains }, 259 { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains }, 260 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT 261 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT 262 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT 263 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT 264 { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter }, 265 { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains }, 266 { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains }, 267 { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum }, 268 { UPROPS_SRC_CHAR, 0, isPOSIX_blank }, 269 { UPROPS_SRC_CHAR, 0, isPOSIX_graph }, 270 { UPROPS_SRC_CHAR, 0, isPOSIX_print }, 271 { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit }, 272 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED 273 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE 274 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED 275 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED 276 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED 277 { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded }, 278 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED 279 { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded }, 280 { 2, U_MASK(UPROPS_2_EMOJI), defaultContains }, 281 { 2, U_MASK(UPROPS_2_EMOJI_PRESENTATION), defaultContains }, 282 { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER), defaultContains }, 283 { 2, U_MASK(UPROPS_2_EMOJI_MODIFIER_BASE), defaultContains }, 284 { 2, U_MASK(UPROPS_2_EMOJI_COMPONENT), defaultContains }, 285 { 2, 0, isRegionalIndicator }, 286 { 1, U_MASK(UPROPS_PREPENDED_CONCATENATION_MARK), defaultContains }, 287 }; 288 289 U_CAPI UBool U_EXPORT2 290 u_hasBinaryProperty(UChar32 c, UProperty which) { 291 /* c is range-checked in the functions that are called from here */ 292 if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) { 293 /* not a known binary property */ 294 return FALSE; 295 } else { 296 const BinaryProperty &prop=binProps[which]; 297 return prop.contains(prop, c, which); 298 } 299 } 300 301 struct IntProperty; 302 303 typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which); 304 typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which); 305 306 struct IntProperty { 307 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 308 uint32_t mask; 309 int32_t shift; // =maxValue if getMaxValueFromShift() is used 310 IntPropertyGetValue *getValue; 311 IntPropertyGetMaxValue *getMaxValue; 312 }; 313 314 static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) { 315 /* systematic, directly stored properties */ 316 return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift; 317 } 318 319 static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) { 320 return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift; 321 } 322 323 static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) { 324 return prop.shift; 325 } 326 327 static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 328 return (int32_t)u_charDirection(c); 329 } 330 331 static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 332 return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c); 333 } 334 335 static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { 336 return ubidi_getMaxValue(GET_BIDI_PROPS(), which); 337 } 338 339 #if UCONFIG_NO_NORMALIZATION 340 static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { 341 return 0; 342 } 343 #else 344 static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 345 return u_getCombiningClass(c); 346 } 347 #endif 348 349 static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 350 return (int32_t)u_charType(c); 351 } 352 353 static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 354 return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); 355 } 356 357 static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 358 return ubidi_getJoiningType(GET_BIDI_PROPS(), c); 359 } 360 361 static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 362 int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)); 363 return UPROPS_NTV_GET_TYPE(ntv); 364 } 365 366 static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 367 UErrorCode errorCode=U_ZERO_ERROR; 368 return (int32_t)uscript_getScript(c, &errorCode); 369 } 370 371 /* 372 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. 373 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. 374 */ 375 static const UHangulSyllableType gcbToHst[]={ 376 U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ 377 U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */ 378 U_HST_NOT_APPLICABLE, /* U_GCB_CR */ 379 U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */ 380 U_HST_LEADING_JAMO, /* U_GCB_L */ 381 U_HST_NOT_APPLICABLE, /* U_GCB_LF */ 382 U_HST_LV_SYLLABLE, /* U_GCB_LV */ 383 U_HST_LVT_SYLLABLE, /* U_GCB_LVT */ 384 U_HST_TRAILING_JAMO, /* U_GCB_T */ 385 U_HST_VOWEL_JAMO /* U_GCB_V */ 386 /* 387 * Omit GCB values beyond what we need for hst. 388 * The code below checks for the array length. 389 */ 390 }; 391 392 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 393 /* see comments on gcbToHst[] above */ 394 int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; 395 if(gcb<UPRV_LENGTHOF(gcbToHst)) { 396 return gcbToHst[gcb]; 397 } else { 398 return U_HST_NOT_APPLICABLE; 399 } 400 } 401 402 #if UCONFIG_NO_NORMALIZATION 403 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { 404 return 0; 405 } 406 #else 407 static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) { 408 return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); 409 } 410 #endif 411 412 #if UCONFIG_NO_NORMALIZATION 413 static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) { 414 return 0; 415 } 416 #else 417 static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 418 return unorm_getFCD16(c)>>8; 419 } 420 #endif 421 422 #if UCONFIG_NO_NORMALIZATION 423 static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { 424 return 0; 425 } 426 #else 427 static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 428 return unorm_getFCD16(c)&0xff; 429 } 430 #endif 431 432 static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ 433 /* 434 * column, mask and shift values for int-value properties from u_getUnicodeProperties(). 435 * Must be in order of corresponding UProperty, 436 * and there must be exactly one entry per int UProperty. 437 * 438 * Properties with mask==0 are handled in code. 439 * For them, column is the UPropertySource value. 440 */ 441 { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue }, 442 { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue }, 443 { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift }, 444 { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue }, 445 { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue }, 446 { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift }, 447 { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue }, 448 { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue }, 449 { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue }, 450 { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift }, 451 { 0, UPROPS_SCRIPT_MASK, 0, getScript, defaultGetMaxValue }, 452 { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift }, 453 // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" 454 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, 455 // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" 456 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, 457 // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE 458 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, 459 // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE 460 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, 461 { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift }, 462 { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift }, 463 { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue }, 464 { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue }, 465 { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue }, 466 { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue }, 467 }; 468 469 U_CAPI int32_t U_EXPORT2 470 u_getIntPropertyValue(UChar32 c, UProperty which) { 471 if(which<UCHAR_INT_START) { 472 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { 473 const BinaryProperty &prop=binProps[which]; 474 return prop.contains(prop, c, which); 475 } 476 } else if(which<UCHAR_INT_LIMIT) { 477 const IntProperty &prop=intProps[which-UCHAR_INT_START]; 478 return prop.getValue(prop, c, which); 479 } else if(which==UCHAR_GENERAL_CATEGORY_MASK) { 480 return U_MASK(u_charType(c)); 481 } 482 return 0; // undefined 483 } 484 485 U_CAPI int32_t U_EXPORT2 486 u_getIntPropertyMinValue(UProperty /*which*/) { 487 return 0; /* all binary/enum/int properties have a minimum value of 0 */ 488 } 489 490 U_CAPI int32_t U_EXPORT2 491 u_getIntPropertyMaxValue(UProperty which) { 492 if(which<UCHAR_INT_START) { 493 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { 494 return 1; // maximum TRUE for all binary properties 495 } 496 } else if(which<UCHAR_INT_LIMIT) { 497 const IntProperty &prop=intProps[which-UCHAR_INT_START]; 498 return prop.getMaxValue(prop, which); 499 } 500 return -1; // undefined 501 } 502 503 U_CFUNC UPropertySource U_EXPORT2 504 uprops_getSource(UProperty which) { 505 if(which<UCHAR_BINARY_START) { 506 return UPROPS_SRC_NONE; /* undefined */ 507 } else if(which<UCHAR_BINARY_LIMIT) { 508 const BinaryProperty &prop=binProps[which]; 509 if(prop.mask!=0) { 510 return UPROPS_SRC_PROPSVEC; 511 } else { 512 return (UPropertySource)prop.column; 513 } 514 } else if(which<UCHAR_INT_START) { 515 return UPROPS_SRC_NONE; /* undefined */ 516 } else if(which<UCHAR_INT_LIMIT) { 517 const IntProperty &prop=intProps[which-UCHAR_INT_START]; 518 if(prop.mask!=0) { 519 return UPROPS_SRC_PROPSVEC; 520 } else { 521 return (UPropertySource)prop.column; 522 } 523 } else if(which<UCHAR_STRING_START) { 524 switch(which) { 525 case UCHAR_GENERAL_CATEGORY_MASK: 526 case UCHAR_NUMERIC_VALUE: 527 return UPROPS_SRC_CHAR; 528 529 default: 530 return UPROPS_SRC_NONE; 531 } 532 } else if(which<UCHAR_STRING_LIMIT) { 533 switch(which) { 534 case UCHAR_AGE: 535 return UPROPS_SRC_PROPSVEC; 536 537 case UCHAR_BIDI_MIRRORING_GLYPH: 538 return UPROPS_SRC_BIDI; 539 540 case UCHAR_CASE_FOLDING: 541 case UCHAR_LOWERCASE_MAPPING: 542 case UCHAR_SIMPLE_CASE_FOLDING: 543 case UCHAR_SIMPLE_LOWERCASE_MAPPING: 544 case UCHAR_SIMPLE_TITLECASE_MAPPING: 545 case UCHAR_SIMPLE_UPPERCASE_MAPPING: 546 case UCHAR_TITLECASE_MAPPING: 547 case UCHAR_UPPERCASE_MAPPING: 548 return UPROPS_SRC_CASE; 549 550 case UCHAR_ISO_COMMENT: 551 case UCHAR_NAME: 552 case UCHAR_UNICODE_1_NAME: 553 return UPROPS_SRC_NAMES; 554 555 default: 556 return UPROPS_SRC_NONE; 557 } 558 } else { 559 switch(which) { 560 case UCHAR_SCRIPT_EXTENSIONS: 561 return UPROPS_SRC_PROPSVEC; 562 default: 563 return UPROPS_SRC_NONE; /* undefined */ 564 } 565 } 566 } 567 568 #if !UCONFIG_NO_NORMALIZATION 569 570 U_CAPI int32_t U_EXPORT2 571 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { 572 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 573 return 0; 574 } 575 if(destCapacity<0 || (dest==NULL && destCapacity>0)) { 576 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 577 return 0; 578 } 579 // Compute the FC_NFKC_Closure on the fly: 580 // We have the API for complete coverage of Unicode properties, although 581 // this value by itself is not useful via API. 582 // (What could be useful is a custom normalization table that combines 583 // case folding and NFKC.) 584 // For the derivation, see Unicode's DerivedNormalizationProps.txt. 585 const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode); 586 if(U_FAILURE(*pErrorCode)) { 587 return 0; 588 } 589 // first: b = NFKC(Fold(a)) 590 UnicodeString folded1String; 591 const UChar *folded1; 592 int32_t folded1Length=ucase_toFullFolding(c, &folded1, U_FOLD_CASE_DEFAULT); 593 if(folded1Length<0) { 594 const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); 595 if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { 596 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC 597 } 598 folded1String.setTo(c); 599 } else { 600 if(folded1Length>UCASE_MAX_STRING_LENGTH) { 601 folded1String.setTo(folded1Length); 602 } else { 603 folded1String.setTo(FALSE, folded1, folded1Length); 604 } 605 } 606 UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode); 607 // second: c = NFKC(Fold(b)) 608 UnicodeString folded2String(kc1); 609 UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); 610 // if (c != b) add the mapping from a to c 611 if(U_FAILURE(*pErrorCode) || kc1==kc2) { 612 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 613 } else { 614 return kc2.extract(dest, destCapacity, *pErrorCode); 615 } 616 } 617 618 #endif 619