1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2002-2014, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: uprops.cpp 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2002feb24 14 * created by: Markus W. Scherer 15 * 16 * Implementations for mostly non-core Unicode character properties 17 * stored in uprops.icu. 18 * 19 * With the APIs implemented here, almost all properties files and 20 * their associated implementation files are used from this file, 21 * including those for normalization and case mappings. 22 */ 23 24 #include "unicode/utypes.h" 25 #include "unicode/uchar.h" 26 #include "unicode/unorm2.h" 27 #include "unicode/uscript.h" 28 #include "unicode/ustring.h" 29 #include "cstring.h" 30 #include "normalizer2impl.h" 31 #include "umutex.h" 32 #include "ubidi_props.h" 33 #include "uprops.h" 34 #include "ucase.h" 35 #include "ustr_imp.h" 36 37 U_NAMESPACE_USE 38 39 #define GET_BIDI_PROPS() ubidi_getSingleton() 40 41 /* general properties API functions ----------------------------------------- */ 42 43 struct BinaryProperty; 44 45 typedef UBool BinaryPropertyContains(const BinaryProperty &prop, UChar32 c, UProperty which); 46 47 struct BinaryProperty { 48 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 49 uint32_t mask; 50 BinaryPropertyContains *contains; 51 }; 52 53 static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /*which*/) { 54 /* systematic, directly stored properties */ 55 return (u_getUnicodeProperties(c, prop.column)&prop.mask)!=0; 56 } 57 58 static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { 59 return ucase_hasBinaryProperty(c, which); 60 } 61 62 static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 63 return ubidi_isBidiControl(GET_BIDI_PROPS(), c); 64 } 65 66 static UBool isMirrored(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 67 return ubidi_isMirrored(GET_BIDI_PROPS(), c); 68 } 69 70 static UBool isJoinControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 71 return ubidi_isJoinControl(GET_BIDI_PROPS(), c); 72 } 73 74 #if UCONFIG_NO_NORMALIZATION 75 static UBool hasFullCompositionExclusion(const BinaryProperty &, UChar32, UProperty) { 76 return FALSE; 77 } 78 #else 79 static UBool hasFullCompositionExclusion(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 80 // By definition, Full_Composition_Exclusion is the same as NFC_QC=No. 81 UErrorCode errorCode=U_ZERO_ERROR; 82 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 83 return U_SUCCESS(errorCode) && impl->isCompNo(impl->getNorm16(c)); 84 } 85 #endif 86 87 // UCHAR_NF*_INERT properties 88 #if UCONFIG_NO_NORMALIZATION 89 static UBool isNormInert(const BinaryProperty &, UChar32, UProperty) { 90 return FALSE; 91 } 92 #else 93 static UBool isNormInert(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { 94 UErrorCode errorCode=U_ZERO_ERROR; 95 const Normalizer2 *norm2=Normalizer2Factory::getInstance( 96 (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD), errorCode); 97 return U_SUCCESS(errorCode) && norm2->isInert(c); 98 } 99 #endif 100 101 #if UCONFIG_NO_NORMALIZATION 102 static UBool changesWhenCasefolded(const BinaryProperty &, UChar32, UProperty) { 103 return FALSE; 104 } 105 #else 106 static UBool changesWhenCasefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 107 UnicodeString nfd; 108 UErrorCode errorCode=U_ZERO_ERROR; 109 const Normalizer2 *nfcNorm2=Normalizer2::getNFCInstance(errorCode); 110 if(U_FAILURE(errorCode)) { 111 return FALSE; 112 } 113 if(nfcNorm2->getDecomposition(c, nfd)) { 114 /* c has a decomposition */ 115 if(nfd.length()==1) { 116 c=nfd[0]; /* single BMP code point */ 117 } else if(nfd.length()<=U16_MAX_LENGTH && 118 nfd.length()==U16_LENGTH(c=nfd.char32At(0)) 119 ) { 120 /* single supplementary code point */ 121 } else { 122 c=U_SENTINEL; 123 } 124 } else if(c<0) { 125 return FALSE; /* protect against bad input */ 126 } 127 if(c>=0) { 128 /* single code point */ 129 const UCaseProps *csp=ucase_getSingleton(); 130 const UChar *resultString; 131 return (UBool)(ucase_toFullFolding(csp, c, &resultString, U_FOLD_CASE_DEFAULT)>=0); 132 } else { 133 /* guess some large but stack-friendly capacity */ 134 UChar dest[2*UCASE_MAX_STRING_LENGTH]; 135 int32_t destLength; 136 destLength=u_strFoldCase(dest, UPRV_LENGTHOF(dest), 137 nfd.getBuffer(), nfd.length(), 138 U_FOLD_CASE_DEFAULT, &errorCode); 139 return (UBool)(U_SUCCESS(errorCode) && 140 0!=u_strCompare(nfd.getBuffer(), nfd.length(), 141 dest, destLength, FALSE)); 142 } 143 } 144 #endif 145 146 #if UCONFIG_NO_NORMALIZATION 147 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &, UChar32, UProperty) { 148 return FALSE; 149 } 150 #else 151 static UBool changesWhenNFKC_Casefolded(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 152 UErrorCode errorCode=U_ZERO_ERROR; 153 const Normalizer2Impl *kcf=Normalizer2Factory::getNFKC_CFImpl(errorCode); 154 if(U_FAILURE(errorCode)) { 155 return FALSE; 156 } 157 UnicodeString src(c); 158 UnicodeString dest; 159 { 160 // The ReorderingBuffer must be in a block because its destructor 161 // needs to release dest's buffer before we look at its contents. 162 ReorderingBuffer buffer(*kcf, dest); 163 // Small destCapacity for NFKC_CF(c). 164 if(buffer.init(5, errorCode)) { 165 const UChar *srcArray=src.getBuffer(); 166 kcf->compose(srcArray, srcArray+src.length(), FALSE, 167 TRUE, buffer, errorCode); 168 } 169 } 170 return U_SUCCESS(errorCode) && dest!=src; 171 } 172 #endif 173 174 #if UCONFIG_NO_NORMALIZATION 175 static UBool isCanonSegmentStarter(const BinaryProperty &, UChar32, UProperty) { 176 return FALSE; 177 } 178 #else 179 static UBool isCanonSegmentStarter(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 180 UErrorCode errorCode=U_ZERO_ERROR; 181 const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); 182 return 183 U_SUCCESS(errorCode) && impl->ensureCanonIterData(errorCode) && 184 impl->isCanonSegmentStarter(c); 185 } 186 #endif 187 188 static UBool isPOSIX_alnum(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 189 return u_isalnumPOSIX(c); 190 } 191 192 static UBool isPOSIX_blank(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 193 return u_isblank(c); 194 } 195 196 static UBool isPOSIX_graph(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 197 return u_isgraphPOSIX(c); 198 } 199 200 static UBool isPOSIX_print(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 201 return u_isprintPOSIX(c); 202 } 203 204 static UBool isPOSIX_xdigit(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 205 return u_isxdigit(c); 206 } 207 208 static const BinaryProperty binProps[UCHAR_BINARY_LIMIT]={ 209 /* 210 * column and mask values for binary properties from u_getUnicodeProperties(). 211 * Must be in order of corresponding UProperty, 212 * and there must be exactly one entry per binary UProperty. 213 * 214 * Properties with mask==0 are handled in code. 215 * For them, column is the UPropertySource value. 216 */ 217 { 1, U_MASK(UPROPS_ALPHABETIC), defaultContains }, 218 { 1, U_MASK(UPROPS_ASCII_HEX_DIGIT), defaultContains }, 219 { UPROPS_SRC_BIDI, 0, isBidiControl }, 220 { UPROPS_SRC_BIDI, 0, isMirrored }, 221 { 1, U_MASK(UPROPS_DASH), defaultContains }, 222 { 1, U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT), defaultContains }, 223 { 1, U_MASK(UPROPS_DEPRECATED), defaultContains }, 224 { 1, U_MASK(UPROPS_DIACRITIC), defaultContains }, 225 { 1, U_MASK(UPROPS_EXTENDER), defaultContains }, 226 { UPROPS_SRC_NFC, 0, hasFullCompositionExclusion }, 227 { 1, U_MASK(UPROPS_GRAPHEME_BASE), defaultContains }, 228 { 1, U_MASK(UPROPS_GRAPHEME_EXTEND), defaultContains }, 229 { 1, U_MASK(UPROPS_GRAPHEME_LINK), defaultContains }, 230 { 1, U_MASK(UPROPS_HEX_DIGIT), defaultContains }, 231 { 1, U_MASK(UPROPS_HYPHEN), defaultContains }, 232 { 1, U_MASK(UPROPS_ID_CONTINUE), defaultContains }, 233 { 1, U_MASK(UPROPS_ID_START), defaultContains }, 234 { 1, U_MASK(UPROPS_IDEOGRAPHIC), defaultContains }, 235 { 1, U_MASK(UPROPS_IDS_BINARY_OPERATOR), defaultContains }, 236 { 1, U_MASK(UPROPS_IDS_TRINARY_OPERATOR), defaultContains }, 237 { UPROPS_SRC_BIDI, 0, isJoinControl }, 238 { 1, U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION), defaultContains }, 239 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_LOWERCASE 240 { 1, U_MASK(UPROPS_MATH), defaultContains }, 241 { 1, U_MASK(UPROPS_NONCHARACTER_CODE_POINT), defaultContains }, 242 { 1, U_MASK(UPROPS_QUOTATION_MARK), defaultContains }, 243 { 1, U_MASK(UPROPS_RADICAL), defaultContains }, 244 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_SOFT_DOTTED 245 { 1, U_MASK(UPROPS_TERMINAL_PUNCTUATION), defaultContains }, 246 { 1, U_MASK(UPROPS_UNIFIED_IDEOGRAPH), defaultContains }, 247 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_UPPERCASE 248 { 1, U_MASK(UPROPS_WHITE_SPACE), defaultContains }, 249 { 1, U_MASK(UPROPS_XID_CONTINUE), defaultContains }, 250 { 1, U_MASK(UPROPS_XID_START), defaultContains }, 251 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_SENSITIVE 252 { 1, U_MASK(UPROPS_S_TERM), defaultContains }, 253 { 1, U_MASK(UPROPS_VARIATION_SELECTOR), defaultContains }, 254 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFD_INERT 255 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKD_INERT 256 { UPROPS_SRC_NFC, 0, isNormInert }, // UCHAR_NFC_INERT 257 { UPROPS_SRC_NFKC, 0, isNormInert }, // UCHAR_NFKC_INERT 258 { UPROPS_SRC_NFC_CANON_ITER, 0, isCanonSegmentStarter }, 259 { 1, U_MASK(UPROPS_PATTERN_SYNTAX), defaultContains }, 260 { 1, U_MASK(UPROPS_PATTERN_WHITE_SPACE), defaultContains }, 261 { UPROPS_SRC_CHAR_AND_PROPSVEC, 0, isPOSIX_alnum }, 262 { UPROPS_SRC_CHAR, 0, isPOSIX_blank }, 263 { UPROPS_SRC_CHAR, 0, isPOSIX_graph }, 264 { UPROPS_SRC_CHAR, 0, isPOSIX_print }, 265 { UPROPS_SRC_CHAR, 0, isPOSIX_xdigit }, 266 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASED 267 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CASE_IGNORABLE 268 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_LOWERCASED 269 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_UPPERCASED 270 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_TITLECASED 271 { UPROPS_SRC_CASE_AND_NORM, 0, changesWhenCasefolded }, 272 { UPROPS_SRC_CASE, 0, caseBinaryPropertyContains }, // UCHAR_CHANGES_WHEN_CASEMAPPED 273 { UPROPS_SRC_NFKC_CF, 0, changesWhenNFKC_Casefolded } 274 }; 275 276 U_CAPI UBool U_EXPORT2 277 u_hasBinaryProperty(UChar32 c, UProperty which) { 278 /* c is range-checked in the functions that are called from here */ 279 if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) { 280 /* not a known binary property */ 281 return FALSE; 282 } else { 283 const BinaryProperty &prop=binProps[which]; 284 return prop.contains(prop, c, which); 285 } 286 } 287 288 struct IntProperty; 289 290 typedef int32_t IntPropertyGetValue(const IntProperty &prop, UChar32 c, UProperty which); 291 typedef int32_t IntPropertyGetMaxValue(const IntProperty &prop, UProperty which); 292 293 struct IntProperty { 294 int32_t column; // SRC_PROPSVEC column, or "source" if mask==0 295 uint32_t mask; 296 int32_t shift; // =maxValue if getMaxValueFromShift() is used 297 IntPropertyGetValue *getValue; 298 IntPropertyGetMaxValue *getMaxValue; 299 }; 300 301 static int32_t defaultGetValue(const IntProperty &prop, UChar32 c, UProperty /*which*/) { 302 /* systematic, directly stored properties */ 303 return (int32_t)(u_getUnicodeProperties(c, prop.column)&prop.mask)>>prop.shift; 304 } 305 306 static int32_t defaultGetMaxValue(const IntProperty &prop, UProperty /*which*/) { 307 return (uprv_getMaxValues(prop.column)&prop.mask)>>prop.shift; 308 } 309 310 static int32_t getMaxValueFromShift(const IntProperty &prop, UProperty /*which*/) { 311 return prop.shift; 312 } 313 314 static int32_t getBiDiClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 315 return (int32_t)u_charDirection(c); 316 } 317 318 static int32_t getBiDiPairedBracketType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 319 return (int32_t)ubidi_getPairedBracketType(GET_BIDI_PROPS(), c); 320 } 321 322 static int32_t biDiGetMaxValue(const IntProperty &/*prop*/, UProperty which) { 323 return ubidi_getMaxValue(GET_BIDI_PROPS(), which); 324 } 325 326 #if UCONFIG_NO_NORMALIZATION 327 static int32_t getCombiningClass(const IntProperty &, UChar32, UProperty) { 328 return 0; 329 } 330 #else 331 static int32_t getCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 332 return u_getCombiningClass(c); 333 } 334 #endif 335 336 static int32_t getGeneralCategory(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 337 return (int32_t)u_charType(c); 338 } 339 340 static int32_t getJoiningGroup(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 341 return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c); 342 } 343 344 static int32_t getJoiningType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 345 return ubidi_getJoiningType(GET_BIDI_PROPS(), c); 346 } 347 348 static int32_t getNumericType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 349 int32_t ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(u_getMainProperties(c)); 350 return UPROPS_NTV_GET_TYPE(ntv); 351 } 352 353 static int32_t getScript(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 354 UErrorCode errorCode=U_ZERO_ERROR; 355 return (int32_t)uscript_getScript(c, &errorCode); 356 } 357 358 /* 359 * Map some of the Grapheme Cluster Break values to Hangul Syllable Types. 360 * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break. 361 */ 362 static const UHangulSyllableType gcbToHst[]={ 363 U_HST_NOT_APPLICABLE, /* U_GCB_OTHER */ 364 U_HST_NOT_APPLICABLE, /* U_GCB_CONTROL */ 365 U_HST_NOT_APPLICABLE, /* U_GCB_CR */ 366 U_HST_NOT_APPLICABLE, /* U_GCB_EXTEND */ 367 U_HST_LEADING_JAMO, /* U_GCB_L */ 368 U_HST_NOT_APPLICABLE, /* U_GCB_LF */ 369 U_HST_LV_SYLLABLE, /* U_GCB_LV */ 370 U_HST_LVT_SYLLABLE, /* U_GCB_LVT */ 371 U_HST_TRAILING_JAMO, /* U_GCB_T */ 372 U_HST_VOWEL_JAMO /* U_GCB_V */ 373 /* 374 * Omit GCB values beyond what we need for hst. 375 * The code below checks for the array length. 376 */ 377 }; 378 379 static int32_t getHangulSyllableType(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 380 /* see comments on gcbToHst[] above */ 381 int32_t gcb=(int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT; 382 if(gcb<UPRV_LENGTHOF(gcbToHst)) { 383 return gcbToHst[gcb]; 384 } else { 385 return U_HST_NOT_APPLICABLE; 386 } 387 } 388 389 #if UCONFIG_NO_NORMALIZATION 390 static int32_t getNormQuickCheck(const IntProperty &, UChar32, UProperty) { 391 return 0; 392 } 393 #else 394 static int32_t getNormQuickCheck(const IntProperty &/*prop*/, UChar32 c, UProperty which) { 395 return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD)); 396 } 397 #endif 398 399 #if UCONFIG_NO_NORMALIZATION 400 static int32_t getLeadCombiningClass(const IntProperty &, UChar32, UProperty) { 401 return 0; 402 } 403 #else 404 static int32_t getLeadCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 405 return unorm_getFCD16(c)>>8; 406 } 407 #endif 408 409 #if UCONFIG_NO_NORMALIZATION 410 static int32_t getTrailCombiningClass(const IntProperty &, UChar32, UProperty) { 411 return 0; 412 } 413 #else 414 static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UProperty /*which*/) { 415 return unorm_getFCD16(c)&0xff; 416 } 417 #endif 418 419 static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ 420 /* 421 * column, mask and shift values for int-value properties from u_getUnicodeProperties(). 422 * Must be in order of corresponding UProperty, 423 * and there must be exactly one entry per int UProperty. 424 * 425 * Properties with mask==0 are handled in code. 426 * For them, column is the UPropertySource value. 427 */ 428 { UPROPS_SRC_BIDI, 0, 0, getBiDiClass, biDiGetMaxValue }, 429 { 0, UPROPS_BLOCK_MASK, UPROPS_BLOCK_SHIFT, defaultGetValue, defaultGetMaxValue }, 430 { UPROPS_SRC_NFC, 0, 0xff, getCombiningClass, getMaxValueFromShift }, 431 { 2, UPROPS_DT_MASK, 0, defaultGetValue, defaultGetMaxValue }, 432 { 0, UPROPS_EA_MASK, UPROPS_EA_SHIFT, defaultGetValue, defaultGetMaxValue }, 433 { UPROPS_SRC_CHAR, 0, (int32_t)U_CHAR_CATEGORY_COUNT-1,getGeneralCategory, getMaxValueFromShift }, 434 { UPROPS_SRC_BIDI, 0, 0, getJoiningGroup, biDiGetMaxValue }, 435 { UPROPS_SRC_BIDI, 0, 0, getJoiningType, biDiGetMaxValue }, 436 { 2, UPROPS_LB_MASK, UPROPS_LB_SHIFT, defaultGetValue, defaultGetMaxValue }, 437 { UPROPS_SRC_CHAR, 0, (int32_t)U_NT_COUNT-1, getNumericType, getMaxValueFromShift }, 438 { 0, UPROPS_SCRIPT_MASK, 0, getScript, defaultGetMaxValue }, 439 { UPROPS_SRC_PROPSVEC, 0, (int32_t)U_HST_COUNT-1, getHangulSyllableType, getMaxValueFromShift }, 440 // UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" 441 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, 442 // UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes" 443 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_YES, getNormQuickCheck, getMaxValueFromShift }, 444 // UCHAR_NFC_QUICK_CHECK: max=2=MAYBE 445 { UPROPS_SRC_NFC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, 446 // UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE 447 { UPROPS_SRC_NFKC, 0, (int32_t)UNORM_MAYBE, getNormQuickCheck, getMaxValueFromShift }, 448 { UPROPS_SRC_NFC, 0, 0xff, getLeadCombiningClass, getMaxValueFromShift }, 449 { UPROPS_SRC_NFC, 0, 0xff, getTrailCombiningClass, getMaxValueFromShift }, 450 { 2, UPROPS_GCB_MASK, UPROPS_GCB_SHIFT, defaultGetValue, defaultGetMaxValue }, 451 { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue }, 452 { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue }, 453 { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue }, 454 }; 455 456 U_CAPI int32_t U_EXPORT2 457 u_getIntPropertyValue(UChar32 c, UProperty which) { 458 if(which<UCHAR_INT_START) { 459 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { 460 const BinaryProperty &prop=binProps[which]; 461 return prop.contains(prop, c, which); 462 } 463 } else if(which<UCHAR_INT_LIMIT) { 464 const IntProperty &prop=intProps[which-UCHAR_INT_START]; 465 return prop.getValue(prop, c, which); 466 } else if(which==UCHAR_GENERAL_CATEGORY_MASK) { 467 return U_MASK(u_charType(c)); 468 } 469 return 0; // undefined 470 } 471 472 U_CAPI int32_t U_EXPORT2 473 u_getIntPropertyMinValue(UProperty /*which*/) { 474 return 0; /* all binary/enum/int properties have a minimum value of 0 */ 475 } 476 477 U_CAPI int32_t U_EXPORT2 478 u_getIntPropertyMaxValue(UProperty which) { 479 if(which<UCHAR_INT_START) { 480 if(UCHAR_BINARY_START<=which && which<UCHAR_BINARY_LIMIT) { 481 return 1; // maximum TRUE for all binary properties 482 } 483 } else if(which<UCHAR_INT_LIMIT) { 484 const IntProperty &prop=intProps[which-UCHAR_INT_START]; 485 return prop.getMaxValue(prop, which); 486 } 487 return -1; // undefined 488 } 489 490 U_CFUNC UPropertySource U_EXPORT2 491 uprops_getSource(UProperty which) { 492 if(which<UCHAR_BINARY_START) { 493 return UPROPS_SRC_NONE; /* undefined */ 494 } else if(which<UCHAR_BINARY_LIMIT) { 495 const BinaryProperty &prop=binProps[which]; 496 if(prop.mask!=0) { 497 return UPROPS_SRC_PROPSVEC; 498 } else { 499 return (UPropertySource)prop.column; 500 } 501 } else if(which<UCHAR_INT_START) { 502 return UPROPS_SRC_NONE; /* undefined */ 503 } else if(which<UCHAR_INT_LIMIT) { 504 const IntProperty &prop=intProps[which-UCHAR_INT_START]; 505 if(prop.mask!=0) { 506 return UPROPS_SRC_PROPSVEC; 507 } else { 508 return (UPropertySource)prop.column; 509 } 510 } else if(which<UCHAR_STRING_START) { 511 switch(which) { 512 case UCHAR_GENERAL_CATEGORY_MASK: 513 case UCHAR_NUMERIC_VALUE: 514 return UPROPS_SRC_CHAR; 515 516 default: 517 return UPROPS_SRC_NONE; 518 } 519 } else if(which<UCHAR_STRING_LIMIT) { 520 switch(which) { 521 case UCHAR_AGE: 522 return UPROPS_SRC_PROPSVEC; 523 524 case UCHAR_BIDI_MIRRORING_GLYPH: 525 return UPROPS_SRC_BIDI; 526 527 case UCHAR_CASE_FOLDING: 528 case UCHAR_LOWERCASE_MAPPING: 529 case UCHAR_SIMPLE_CASE_FOLDING: 530 case UCHAR_SIMPLE_LOWERCASE_MAPPING: 531 case UCHAR_SIMPLE_TITLECASE_MAPPING: 532 case UCHAR_SIMPLE_UPPERCASE_MAPPING: 533 case UCHAR_TITLECASE_MAPPING: 534 case UCHAR_UPPERCASE_MAPPING: 535 return UPROPS_SRC_CASE; 536 537 case UCHAR_ISO_COMMENT: 538 case UCHAR_NAME: 539 case UCHAR_UNICODE_1_NAME: 540 return UPROPS_SRC_NAMES; 541 542 default: 543 return UPROPS_SRC_NONE; 544 } 545 } else { 546 switch(which) { 547 case UCHAR_SCRIPT_EXTENSIONS: 548 return UPROPS_SRC_PROPSVEC; 549 default: 550 return UPROPS_SRC_NONE; /* undefined */ 551 } 552 } 553 } 554 555 #if !UCONFIG_NO_NORMALIZATION 556 557 U_CAPI int32_t U_EXPORT2 558 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode) { 559 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 560 return 0; 561 } 562 if(destCapacity<0 || (dest==NULL && destCapacity>0)) { 563 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 564 return 0; 565 } 566 // Compute the FC_NFKC_Closure on the fly: 567 // We have the API for complete coverage of Unicode properties, although 568 // this value by itself is not useful via API. 569 // (What could be useful is a custom normalization table that combines 570 // case folding and NFKC.) 571 // For the derivation, see Unicode's DerivedNormalizationProps.txt. 572 const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode); 573 const UCaseProps *csp=ucase_getSingleton(); 574 if(U_FAILURE(*pErrorCode)) { 575 return 0; 576 } 577 // first: b = NFKC(Fold(a)) 578 UnicodeString folded1String; 579 const UChar *folded1; 580 int32_t folded1Length=ucase_toFullFolding(csp, c, &folded1, U_FOLD_CASE_DEFAULT); 581 if(folded1Length<0) { 582 const Normalizer2Impl *nfkcImpl=Normalizer2Factory::getImpl(nfkc); 583 if(nfkcImpl->getCompQuickCheck(nfkcImpl->getNorm16(c))!=UNORM_NO) { 584 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); // c does not change at all under CaseFolding+NFKC 585 } 586 folded1String.setTo(c); 587 } else { 588 if(folded1Length>UCASE_MAX_STRING_LENGTH) { 589 folded1String.setTo(folded1Length); 590 } else { 591 folded1String.setTo(FALSE, folded1, folded1Length); 592 } 593 } 594 UnicodeString kc1=nfkc->normalize(folded1String, *pErrorCode); 595 // second: c = NFKC(Fold(b)) 596 UnicodeString folded2String(kc1); 597 UnicodeString kc2=nfkc->normalize(folded2String.foldCase(), *pErrorCode); 598 // if (c != b) add the mapping from a to c 599 if(U_FAILURE(*pErrorCode) || kc1==kc2) { 600 return u_terminateUChars(dest, destCapacity, 0, pErrorCode); 601 } else { 602 return kc2.extract(dest, destCapacity, *pErrorCode); 603 } 604 } 605 606 #endif 607