Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2002-2008, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  uprops.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2002feb24
     14 *   created by: Markus W. Scherer
     15 *
     16 *   Implementations for mostly non-core Unicode character properties
     17 *   stored in uprops.icu.
     18 *
     19 *   With the APIs implemented here, almost all properties files and
     20 *   their associated implementation files are used from this file,
     21 *   including those for normalization and case mappings.
     22 */
     23 
     24 #include "unicode/utypes.h"
     25 #include "unicode/uchar.h"
     26 #include "unicode/uscript.h"
     27 #include "cstring.h"
     28 #include "ucln_cmn.h"
     29 #include "umutex.h"
     30 #include "unormimp.h"
     31 #include "ubidi_props.h"
     32 #include "uprops.h"
     33 #include "ucase.h"
     34 
     35 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     36 
     37 /* cleanup ------------------------------------------------------------------ */
     38 
     39 static const UBiDiProps *gBdp=NULL;
     40 
     41 static UBool U_CALLCONV uprops_cleanup(void) {
     42     gBdp=NULL;
     43     return TRUE;
     44 }
     45 
     46 /* bidi/shaping properties API ---------------------------------------------- */
     47 
     48 /* get the UBiDiProps singleton, or else its dummy, once and for all */
     49 static const UBiDiProps *
     50 getBiDiProps() {
     51     /*
     52      * This lazy intialization with double-checked locking (without mutex protection for
     53      * the initial check) is transiently unsafe under certain circumstances.
     54      * Check the readme and use u_init() if necessary.
     55      */
     56 
     57     /* the initial check is performed by the GET_BIDI_PROPS() macro */
     58     const UBiDiProps *bdp;
     59     UErrorCode errorCode=U_ZERO_ERROR;
     60 
     61     bdp=ubidi_getSingleton(&errorCode);
     62 #if !UBIDI_HARDCODE_DATA
     63     if(U_FAILURE(errorCode)) {
     64         errorCode=U_ZERO_ERROR;
     65         bdp=ubidi_getDummy(&errorCode);
     66         if(U_FAILURE(errorCode)) {
     67             return NULL;
     68         }
     69     }
     70 #endif
     71 
     72     umtx_lock(NULL);
     73     if(gBdp==NULL) {
     74         gBdp=bdp;
     75         ucln_common_registerCleanup(UCLN_COMMON_UPROPS, uprops_cleanup);
     76     }
     77     umtx_unlock(NULL);
     78 
     79     return gBdp;
     80 }
     81 
/*
 * Fast path for the bidi properties pointer: evaluates to the cached gBdp when
 * it is non-NULL, otherwise calls getBiDiProps() to fetch and cache it.
 * The result can still be NULL if getBiDiProps() fails, so callers that
 * dereference it should check.
 * NOTE(review): this comment used to say "see comment for GET_CASE_PROPS()",
 * but no such macro or comment is visible in this file.
 */
#define GET_BIDI_PROPS() (gBdp!=NULL ? gBdp : getBiDiProps())
     84 
     85 /* general properties API functions ----------------------------------------- */
     86 
     87 static const struct {
     88     int32_t column;
     89     uint32_t mask;
     90 } binProps[UCHAR_BINARY_LIMIT]={
     91     /*
     92      * column and mask values for binary properties from u_getUnicodeProperties().
     93      * Must be in order of corresponding UProperty,
     94      * and there must be exacly one entry per binary UProperty.
     95      *
     96      * Properties with mask 0 are handled in code.
     97      * For them, column is the UPropertySource value.
     98      */
     99     {  1,               U_MASK(UPROPS_ALPHABETIC) },
    100     {  1,               U_MASK(UPROPS_ASCII_HEX_DIGIT) },
    101     { UPROPS_SRC_BIDI,  0 },                                    /* UCHAR_BIDI_CONTROL */
    102     { UPROPS_SRC_BIDI,  0 },                                    /* UCHAR_BIDI_MIRRORED */
    103     {  1,               U_MASK(UPROPS_DASH) },
    104     {  1,               U_MASK(UPROPS_DEFAULT_IGNORABLE_CODE_POINT) },
    105     {  1,               U_MASK(UPROPS_DEPRECATED) },
    106     {  1,               U_MASK(UPROPS_DIACRITIC) },
    107     {  1,               U_MASK(UPROPS_EXTENDER) },
    108     { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_FULL_COMPOSITION_EXCLUSION */
    109     {  1,               U_MASK(UPROPS_GRAPHEME_BASE) },
    110     {  1,               U_MASK(UPROPS_GRAPHEME_EXTEND) },
    111     {  1,               U_MASK(UPROPS_GRAPHEME_LINK) },
    112     {  1,               U_MASK(UPROPS_HEX_DIGIT) },
    113     {  1,               U_MASK(UPROPS_HYPHEN) },
    114     {  1,               U_MASK(UPROPS_ID_CONTINUE) },
    115     {  1,               U_MASK(UPROPS_ID_START) },
    116     {  1,               U_MASK(UPROPS_IDEOGRAPHIC) },
    117     {  1,               U_MASK(UPROPS_IDS_BINARY_OPERATOR) },
    118     {  1,               U_MASK(UPROPS_IDS_TRINARY_OPERATOR) },
    119     { UPROPS_SRC_BIDI,  0 },                                    /* UCHAR_JOIN_CONTROL */
    120     {  1,               U_MASK(UPROPS_LOGICAL_ORDER_EXCEPTION) },
    121     { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_LOWERCASE */
    122     {  1,               U_MASK(UPROPS_MATH) },
    123     {  1,               U_MASK(UPROPS_NONCHARACTER_CODE_POINT) },
    124     {  1,               U_MASK(UPROPS_QUOTATION_MARK) },
    125     {  1,               U_MASK(UPROPS_RADICAL) },
    126     { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_SOFT_DOTTED */
    127     {  1,               U_MASK(UPROPS_TERMINAL_PUNCTUATION) },
    128     {  1,               U_MASK(UPROPS_UNIFIED_IDEOGRAPH) },
    129     { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_UPPERCASE */
    130     {  1,               U_MASK(UPROPS_WHITE_SPACE) },
    131     {  1,               U_MASK(UPROPS_XID_CONTINUE) },
    132     {  1,               U_MASK(UPROPS_XID_START) },
    133     { UPROPS_SRC_CASE,  0 },                                    /* UCHAR_CASE_SENSITIVE */
    134     {  1,               U_MASK(UPROPS_S_TERM) },
    135     {  1,               U_MASK(UPROPS_VARIATION_SELECTOR) },
    136     { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFD_INERT */
    137     { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFKD_INERT */
    138     { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFC_INERT */
    139     { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_NFKC_INERT */
    140     { UPROPS_SRC_NORM,  0 },                                    /* UCHAR_SEGMENT_STARTER */
    141     {  1,               U_MASK(UPROPS_PATTERN_SYNTAX) },
    142     {  1,               U_MASK(UPROPS_PATTERN_WHITE_SPACE) },
    143     { UPROPS_SRC_CHAR_AND_PROPSVEC,  0 },                       /* UCHAR_POSIX_ALNUM */
    144     { UPROPS_SRC_CHAR,  0 },                                    /* UCHAR_POSIX_BLANK */
    145     { UPROPS_SRC_CHAR,  0 },                                    /* UCHAR_POSIX_GRAPH */
    146     { UPROPS_SRC_CHAR,  0 },                                    /* UCHAR_POSIX_PRINT */
    147     { UPROPS_SRC_CHAR,  0 }                                     /* UCHAR_POSIX_XDIGIT */
    148 };
    149 
    150 U_CAPI UBool U_EXPORT2
    151 u_hasBinaryProperty(UChar32 c, UProperty which) {
    152     /* c is range-checked in the functions that are called from here */
    153     if(which<UCHAR_BINARY_START || UCHAR_BINARY_LIMIT<=which) {
    154         /* not a known binary property */
    155     } else {
    156         uint32_t mask=binProps[which].mask;
    157         int32_t column=binProps[which].column;
    158         if(mask!=0) {
    159             /* systematic, directly stored properties */
    160             return (u_getUnicodeProperties(c, column)&mask)!=0;
    161         } else {
    162             if(column==UPROPS_SRC_CASE) {
    163                 return ucase_hasBinaryProperty(c, which);
    164             } else if(column==UPROPS_SRC_NORM) {
    165 #if !UCONFIG_NO_NORMALIZATION
    166                 /* normalization properties from unorm.icu */
    167                 switch(which) {
    168                 case UCHAR_FULL_COMPOSITION_EXCLUSION:
    169                     return unorm_internalIsFullCompositionExclusion(c);
    170                 case UCHAR_NFD_INERT:
    171                 case UCHAR_NFKD_INERT:
    172                 case UCHAR_NFC_INERT:
    173                 case UCHAR_NFKC_INERT:
    174                     return unorm_isNFSkippable(c, (UNormalizationMode)(which-UCHAR_NFD_INERT+UNORM_NFD));
    175                 case UCHAR_SEGMENT_STARTER:
    176                     return unorm_isCanonSafeStart(c);
    177                 default:
    178                     break;
    179                 }
    180 #endif
    181             } else if(column==UPROPS_SRC_BIDI) {
    182                 /* bidi/shaping properties */
    183                 const UBiDiProps *bdp=GET_BIDI_PROPS();
    184                 if(bdp!=NULL) {
    185                     switch(which) {
    186                     case UCHAR_BIDI_MIRRORED:
    187                         return ubidi_isMirrored(bdp, c);
    188                     case UCHAR_BIDI_CONTROL:
    189                         return ubidi_isBidiControl(bdp, c);
    190                     case UCHAR_JOIN_CONTROL:
    191                         return ubidi_isJoinControl(bdp, c);
    192                     default:
    193                         break;
    194                     }
    195                 }
    196                 /* else return FALSE below */
    197             } else if(column==UPROPS_SRC_CHAR) {
    198                 switch(which) {
    199                 case UCHAR_POSIX_BLANK:
    200                     return u_isblank(c);
    201                 case UCHAR_POSIX_GRAPH:
    202                     return u_isgraphPOSIX(c);
    203                 case UCHAR_POSIX_PRINT:
    204                     return u_isprintPOSIX(c);
    205                 case UCHAR_POSIX_XDIGIT:
    206                     return u_isxdigit(c);
    207                 default:
    208                     break;
    209                 }
    210             } else if(column==UPROPS_SRC_CHAR_AND_PROPSVEC) {
    211                 switch(which) {
    212                 case UCHAR_POSIX_ALNUM:
    213                     return u_isalnumPOSIX(c);
    214                 default:
    215                     break;
    216                 }
    217             }
    218         }
    219     }
    220     return FALSE;
    221 }
    222 
    223 U_CAPI int32_t U_EXPORT2
    224 u_getIntPropertyValue(UChar32 c, UProperty which) {
    225     UErrorCode errorCode;
    226     int32_t type;
    227 
    228     if(which<UCHAR_BINARY_START) {
    229         return 0; /* undefined */
    230     } else if(which<UCHAR_BINARY_LIMIT) {
    231         return (int32_t)u_hasBinaryProperty(c, which);
    232     } else if(which<UCHAR_INT_START) {
    233         return 0; /* undefined */
    234     } else if(which<UCHAR_INT_LIMIT) {
    235         switch(which) {
    236         case UCHAR_BIDI_CLASS:
    237             return (int32_t)u_charDirection(c);
    238         case UCHAR_BLOCK:
    239             return (int32_t)ublock_getCode(c);
    240         case UCHAR_CANONICAL_COMBINING_CLASS:
    241 #if !UCONFIG_NO_NORMALIZATION
    242             return u_getCombiningClass(c);
    243 #else
    244             return 0;
    245 #endif
    246         case UCHAR_DECOMPOSITION_TYPE:
    247             return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_DT_MASK);
    248         case UCHAR_EAST_ASIAN_WIDTH:
    249             return (int32_t)(u_getUnicodeProperties(c, 0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT;
    250         case UCHAR_GENERAL_CATEGORY:
    251             return (int32_t)u_charType(c);
    252         case UCHAR_JOINING_GROUP:
    253             return ubidi_getJoiningGroup(GET_BIDI_PROPS(), c);
    254         case UCHAR_JOINING_TYPE:
    255             return ubidi_getJoiningType(GET_BIDI_PROPS(), c);
    256         case UCHAR_LINE_BREAK:
    257             return (int32_t)(u_getUnicodeProperties(c, UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT;
    258         case UCHAR_NUMERIC_TYPE:
    259             type=(int32_t)GET_NUMERIC_TYPE(u_getUnicodeProperties(c, -1));
    260             if(type>U_NT_NUMERIC) {
    261                 /* keep internal variants of U_NT_NUMERIC from becoming visible */
    262                 type=U_NT_NUMERIC;
    263             }
    264             return type;
    265         case UCHAR_SCRIPT:
    266             errorCode=U_ZERO_ERROR;
    267             return (int32_t)uscript_getScript(c, &errorCode);
    268         case UCHAR_HANGUL_SYLLABLE_TYPE:
    269             return uchar_getHST(c);
    270 #if !UCONFIG_NO_NORMALIZATION
    271         case UCHAR_NFD_QUICK_CHECK:
    272         case UCHAR_NFKD_QUICK_CHECK:
    273         case UCHAR_NFC_QUICK_CHECK:
    274         case UCHAR_NFKC_QUICK_CHECK:
    275             return (int32_t)unorm_getQuickCheck(c, (UNormalizationMode)(which-UCHAR_NFD_QUICK_CHECK+UNORM_NFD));
    276         case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
    277             return unorm_getFCD16FromCodePoint(c)>>8;
    278         case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
    279             return unorm_getFCD16FromCodePoint(c)&0xff;
    280 #endif
    281         case UCHAR_GRAPHEME_CLUSTER_BREAK:
    282             return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
    283         case UCHAR_SENTENCE_BREAK:
    284             return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT;
    285         case UCHAR_WORD_BREAK:
    286             return (int32_t)(u_getUnicodeProperties(c, 2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT;
    287         default:
    288             return 0; /* undefined */
    289         }
    290     } else if(which==UCHAR_GENERAL_CATEGORY_MASK) {
    291         return U_MASK(u_charType(c));
    292     } else {
    293         return 0; /* undefined */
    294     }
    295 }
    296 
    297 U_CAPI int32_t U_EXPORT2
    298 u_getIntPropertyMinValue(UProperty which) {
    299     return 0; /* all binary/enum/int properties have a minimum value of 0 */
    300 }
    301 
    302 U_CAPI int32_t U_EXPORT2
    303 u_getIntPropertyMaxValue(UProperty which) {
    304     if(which<UCHAR_BINARY_START) {
    305         return -1; /* undefined */
    306     } else if(which<UCHAR_BINARY_LIMIT) {
    307         return 1; /* maximum TRUE for all binary properties */
    308     } else if(which<UCHAR_INT_START) {
    309         return -1; /* undefined */
    310     } else if(which<UCHAR_INT_LIMIT) {
    311         switch(which) {
    312         case UCHAR_BIDI_CLASS:
    313         case UCHAR_JOINING_GROUP:
    314         case UCHAR_JOINING_TYPE:
    315             return ubidi_getMaxValue(GET_BIDI_PROPS(), which);
    316         case UCHAR_BLOCK:
    317             return (uprv_getMaxValues(0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT;
    318         case UCHAR_CANONICAL_COMBINING_CLASS:
    319         case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
    320         case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
    321             return 0xff; /* TODO do we need to be more precise, getting the actual maximum? */
    322         case UCHAR_DECOMPOSITION_TYPE:
    323             return uprv_getMaxValues(2)&UPROPS_DT_MASK;
    324         case UCHAR_EAST_ASIAN_WIDTH:
    325             return (uprv_getMaxValues(0)&UPROPS_EA_MASK)>>UPROPS_EA_SHIFT;
    326         case UCHAR_GENERAL_CATEGORY:
    327             return (int32_t)U_CHAR_CATEGORY_COUNT-1;
    328         case UCHAR_LINE_BREAK:
    329             return (uprv_getMaxValues(UPROPS_LB_VWORD)&UPROPS_LB_MASK)>>UPROPS_LB_SHIFT;
    330         case UCHAR_NUMERIC_TYPE:
    331             return (int32_t)U_NT_COUNT-1;
    332         case UCHAR_SCRIPT:
    333             return uprv_getMaxValues(0)&UPROPS_SCRIPT_MASK;
    334         case UCHAR_HANGUL_SYLLABLE_TYPE:
    335             return (int32_t)U_HST_COUNT-1;
    336 #if !UCONFIG_NO_NORMALIZATION
    337         case UCHAR_NFD_QUICK_CHECK:
    338         case UCHAR_NFKD_QUICK_CHECK:
    339             return (int32_t)UNORM_YES; /* these are never "maybe", only "no" or "yes" */
    340         case UCHAR_NFC_QUICK_CHECK:
    341         case UCHAR_NFKC_QUICK_CHECK:
    342             return (int32_t)UNORM_MAYBE;
    343 #endif
    344         case UCHAR_GRAPHEME_CLUSTER_BREAK:
    345             return (uprv_getMaxValues(2)&UPROPS_GCB_MASK)>>UPROPS_GCB_SHIFT;
    346         case UCHAR_SENTENCE_BREAK:
    347             return (uprv_getMaxValues(2)&UPROPS_SB_MASK)>>UPROPS_SB_SHIFT;
    348         case UCHAR_WORD_BREAK:
    349             return (uprv_getMaxValues(2)&UPROPS_WB_MASK)>>UPROPS_WB_SHIFT;
    350         default:
    351             return -1; /* undefined */
    352         }
    353     } else {
    354         return -1; /* undefined */
    355     }
    356 }
    357 
    358 U_CFUNC UPropertySource U_EXPORT2
    359 uprops_getSource(UProperty which) {
    360     if(which<UCHAR_BINARY_START) {
    361         return UPROPS_SRC_NONE; /* undefined */
    362     } else if(which<UCHAR_BINARY_LIMIT) {
    363         if(binProps[which].mask!=0) {
    364             return UPROPS_SRC_PROPSVEC;
    365         } else {
    366             return (UPropertySource)binProps[which].column;
    367         }
    368     } else if(which<UCHAR_INT_START) {
    369         return UPROPS_SRC_NONE; /* undefined */
    370     } else if(which<UCHAR_INT_LIMIT) {
    371         switch(which) {
    372         case UCHAR_GENERAL_CATEGORY:
    373         case UCHAR_NUMERIC_TYPE:
    374             return UPROPS_SRC_CHAR;
    375 
    376         case UCHAR_HANGUL_SYLLABLE_TYPE:
    377             return UPROPS_SRC_HST;
    378 
    379         case UCHAR_CANONICAL_COMBINING_CLASS:
    380         case UCHAR_NFD_QUICK_CHECK:
    381         case UCHAR_NFKD_QUICK_CHECK:
    382         case UCHAR_NFC_QUICK_CHECK:
    383         case UCHAR_NFKC_QUICK_CHECK:
    384         case UCHAR_LEAD_CANONICAL_COMBINING_CLASS:
    385         case UCHAR_TRAIL_CANONICAL_COMBINING_CLASS:
    386             return UPROPS_SRC_NORM;
    387 
    388         case UCHAR_BIDI_CLASS:
    389         case UCHAR_JOINING_GROUP:
    390         case UCHAR_JOINING_TYPE:
    391             return UPROPS_SRC_BIDI;
    392 
    393         default:
    394             return UPROPS_SRC_PROPSVEC;
    395         }
    396     } else if(which<UCHAR_STRING_START) {
    397         switch(which) {
    398         case UCHAR_GENERAL_CATEGORY_MASK:
    399         case UCHAR_NUMERIC_VALUE:
    400             return UPROPS_SRC_CHAR;
    401 
    402         default:
    403             return UPROPS_SRC_NONE;
    404         }
    405     } else if(which<UCHAR_STRING_LIMIT) {
    406         switch(which) {
    407         case UCHAR_AGE:
    408             return UPROPS_SRC_PROPSVEC;
    409 
    410         case UCHAR_BIDI_MIRRORING_GLYPH:
    411             return UPROPS_SRC_BIDI;
    412 
    413         case UCHAR_CASE_FOLDING:
    414         case UCHAR_LOWERCASE_MAPPING:
    415         case UCHAR_SIMPLE_CASE_FOLDING:
    416         case UCHAR_SIMPLE_LOWERCASE_MAPPING:
    417         case UCHAR_SIMPLE_TITLECASE_MAPPING:
    418         case UCHAR_SIMPLE_UPPERCASE_MAPPING:
    419         case UCHAR_TITLECASE_MAPPING:
    420         case UCHAR_UPPERCASE_MAPPING:
    421             return UPROPS_SRC_CASE;
    422 
    423         case UCHAR_ISO_COMMENT:
    424         case UCHAR_NAME:
    425         case UCHAR_UNICODE_1_NAME:
    426             return UPROPS_SRC_NAMES;
    427 
    428         default:
    429             return UPROPS_SRC_NONE;
    430         }
    431     } else {
    432         return UPROPS_SRC_NONE; /* undefined */
    433     }
    434 }
    435 
    436 /*----------------------------------------------------------------
    437  * Inclusions list
    438  *----------------------------------------------------------------*/
    439 
    440 /*
    441  * Return a set of characters for property enumeration.
    442  * The set implicitly contains 0x110000 as well, which is one more than the highest
    443  * Unicode code point.
    444  *
    445  * This set is used as an ordered list - its code points are ordered, and
    446  * consecutive code points (in Unicode code point order) in the set define a range.
    447  * For each two consecutive characters (start, limit) in the set,
    448  * all of the UCD/normalization and related properties for
    449  * all code points start..limit-1 are all the same,
    450  * except for character names and ISO comments.
    451  *
    452  * All Unicode code points U+0000..U+10ffff are covered by these ranges.
    453  * The ranges define a partition of the Unicode code space.
    454  * ICU uses the inclusions set to enumerate properties for generating
    455  * UnicodeSets containing all code points that have a certain property value.
    456  *
    457  * The Inclusion List is generated from the UCD. It is generated
    458  * by enumerating the data tries, and code points for hardcoded properties
    459  * are added as well.
    460  *
    461  * --------------------------------------------------------------------------
    462  *
    463  * The following are ideas for getting properties-unique code point ranges,
    464  * with possible optimizations beyond the current implementation.
    465  * These optimizations would require more code and be more fragile.
    466  * The current implementation generates one single list (set) for all properties.
    467  *
    468  * To enumerate properties efficiently, one needs to know ranges of
    469  * repetitive values, so that the value of only each start code point
    470  * can be applied to the whole range.
    471  * This information is in principle available in the uprops.icu/unorm.icu data.
    472  *
    473  * There are two obstacles:
    474  *
    475  * 1. Some properties are computed from multiple data structures,
    476  *    making it necessary to get repetitive ranges by intersecting
    477  *    ranges from multiple tries.
    478  *
    479  * 2. It is not economical to write code for getting repetitive ranges
    480  *    that are precise for each of some 50 properties.
    481  *
    482  * Compromise ideas:
    483  *
    484  * - Get ranges per trie, not per individual property.
    485  *   Each range contains the same values for a whole group of properties.
    486  *   This would generate currently five range sets, two for uprops.icu tries
    487  *   and three for unorm.icu tries.
    488  *
    489  * - Combine sets of ranges for multiple tries to get sufficient sets
    490  *   for properties, e.g., the uprops.icu main and auxiliary tries
    491  *   for all non-normalization properties.
    492  *
    493  * Ideas for representing ranges and combining them:
    494  *
    495  * - A UnicodeSet could hold just the start code points of ranges.
    496  *   Multiple sets are easily combined by or-ing them together.
    497  *
    498  * - Alternatively, a UnicodeSet could hold each even-numbered range.
    499  *   All ranges could be enumerated by using each start code point
    500  *   (for the even-numbered ranges) as well as each limit (end+1) code point
    501  *   (for the odd-numbered ranges).
    502  *   It should be possible to combine two such sets by xor-ing them,
    503  *   but no more than two.
    504  *
    505  * The second way to represent ranges may(?!) yield smaller UnicodeSet arrays,
    506  * but the first one is certainly simpler and applicable for combining more than
    507  * two range sets.
    508  *
    509  * It is possible to combine all range sets for all uprops/unorm tries into one
    510  * set that can be used for all properties.
    511  * As an optimization, there could be less-combined range sets for certain
    512  * groups of properties.
    513  * The relationship of which less-combined range set to use for which property
    514  * depends on the implementation of the properties and must be hardcoded
    515  * - somewhat error-prone and higher maintenance but can be tested easily
    516  * by building property sets "the simple way" in test code.
    517  *
    518  * ---
    519  *
    520  * Do not use a UnicodeSet pattern because that causes infinite recursion;
    521  * UnicodeSet depends on the inclusions set.
    522  *
    523  * ---
    524  *
    525  * uprv_getInclusions() is commented out starting 2004-sep-13 because
    526  * uniset_props.cpp now calls the uxyz_addPropertyStarts() directly,
    527  * and only for the relevant property source.
    528  */
    529 #if 0
    530 
/*
 * Add the property start code points from every property source to the set
 * adder sa, so that sa ends up describing the combined inclusions list.
 *
 * NOTE: this function sits inside an "#if 0" region and is not compiled.
 *
 * sa          set adder that receives the code points
 * pErrorCode  in/out error code; the function does nothing if it is NULL or
 *             already indicates a failure
 */
U_CAPI void U_EXPORT2
uprv_getInclusions(const USetAdder *sa, UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return;
    }

    /* each call below receives the same sa and pErrorCode */
#if !UCONFIG_NO_NORMALIZATION
    unorm_addPropertyStarts(sa, pErrorCode);
#endif
    uchar_addPropertyStarts(sa, pErrorCode);
    uhst_addPropertyStarts(sa, pErrorCode);
    ucase_addPropertyStarts(ucase_getSingleton(pErrorCode), sa, pErrorCode);
    ubidi_addPropertyStarts(ubidi_getSingleton(pErrorCode), sa, pErrorCode);
}
    545 
    546 #endif
    547