Home | History | Annotate | Download | only in common
      1 /*
      2 ********************************************************************************
      3 *   Copyright (C) 1996-2014, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 ********************************************************************************
      6 *
      7 * File UCHAR.C
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   04/02/97    aliu        Creation.
     13 *   4/15/99     Madhu       Updated all the function definitions for C Implementation
     14 *   5/20/99     Madhu       Added the function u_getVersion()
     15 *   8/19/1999   srl         Upgraded scripts to Unicode3.0
     16 *   11/11/1999  weiv        added u_isalnum(), cleaned comments
     17 *   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion.
     18 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
     19 ******************************************************************************
     20 */
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/uchar.h"
     24 #include "unicode/uscript.h"
     25 #include "unicode/udata.h"
     26 #include "uassert.h"
     27 #include "cmemory.h"
     28 #include "ucln_cmn.h"
     29 #include "utrie2.h"
     30 #include "udataswp.h"
     31 #include "uprops.h"
     32 #include "ustr_imp.h"
     33 
     34 /* uchar_props_data.h is machine-generated by genprops --csource */
     35 #define INCLUDED_FROM_UCHAR_C
     36 #include "uchar_props_data.h"
     37 
     38 /* constants and macros for access to the data ------------------------------ */
     39 
     /*
      * Fetches the 16-bit main-properties word for code point c from propsTrie
      * and assigns it to result.
      * Expands to one parenthesized expression with no trailing semicolon, so
      * that "GET_PROPS(c, props);" is exactly one statement and remains safe
      * as the body of an unbraced if/else.
      */
     #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, (c)))
     42 
     43 U_CFUNC UBool
     44 uprv_haveProperties(UErrorCode *pErrorCode) {
     45     if(U_FAILURE(*pErrorCode)) {
     46         return FALSE;
     47     }
     48     return TRUE;
     49 }
     50 
     51 /* API functions ------------------------------------------------------------ */
     52 
     53 /* Gets the Unicode character's general category.*/
     54 U_CAPI int8_t U_EXPORT2
     55 u_charType(UChar32 c) {
     56     uint32_t props;
     57     GET_PROPS(c, props);
     58     return (int8_t)GET_CATEGORY(props);
     59 }
     60 
     61 /* Enumerate all code points with their general categories. */
     62 struct _EnumTypeCallback {
     63     UCharEnumTypeRange *enumRange;
     64     const void *context;
     65 };
     66 
     67 static uint32_t U_CALLCONV
     68 _enumTypeValue(const void *context, uint32_t value) {
     69     return GET_CATEGORY(value);
     70 }
     71 
     72 static UBool U_CALLCONV
     73 _enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
     74     /* just cast the value to UCharCategory */
     75     return ((struct _EnumTypeCallback *)context)->
     76         enumRange(((struct _EnumTypeCallback *)context)->context,
     77                   start, end+1, (UCharCategory)value);
     78 }
     79 
     80 U_CAPI void U_EXPORT2
     81 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
     82     struct _EnumTypeCallback callback;
     83 
     84     if(enumRange==NULL) {
     85         return;
     86     }
     87 
     88     callback.enumRange=enumRange;
     89     callback.context=context;
     90     utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
     91 }
     92 
     93 /* Checks if ch is a lower case letter.*/
     94 U_CAPI UBool U_EXPORT2
     95 u_islower(UChar32 c) {
     96     uint32_t props;
     97     GET_PROPS(c, props);
     98     return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER);
     99 }
    100 
    101 /* Checks if ch is an upper case letter.*/
    102 U_CAPI UBool U_EXPORT2
    103 u_isupper(UChar32 c) {
    104     uint32_t props;
    105     GET_PROPS(c, props);
    106     return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER);
    107 }
    108 
    109 /* Checks if ch is a title case letter; usually upper case letters.*/
    110 U_CAPI UBool U_EXPORT2
    111 u_istitle(UChar32 c) {
    112     uint32_t props;
    113     GET_PROPS(c, props);
    114     return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER);
    115 }
    116 
    117 /* Checks if ch is a decimal digit. */
    118 U_CAPI UBool U_EXPORT2
    119 u_isdigit(UChar32 c) {
    120     uint32_t props;
    121     GET_PROPS(c, props);
    122     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
    123 }
    124 
    125 U_CAPI UBool U_EXPORT2
    126 u_isxdigit(UChar32 c) {
    127     uint32_t props;
    128 
    129     /* check ASCII and Fullwidth ASCII a-fA-F */
    130     if(
    131         (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
    132         (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
    133     ) {
    134         return TRUE;
    135     }
    136 
    137     GET_PROPS(c, props);
    138     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
    139 }
    140 
    141 /* Checks if the Unicode character is a letter.*/
    142 U_CAPI UBool U_EXPORT2
    143 u_isalpha(UChar32 c) {
    144     uint32_t props;
    145     GET_PROPS(c, props);
    146     return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0);
    147 }
    148 
    149 U_CAPI UBool U_EXPORT2
    150 u_isUAlphabetic(UChar32 c) {
    151     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0;
    152 }
    153 
    154 /* Checks if c is a letter or a decimal digit */
    155 U_CAPI UBool U_EXPORT2
    156 u_isalnum(UChar32 c) {
    157     uint32_t props;
    158     GET_PROPS(c, props);
    159     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
    160 }
    161 
    162 /**
    163  * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
    164  * @internal
    165  */
    166 U_CFUNC UBool
    167 u_isalnumPOSIX(UChar32 c) {
    168     return (UBool)(u_isUAlphabetic(c) || u_isdigit(c));
    169 }
    170 
    171 /* Checks if ch is a unicode character with assigned character type.*/
    172 U_CAPI UBool U_EXPORT2
    173 u_isdefined(UChar32 c) {
    174     uint32_t props;
    175     GET_PROPS(c, props);
    176     return (UBool)(GET_CATEGORY(props)!=0);
    177 }
    178 
    179 /* Checks if the Unicode character is a base form character that can take a diacritic.*/
    180 U_CAPI UBool U_EXPORT2
    181 u_isbase(UChar32 c) {
    182     uint32_t props;
    183     GET_PROPS(c, props);
    184     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0);
    185 }
    186 
    187 /* Checks if the Unicode character is a control character.*/
    188 U_CAPI UBool U_EXPORT2
    189 u_iscntrl(UChar32 c) {
    190     uint32_t props;
    191     GET_PROPS(c, props);
    192     return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0);
    193 }
    194 
    195 U_CAPI UBool U_EXPORT2
    196 u_isISOControl(UChar32 c) {
    197     return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f);
    198 }
    199 
    /*
     * Control characters that are treated as space:
     * TAB..CR, 0x1c..0x1f, and NL.
     * The argument is parenthesized at every use so that expressions with
     * low-precedence operators can be passed safely; it is evaluated more than once.
     */
    #define IS_THAT_CONTROL_SPACE(c) \
        ((c)<=0x9f && (((c)>=TAB && (c)<=CR) || ((c)>=0x1c && (c)<=0x1f) || (c)==NL))
    203 
    /*
     * Like the control-space test but without U+0085 New Line, which Java no
     * longer treats as whitespace: matches TAB..CR and 0x1c..0x1f.
     * The argument is parenthesized at every use so that expressions with
     * low-precedence operators can be passed safely; it is evaluated more than once.
     */
    #define IS_THAT_ASCII_CONTROL_SPACE(c) \
        ((c)<=0x1f && (c)>=TAB && ((c)<=CR || (c)>=0x1c))
    207 
    208 /* Checks if the Unicode character is a space character.*/
    209 U_CAPI UBool U_EXPORT2
    210 u_isspace(UChar32 c) {
    211     uint32_t props;
    212     GET_PROPS(c, props);
    213     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c));
    214 }
    215 
    216 U_CAPI UBool U_EXPORT2
    217 u_isJavaSpaceChar(UChar32 c) {
    218     uint32_t props;
    219     GET_PROPS(c, props);
    220     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0);
    221 }
    222 
    223 /* Checks if the Unicode character is a whitespace character.*/
    224 U_CAPI UBool U_EXPORT2
    225 u_isWhitespace(UChar32 c) {
    226     uint32_t props;
    227     GET_PROPS(c, props);
    228     return (UBool)(
    229                 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 &&
    230                     c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */
    231                 IS_THAT_ASCII_CONTROL_SPACE(c)
    232            );
    233 }
    234 
    235 U_CAPI UBool U_EXPORT2
    236 u_isblank(UChar32 c) {
    237     if((uint32_t)c<=0x9f) {
    238         return c==9 || c==0x20; /* TAB or SPACE */
    239     } else {
    240         /* Zs */
    241         uint32_t props;
    242         GET_PROPS(c, props);
    243         return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
    244     }
    245 }
    246 
    247 U_CAPI UBool U_EXPORT2
    248 u_isUWhiteSpace(UChar32 c) {
    249     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0;
    250 }
    251 
    252 /* Checks if the Unicode character is printable.*/
    253 U_CAPI UBool U_EXPORT2
    254 u_isprint(UChar32 c) {
    255     uint32_t props;
    256     GET_PROPS(c, props);
    257     /* comparing ==0 returns FALSE for the categories mentioned */
    258     return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0);
    259 }
    260 
    261 /**
    262  * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
    263  * Implements UCHAR_POSIX_PRINT.
    264  * @internal
    265  */
    266 U_CFUNC UBool
    267 u_isprintPOSIX(UChar32 c) {
    268     uint32_t props;
    269     GET_PROPS(c, props);
    270     /*
    271      * The only cntrl character in graph+blank is TAB (in blank).
    272      * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
    273      */
    274     return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c));
    275 }
    276 
    277 U_CAPI UBool U_EXPORT2
    278 u_isgraph(UChar32 c) {
    279     uint32_t props;
    280     GET_PROPS(c, props);
    281     /* comparing ==0 returns FALSE for the categories mentioned */
    282     return (UBool)((CAT_MASK(props)&
    283                     (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
    284                    ==0);
    285 }
    286 
    287 /**
    288  * Checks if c is in
    289  * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
    290  * with space=\p{Whitespace} and Control=Cc.
    291  * Implements UCHAR_POSIX_GRAPH.
    292  * @internal
    293  */
    294 U_CFUNC UBool
    295 u_isgraphPOSIX(UChar32 c) {
    296     uint32_t props;
    297     GET_PROPS(c, props);
    298     /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
    299     /* comparing ==0 returns FALSE for the categories mentioned */
    300     return (UBool)((CAT_MASK(props)&
    301                     (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
    302                    ==0);
    303 }
    304 
    305 U_CAPI UBool U_EXPORT2
    306 u_ispunct(UChar32 c) {
    307     uint32_t props;
    308     GET_PROPS(c, props);
    309     return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0);
    310 }
    311 
    312 /* Checks if the Unicode character can start a Unicode identifier.*/
    313 U_CAPI UBool U_EXPORT2
    314 u_isIDStart(UChar32 c) {
    315     /* same as u_isalpha() */
    316     uint32_t props;
    317     GET_PROPS(c, props);
    318     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
    319 }
    320 
    321 /* Checks if the Unicode character can be a Unicode identifier part other than starting the
    322  identifier.*/
    323 U_CAPI UBool U_EXPORT2
    324 u_isIDPart(UChar32 c) {
    325     uint32_t props;
    326     GET_PROPS(c, props);
    327     return (UBool)(
    328            (CAT_MASK(props)&
    329             (U_GC_ND_MASK|U_GC_NL_MASK|
    330              U_GC_L_MASK|
    331              U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK)
    332            )!=0 ||
    333            u_isIDIgnorable(c));
    334 }
    335 
    336 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
    337 U_CAPI UBool U_EXPORT2
    338 u_isIDIgnorable(UChar32 c) {
    339     if(c<=0x9f) {
    340         return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
    341     } else {
    342         uint32_t props;
    343         GET_PROPS(c, props);
    344         return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
    345     }
    346 }
    347 
    348 /*Checks if the Unicode character can start a Java identifier.*/
    349 U_CAPI UBool U_EXPORT2
    350 u_isJavaIDStart(UChar32 c) {
    351     uint32_t props;
    352     GET_PROPS(c, props);
    353     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0);
    354 }
    355 
    356 /*Checks if the Unicode character can be a Java identifier part other than starting the
    357  * identifier.
    358  */
    359 U_CAPI UBool U_EXPORT2
    360 u_isJavaIDPart(UChar32 c) {
    361     uint32_t props;
    362     GET_PROPS(c, props);
    363     return (UBool)(
    364            (CAT_MASK(props)&
    365             (U_GC_ND_MASK|U_GC_NL_MASK|
    366              U_GC_L_MASK|
    367              U_GC_SC_MASK|U_GC_PC_MASK|
    368              U_GC_MC_MASK|U_GC_MN_MASK)
    369            )!=0 ||
    370            u_isIDIgnorable(c));
    371 }
    372 
    373 U_CAPI int32_t U_EXPORT2
    374 u_charDigitValue(UChar32 c) {
    375     uint32_t props;
    376     int32_t value;
    377     GET_PROPS(c, props);
    378     value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START;
    379     if(value<=9) {
    380         return value;
    381     } else {
    382         return -1;
    383     }
    384 }
    385 
    386 U_CAPI double U_EXPORT2
    387 u_getNumericValue(UChar32 c) {
    388     uint32_t props;
    389     int32_t ntv;
    390     GET_PROPS(c, props);
    391     ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props);
    392 
    393     if(ntv==UPROPS_NTV_NONE) {
    394         return U_NO_NUMERIC_VALUE;
    395     } else if(ntv<UPROPS_NTV_DIGIT_START) {
    396         /* decimal digit */
    397         return ntv-UPROPS_NTV_DECIMAL_START;
    398     } else if(ntv<UPROPS_NTV_NUMERIC_START) {
    399         /* other digit */
    400         return ntv-UPROPS_NTV_DIGIT_START;
    401     } else if(ntv<UPROPS_NTV_FRACTION_START) {
    402         /* small integer */
    403         return ntv-UPROPS_NTV_NUMERIC_START;
    404     } else if(ntv<UPROPS_NTV_LARGE_START) {
    405         /* fraction */
    406         int32_t numerator=(ntv>>4)-12;
    407         int32_t denominator=(ntv&0xf)+1;
    408         return (double)numerator/denominator;
    409     } else if(ntv<UPROPS_NTV_BASE60_START) {
    410         /* large, single-significant-digit integer */
    411         double numValue;
    412         int32_t mant=(ntv>>5)-14;
    413         int32_t exp=(ntv&0x1f)+2;
    414         numValue=mant;
    415 
    416         /* multiply by 10^exp without math.h */
    417         while(exp>=4) {
    418             numValue*=10000.;
    419             exp-=4;
    420         }
    421         switch(exp) {
    422         case 3:
    423             numValue*=1000.;
    424             break;
    425         case 2:
    426             numValue*=100.;
    427             break;
    428         case 1:
    429             numValue*=10.;
    430             break;
    431         case 0:
    432         default:
    433             break;
    434         }
    435 
    436         return numValue;
    437     } else if(ntv<UPROPS_NTV_RESERVED_START) {
    438         /* sexagesimal (base 60) integer */
    439         int32_t numValue=(ntv>>2)-0xbf;
    440         int32_t exp=(ntv&3)+1;
    441 
    442         switch(exp) {
    443         case 4:
    444             numValue*=60*60*60*60;
    445             break;
    446         case 3:
    447             numValue*=60*60*60;
    448             break;
    449         case 2:
    450             numValue*=60*60;
    451             break;
    452         case 1:
    453             numValue*=60;
    454             break;
    455         case 0:
    456         default:
    457             break;
    458         }
    459 
    460         return numValue;
    461     } else {
    462         /* reserved */
    463         return U_NO_NUMERIC_VALUE;
    464     }
    465 }
    466 
    467 U_CAPI int32_t U_EXPORT2
    468 u_digit(UChar32 ch, int8_t radix) {
    469     int8_t value;
    470     if((uint8_t)(radix-2)<=(36-2)) {
    471         value=(int8_t)u_charDigitValue(ch);
    472         if(value<0) {
    473             /* ch is not a decimal digit, try latin letters */
    474             if(ch>=0x61 && ch<=0x7A) {
    475                 value=(int8_t)(ch-0x57);  /* ch - 'a' + 10 */
    476             } else if(ch>=0x41 && ch<=0x5A) {
    477                 value=(int8_t)(ch-0x37);  /* ch - 'A' + 10 */
    478             } else if(ch>=0xFF41 && ch<=0xFF5A) {
    479                 value=(int8_t)(ch-0xFF37);  /* fullwidth ASCII a-z */
    480             } else if(ch>=0xFF21 && ch<=0xFF3A) {
    481                 value=(int8_t)(ch-0xFF17);  /* fullwidth ASCII A-Z */
    482             }
    483         }
    484     } else {
    485         value=-1;   /* invalid radix */
    486     }
    487     return (int8_t)((value<radix) ? value : -1);
    488 }
    489 
    490 U_CAPI UChar32 U_EXPORT2
    491 u_forDigit(int32_t digit, int8_t radix) {
    492     if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) {
    493         return 0;
    494     } else if(digit<10) {
    495         return (UChar32)(0x30+digit);
    496     } else {
    497         return (UChar32)((0x61-10)+digit);
    498     }
    499 }
    500 
    501 /* miscellaneous, and support for uprops.cpp -------------------------------- */
    502 
    503 U_CAPI void U_EXPORT2
    504 u_getUnicodeVersion(UVersionInfo versionArray) {
    505     if(versionArray!=NULL) {
    506         uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH);
    507     }
    508 }
    509 
    510 U_CFUNC uint32_t
    511 u_getMainProperties(UChar32 c) {
    512     uint32_t props;
    513     GET_PROPS(c, props);
    514     return props;
    515 }
    516 
    517 U_CFUNC uint32_t
    518 u_getUnicodeProperties(UChar32 c, int32_t column) {
    519     U_ASSERT(column>=0);
    520     if(column>=propsVectorsColumns) {
    521         return 0;
    522     } else {
    523         uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c);
    524         return propsVectors[vecIndex+column];
    525     }
    526 }
    527 
    528 U_CFUNC int32_t
    529 uprv_getMaxValues(int32_t column) {
    530     switch(column) {
    531     case 0:
    532         return indexes[UPROPS_MAX_VALUES_INDEX];
    533     case 2:
    534         return indexes[UPROPS_MAX_VALUES_2_INDEX];
    535     default:
    536         return 0;
    537     }
    538 }
    539 
    540 U_CAPI void U_EXPORT2
    541 u_charAge(UChar32 c, UVersionInfo versionArray) {
    542     if(versionArray!=NULL) {
    543         uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT;
    544         versionArray[0]=(uint8_t)(version>>4);
    545         versionArray[1]=(uint8_t)(version&0xf);
    546         versionArray[2]=versionArray[3]=0;
    547     }
    548 }
    549 
    550 U_CAPI UScriptCode U_EXPORT2
    551 uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
    552     uint32_t scriptX;
    553     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    554         return USCRIPT_INVALID_CODE;
    555     }
    556     if((uint32_t)c>0x10ffff) {
    557         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    558         return USCRIPT_INVALID_CODE;
    559     }
    560     scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
    561     if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
    562         return (UScriptCode)scriptX;
    563     } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) {
    564         return USCRIPT_COMMON;
    565     } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) {
    566         return USCRIPT_INHERITED;
    567     } else {
    568         return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK];
    569     }
    570 }
    571 
    572 U_CAPI UBool U_EXPORT2
    573 uscript_hasScript(UChar32 c, UScriptCode sc) {
    574     const uint16_t *scx;
    575     uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
    576     if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
    577         return sc==(UScriptCode)scriptX;
    578     }
    579 
    580     scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
    581     if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
    582         scx=scriptExtensions+scx[1];
    583     }
    584     if(sc>=USCRIPT_CODE_LIMIT) {
    585         /* Guard against bogus input that would make us go past the Script_Extensions terminator. */
    586         return FALSE;
    587     }
    588     while(sc>*scx) {
    589         ++scx;
    590     }
    591     return sc==(*scx&0x7fff);
    592 }
    593 
    594 U_CAPI int32_t U_EXPORT2
    595 uscript_getScriptExtensions(UChar32 c,
    596                             UScriptCode *scripts, int32_t capacity,
    597                             UErrorCode *pErrorCode) {
    598     uint32_t scriptX;
    599     int32_t length;
    600     const uint16_t *scx;
    601     uint16_t sx;
    602     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    603         return 0;
    604     }
    605     if(capacity<0 || (capacity>0 && scripts==NULL)) {
    606         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    607         return 0;
    608     }
    609     scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
    610     if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
    611         if(capacity==0) {
    612             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    613         } else {
    614             scripts[0]=(UScriptCode)scriptX;
    615         }
    616         return 1;
    617     }
    618 
    619     scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
    620     if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
    621         scx=scriptExtensions+scx[1];
    622     }
    623     length=0;
    624     do {
    625         sx=*scx++;
    626         if(length<capacity) {
    627             scripts[length]=(UScriptCode)(sx&0x7fff);
    628         }
    629         ++length;
    630     } while(sx<0x8000);
    631     if(length>capacity) {
    632         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    633     }
    634     return length;
    635 }
    636 
    637 U_CAPI UBlockCode U_EXPORT2
    638 ublock_getCode(UChar32 c) {
    639     return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT);
    640 }
    641 
    642 /* property starts for UnicodeSet ------------------------------------------- */
    643 
    644 static UBool U_CALLCONV
    645 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    646     /* add the start code point to the USet */
    647     const USetAdder *sa=(const USetAdder *)context;
    648     sa->add(sa->set, start);
    649     return TRUE;
    650 }
    651 
    /*
     * Adds cp and cp+1 to the set of the USetAdder sa.
     * Wrapped in do { } while(0) so that the two calls form a single statement
     * (safe as the body of an unbraced if/else); arguments are parenthesized.
     * Both sa and cp are evaluated more than once.
     */
    #define USET_ADD_CP_AND_NEXT(sa, cp) \
        do { \
            (sa)->add((sa)->set, (cp)); \
            (sa)->add((sa)->set, (cp)+1); \
        } while(0)
    653 
    654 U_CFUNC void U_EXPORT2
    655 uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    656     if(U_FAILURE(*pErrorCode)) {
    657         return;
    658     }
    659 
    660     /* add the start code point of each same-value range of the main trie */
    661     utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
    662 
    663     /* add code points with hardcoded properties, plus the ones following them */
    664 
    665     /* add for u_isblank() */
    666     USET_ADD_CP_AND_NEXT(sa, TAB);
    667 
    668     /* add for IS_THAT_CONTROL_SPACE() */
    669     sa->add(sa->set, CR+1); /* range TAB..CR */
    670     sa->add(sa->set, 0x1c);
    671     sa->add(sa->set, 0x1f+1);
    672     USET_ADD_CP_AND_NEXT(sa, NL);
    673 
    674     /* add for u_isIDIgnorable() what was not added above */
    675     sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
    676     sa->add(sa->set, HAIRSP);
    677     sa->add(sa->set, RLM+1);
    678     sa->add(sa->set, INHSWAP);
    679     sa->add(sa->set, NOMDIG+1);
    680     USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
    681 
    682     /* add no-break spaces for u_isWhitespace() what was not added above */
    683     USET_ADD_CP_AND_NEXT(sa, NBSP);
    684     USET_ADD_CP_AND_NEXT(sa, FIGURESP);
    685     USET_ADD_CP_AND_NEXT(sa, NNBSP);
    686 
    687     /* add for u_digit() */
    688     sa->add(sa->set, U_a);
    689     sa->add(sa->set, U_z+1);
    690     sa->add(sa->set, U_A);
    691     sa->add(sa->set, U_Z+1);
    692     sa->add(sa->set, U_FW_a);
    693     sa->add(sa->set, U_FW_z+1);
    694     sa->add(sa->set, U_FW_A);
    695     sa->add(sa->set, U_FW_Z+1);
    696 
    697     /* add for u_isxdigit() */
    698     sa->add(sa->set, U_f+1);
    699     sa->add(sa->set, U_F+1);
    700     sa->add(sa->set, U_FW_f+1);
    701     sa->add(sa->set, U_FW_F+1);
    702 
    703     /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
    704     sa->add(sa->set, WJ); /* range WJ..NOMDIG */
    705     sa->add(sa->set, 0xfff0);
    706     sa->add(sa->set, 0xfffb+1);
    707     sa->add(sa->set, 0xe0000);
    708     sa->add(sa->set, 0xe0fff+1);
    709 
    710     /* add for UCHAR_GRAPHEME_BASE and others */
    711     USET_ADD_CP_AND_NEXT(sa, CGJ);
    712 }
    713 
    714 U_CFUNC void U_EXPORT2
    715 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    716     if(U_FAILURE(*pErrorCode)) {
    717         return;
    718     }
    719 
    720     /* add the start code point of each same-value range of the properties vectors trie */
    721     if(propsVectorsColumns>0) {
    722         /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
    723         utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
    724     }
    725 }
    726