Home | History | Annotate | Download | only in common
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ********************************************************************************
      5 *   Copyright (C) 1996-2016, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 ********************************************************************************
      8 *
      9 * File UCHAR.C
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   04/02/97    aliu        Creation.
     15 *   4/15/99     Madhu       Updated all the function definitions for C Implementation
     16 *   5/20/99     Madhu       Added the function u_getVersion()
     17 *   8/19/1999   srl         Upgraded scripts to Unicode3.0
     18 *   11/11/1999  weiv        added u_isalnum(), cleaned comments
     19 *   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion.
     20 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
     21 ******************************************************************************
     22 */
     23 
     24 #include "unicode/utypes.h"
     25 #include "unicode/uchar.h"
     26 #include "unicode/uscript.h"
     27 #include "unicode/udata.h"
     28 #include "uassert.h"
     29 #include "cmemory.h"
     30 #include "ucln_cmn.h"
     31 #include "utrie2.h"
     32 #include "udataswp.h"
     33 #include "uprops.h"
     34 #include "ustr_imp.h"
     35 
     36 /* uchar_props_data.h is machine-generated by genprops --csource */
     37 #define INCLUDED_FROM_UCHAR_C
     38 #include "uchar_props_data.h"
     39 
     40 /* constants and macros for access to the data ------------------------------ */
     41 
     42 /* getting a uint32_t properties word from the data */
     43 #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c));
     44 
     45 U_CFUNC UBool
     46 uprv_haveProperties(UErrorCode *pErrorCode) {
     47     if(U_FAILURE(*pErrorCode)) {
     48         return FALSE;
     49     }
     50     return TRUE;
     51 }
     52 
     53 /* API functions ------------------------------------------------------------ */
     54 
     55 /* Gets the Unicode character's general category.*/
     56 U_CAPI int8_t U_EXPORT2
     57 u_charType(UChar32 c) {
     58     uint32_t props;
     59     GET_PROPS(c, props);
     60     return (int8_t)GET_CATEGORY(props);
     61 }
     62 
     63 /* Enumerate all code points with their general categories. */
     64 struct _EnumTypeCallback {
     65     UCharEnumTypeRange *enumRange;
     66     const void *context;
     67 };
     68 
     69 static uint32_t U_CALLCONV
     70 _enumTypeValue(const void *context, uint32_t value) {
     71     (void)context;
     72     return GET_CATEGORY(value);
     73 }
     74 
     75 static UBool U_CALLCONV
     76 _enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
     77     /* just cast the value to UCharCategory */
     78     return ((struct _EnumTypeCallback *)context)->
     79         enumRange(((struct _EnumTypeCallback *)context)->context,
     80                   start, end+1, (UCharCategory)value);
     81 }
     82 
     83 U_CAPI void U_EXPORT2
     84 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
     85     struct _EnumTypeCallback callback;
     86 
     87     if(enumRange==NULL) {
     88         return;
     89     }
     90 
     91     callback.enumRange=enumRange;
     92     callback.context=context;
     93     utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
     94 }
     95 
     96 /* Checks if ch is a lower case letter.*/
     97 U_CAPI UBool U_EXPORT2
     98 u_islower(UChar32 c) {
     99     uint32_t props;
    100     GET_PROPS(c, props);
    101     return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER);
    102 }
    103 
    104 /* Checks if ch is an upper case letter.*/
    105 U_CAPI UBool U_EXPORT2
    106 u_isupper(UChar32 c) {
    107     uint32_t props;
    108     GET_PROPS(c, props);
    109     return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER);
    110 }
    111 
    112 /* Checks if ch is a title case letter; usually upper case letters.*/
    113 U_CAPI UBool U_EXPORT2
    114 u_istitle(UChar32 c) {
    115     uint32_t props;
    116     GET_PROPS(c, props);
    117     return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER);
    118 }
    119 
    120 /* Checks if ch is a decimal digit. */
    121 U_CAPI UBool U_EXPORT2
    122 u_isdigit(UChar32 c) {
    123     uint32_t props;
    124     GET_PROPS(c, props);
    125     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
    126 }
    127 
    128 U_CAPI UBool U_EXPORT2
    129 u_isxdigit(UChar32 c) {
    130     uint32_t props;
    131 
    132     /* check ASCII and Fullwidth ASCII a-fA-F */
    133     if(
    134         (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
    135         (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
    136     ) {
    137         return TRUE;
    138     }
    139 
    140     GET_PROPS(c, props);
    141     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
    142 }
    143 
    144 /* Checks if the Unicode character is a letter.*/
    145 U_CAPI UBool U_EXPORT2
    146 u_isalpha(UChar32 c) {
    147     uint32_t props;
    148     GET_PROPS(c, props);
    149     return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0);
    150 }
    151 
    152 U_CAPI UBool U_EXPORT2
    153 u_isUAlphabetic(UChar32 c) {
    154     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0;
    155 }
    156 
    157 /* Checks if c is a letter or a decimal digit */
    158 U_CAPI UBool U_EXPORT2
    159 u_isalnum(UChar32 c) {
    160     uint32_t props;
    161     GET_PROPS(c, props);
    162     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
    163 }
    164 
    165 /**
    166  * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
    167  * @internal
    168  */
    169 U_CFUNC UBool
    170 u_isalnumPOSIX(UChar32 c) {
    171     return (UBool)(u_isUAlphabetic(c) || u_isdigit(c));
    172 }
    173 
    174 /* Checks if ch is a unicode character with assigned character type.*/
    175 U_CAPI UBool U_EXPORT2
    176 u_isdefined(UChar32 c) {
    177     uint32_t props;
    178     GET_PROPS(c, props);
    179     return (UBool)(GET_CATEGORY(props)!=0);
    180 }
    181 
    182 /* Checks if the Unicode character is a base form character that can take a diacritic.*/
    183 U_CAPI UBool U_EXPORT2
    184 u_isbase(UChar32 c) {
    185     uint32_t props;
    186     GET_PROPS(c, props);
    187     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0);
    188 }
    189 
    190 /* Checks if the Unicode character is a control character.*/
    191 U_CAPI UBool U_EXPORT2
    192 u_iscntrl(UChar32 c) {
    193     uint32_t props;
    194     GET_PROPS(c, props);
    195     return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0);
    196 }
    197 
    198 U_CAPI UBool U_EXPORT2
    199 u_isISOControl(UChar32 c) {
    200     return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f);
    201 }
    202 
    203 /* Some control characters that are used as space. */
    204 #define IS_THAT_CONTROL_SPACE(c) \
    205     (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
    206 
    207 /* Java has decided that U+0085 New Line is not whitespace any more. */
    208 #define IS_THAT_ASCII_CONTROL_SPACE(c) \
    209     (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c))
    210 
    211 /* Checks if the Unicode character is a space character.*/
    212 U_CAPI UBool U_EXPORT2
    213 u_isspace(UChar32 c) {
    214     uint32_t props;
    215     GET_PROPS(c, props);
    216     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c));
    217 }
    218 
    219 U_CAPI UBool U_EXPORT2
    220 u_isJavaSpaceChar(UChar32 c) {
    221     uint32_t props;
    222     GET_PROPS(c, props);
    223     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0);
    224 }
    225 
    226 /* Checks if the Unicode character is a whitespace character.*/
    227 U_CAPI UBool U_EXPORT2
    228 u_isWhitespace(UChar32 c) {
    229     uint32_t props;
    230     GET_PROPS(c, props);
    231     return (UBool)(
    232                 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 &&
    233                     c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */
    234                 IS_THAT_ASCII_CONTROL_SPACE(c)
    235            );
    236 }
    237 
    238 U_CAPI UBool U_EXPORT2
    239 u_isblank(UChar32 c) {
    240     if((uint32_t)c<=0x9f) {
    241         return c==9 || c==0x20; /* TAB or SPACE */
    242     } else {
    243         /* Zs */
    244         uint32_t props;
    245         GET_PROPS(c, props);
    246         return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
    247     }
    248 }
    249 
    250 U_CAPI UBool U_EXPORT2
    251 u_isUWhiteSpace(UChar32 c) {
    252     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0;
    253 }
    254 
    255 /* Checks if the Unicode character is printable.*/
    256 U_CAPI UBool U_EXPORT2
    257 u_isprint(UChar32 c) {
    258     uint32_t props;
    259     GET_PROPS(c, props);
    260     /* comparing ==0 returns FALSE for the categories mentioned */
    261     return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0);
    262 }
    263 
    264 /**
    265  * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
    266  * Implements UCHAR_POSIX_PRINT.
    267  * @internal
    268  */
    269 U_CFUNC UBool
    270 u_isprintPOSIX(UChar32 c) {
    271     uint32_t props;
    272     GET_PROPS(c, props);
    273     /*
    274      * The only cntrl character in graph+blank is TAB (in blank).
    275      * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
    276      */
    277     return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c));
    278 }
    279 
    280 U_CAPI UBool U_EXPORT2
    281 u_isgraph(UChar32 c) {
    282     uint32_t props;
    283     GET_PROPS(c, props);
    284     /* comparing ==0 returns FALSE for the categories mentioned */
    285     return (UBool)((CAT_MASK(props)&
    286                     (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
    287                    ==0);
    288 }
    289 
    290 /**
    291  * Checks if c is in
    292  * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
    293  * with space=\p{Whitespace} and Control=Cc.
    294  * Implements UCHAR_POSIX_GRAPH.
    295  * @internal
    296  */
    297 U_CFUNC UBool
    298 u_isgraphPOSIX(UChar32 c) {
    299     uint32_t props;
    300     GET_PROPS(c, props);
    301     /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
    302     /* comparing ==0 returns FALSE for the categories mentioned */
    303     return (UBool)((CAT_MASK(props)&
    304                     (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
    305                    ==0);
    306 }
    307 
    308 U_CAPI UBool U_EXPORT2
    309 u_ispunct(UChar32 c) {
    310     uint32_t props;
    311     GET_PROPS(c, props);
    312     return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0);
    313 }
    314 
    315 /* Checks if the Unicode character can start a Unicode identifier.*/
    316 U_CAPI UBool U_EXPORT2
    317 u_isIDStart(UChar32 c) {
    318     /* same as u_isalpha() */
    319     uint32_t props;
    320     GET_PROPS(c, props);
    321     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
    322 }
    323 
    324 /* Checks if the Unicode character can be a Unicode identifier part other than starting the
    325  identifier.*/
    326 U_CAPI UBool U_EXPORT2
    327 u_isIDPart(UChar32 c) {
    328     uint32_t props;
    329     GET_PROPS(c, props);
    330     return (UBool)(
    331            (CAT_MASK(props)&
    332             (U_GC_ND_MASK|U_GC_NL_MASK|
    333              U_GC_L_MASK|
    334              U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK)
    335            )!=0 ||
    336            u_isIDIgnorable(c));
    337 }
    338 
    339 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
    340 U_CAPI UBool U_EXPORT2
    341 u_isIDIgnorable(UChar32 c) {
    342     if(c<=0x9f) {
    343         return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
    344     } else {
    345         uint32_t props;
    346         GET_PROPS(c, props);
    347         return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
    348     }
    349 }
    350 
    351 /*Checks if the Unicode character can start a Java identifier.*/
    352 U_CAPI UBool U_EXPORT2
    353 u_isJavaIDStart(UChar32 c) {
    354     uint32_t props;
    355     GET_PROPS(c, props);
    356     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0);
    357 }
    358 
    359 /*Checks if the Unicode character can be a Java identifier part other than starting the
    360  * identifier.
    361  */
    362 U_CAPI UBool U_EXPORT2
    363 u_isJavaIDPart(UChar32 c) {
    364     uint32_t props;
    365     GET_PROPS(c, props);
    366     return (UBool)(
    367            (CAT_MASK(props)&
    368             (U_GC_ND_MASK|U_GC_NL_MASK|
    369              U_GC_L_MASK|
    370              U_GC_SC_MASK|U_GC_PC_MASK|
    371              U_GC_MC_MASK|U_GC_MN_MASK)
    372            )!=0 ||
    373            u_isIDIgnorable(c));
    374 }
    375 
    376 U_CAPI int32_t U_EXPORT2
    377 u_charDigitValue(UChar32 c) {
    378     uint32_t props;
    379     int32_t value;
    380     GET_PROPS(c, props);
    381     value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START;
    382     if(value<=9) {
    383         return value;
    384     } else {
    385         return -1;
    386     }
    387 }
    388 
    389 U_CAPI double U_EXPORT2
    390 u_getNumericValue(UChar32 c) {
    391     uint32_t props;
    392     int32_t ntv;
    393     GET_PROPS(c, props);
    394     ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props);
    395 
    396     if(ntv==UPROPS_NTV_NONE) {
    397         return U_NO_NUMERIC_VALUE;
    398     } else if(ntv<UPROPS_NTV_DIGIT_START) {
    399         /* decimal digit */
    400         return ntv-UPROPS_NTV_DECIMAL_START;
    401     } else if(ntv<UPROPS_NTV_NUMERIC_START) {
    402         /* other digit */
    403         return ntv-UPROPS_NTV_DIGIT_START;
    404     } else if(ntv<UPROPS_NTV_FRACTION_START) {
    405         /* small integer */
    406         return ntv-UPROPS_NTV_NUMERIC_START;
    407     } else if(ntv<UPROPS_NTV_LARGE_START) {
    408         /* fraction */
    409         int32_t numerator=(ntv>>4)-12;
    410         int32_t denominator=(ntv&0xf)+1;
    411         return (double)numerator/denominator;
    412     } else if(ntv<UPROPS_NTV_BASE60_START) {
    413         /* large, single-significant-digit integer */
    414         double numValue;
    415         int32_t mant=(ntv>>5)-14;
    416         int32_t exp=(ntv&0x1f)+2;
    417         numValue=mant;
    418 
    419         /* multiply by 10^exp without math.h */
    420         while(exp>=4) {
    421             numValue*=10000.;
    422             exp-=4;
    423         }
    424         switch(exp) {
    425         case 3:
    426             numValue*=1000.;
    427             break;
    428         case 2:
    429             numValue*=100.;
    430             break;
    431         case 1:
    432             numValue*=10.;
    433             break;
    434         case 0:
    435         default:
    436             break;
    437         }
    438 
    439         return numValue;
    440     } else if(ntv<UPROPS_NTV_FRACTION20_START) {
    441         /* sexagesimal (base 60) integer */
    442         int32_t numValue=(ntv>>2)-0xbf;
    443         int32_t exp=(ntv&3)+1;
    444 
    445         switch(exp) {
    446         case 4:
    447             numValue*=60*60*60*60;
    448             break;
    449         case 3:
    450             numValue*=60*60*60;
    451             break;
    452         case 2:
    453             numValue*=60*60;
    454             break;
    455         case 1:
    456             numValue*=60;
    457             break;
    458         case 0:
    459         default:
    460             break;
    461         }
    462 
    463         return numValue;
    464     } else if(ntv<UPROPS_NTV_RESERVED_START) {
    465         // fraction-20 e.g. 3/80
    466         int32_t frac20=ntv-UPROPS_NTV_FRACTION20_START;  // 0..0x17
    467         int32_t numerator=2*(frac20&3)+1;
    468         int32_t denominator=20<<(frac20>>2);
    469         return (double)numerator/denominator;
    470     } else {
    471         /* reserved */
    472         return U_NO_NUMERIC_VALUE;
    473     }
    474 }
    475 
    476 U_CAPI int32_t U_EXPORT2
    477 u_digit(UChar32 ch, int8_t radix) {
    478     int8_t value;
    479     if((uint8_t)(radix-2)<=(36-2)) {
    480         value=(int8_t)u_charDigitValue(ch);
    481         if(value<0) {
    482             /* ch is not a decimal digit, try latin letters */
    483             if(ch>=0x61 && ch<=0x7A) {
    484                 value=(int8_t)(ch-0x57);  /* ch - 'a' + 10 */
    485             } else if(ch>=0x41 && ch<=0x5A) {
    486                 value=(int8_t)(ch-0x37);  /* ch - 'A' + 10 */
    487             } else if(ch>=0xFF41 && ch<=0xFF5A) {
    488                 value=(int8_t)(ch-0xFF37);  /* fullwidth ASCII a-z */
    489             } else if(ch>=0xFF21 && ch<=0xFF3A) {
    490                 value=(int8_t)(ch-0xFF17);  /* fullwidth ASCII A-Z */
    491             }
    492         }
    493     } else {
    494         value=-1;   /* invalid radix */
    495     }
    496     return (int8_t)((value<radix) ? value : -1);
    497 }
    498 
    499 U_CAPI UChar32 U_EXPORT2
    500 u_forDigit(int32_t digit, int8_t radix) {
    501     if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) {
    502         return 0;
    503     } else if(digit<10) {
    504         return (UChar32)(0x30+digit);
    505     } else {
    506         return (UChar32)((0x61-10)+digit);
    507     }
    508 }
    509 
    510 /* miscellaneous, and support for uprops.cpp -------------------------------- */
    511 
    512 U_CAPI void U_EXPORT2
    513 u_getUnicodeVersion(UVersionInfo versionArray) {
    514     if(versionArray!=NULL) {
    515         uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH);
    516     }
    517 }
    518 
    519 U_CFUNC uint32_t
    520 u_getMainProperties(UChar32 c) {
    521     uint32_t props;
    522     GET_PROPS(c, props);
    523     return props;
    524 }
    525 
    526 U_CFUNC uint32_t
    527 u_getUnicodeProperties(UChar32 c, int32_t column) {
    528     U_ASSERT(column>=0);
    529     if(column>=propsVectorsColumns) {
    530         return 0;
    531     } else {
    532         uint16_t vecIndex=UTRIE2_GET16(&propsVectorsTrie, c);
    533         return propsVectors[vecIndex+column];
    534     }
    535 }
    536 
    537 U_CFUNC int32_t
    538 uprv_getMaxValues(int32_t column) {
    539     switch(column) {
    540     case 0:
    541         return indexes[UPROPS_MAX_VALUES_INDEX];
    542     case 2:
    543         return indexes[UPROPS_MAX_VALUES_2_INDEX];
    544     default:
    545         return 0;
    546     }
    547 }
    548 
    549 U_CAPI void U_EXPORT2
    550 u_charAge(UChar32 c, UVersionInfo versionArray) {
    551     if(versionArray!=NULL) {
    552         uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT;
    553         versionArray[0]=(uint8_t)(version>>4);
    554         versionArray[1]=(uint8_t)(version&0xf);
    555         versionArray[2]=versionArray[3]=0;
    556     }
    557 }
    558 
    559 U_CAPI UScriptCode U_EXPORT2
    560 uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
    561     uint32_t scriptX;
    562     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    563         return USCRIPT_INVALID_CODE;
    564     }
    565     if((uint32_t)c>0x10ffff) {
    566         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    567         return USCRIPT_INVALID_CODE;
    568     }
    569     scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
    570     if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
    571         return (UScriptCode)scriptX;
    572     } else if(scriptX<UPROPS_SCRIPT_X_WITH_INHERITED) {
    573         return USCRIPT_COMMON;
    574     } else if(scriptX<UPROPS_SCRIPT_X_WITH_OTHER) {
    575         return USCRIPT_INHERITED;
    576     } else {
    577         return (UScriptCode)scriptExtensions[scriptX&UPROPS_SCRIPT_MASK];
    578     }
    579 }
    580 
    581 U_CAPI UBool U_EXPORT2
    582 uscript_hasScript(UChar32 c, UScriptCode sc) {
    583     const uint16_t *scx;
    584     uint32_t scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
    585     if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
    586         return sc==(UScriptCode)scriptX;
    587     }
    588 
    589     scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
    590     if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
    591         scx=scriptExtensions+scx[1];
    592     }
    593     if(sc>=USCRIPT_CODE_LIMIT) {
    594         /* Guard against bogus input that would make us go past the Script_Extensions terminator. */
    595         return FALSE;
    596     }
    597     while(sc>*scx) {
    598         ++scx;
    599     }
    600     return sc==(*scx&0x7fff);
    601 }
    602 
    603 U_CAPI int32_t U_EXPORT2
    604 uscript_getScriptExtensions(UChar32 c,
    605                             UScriptCode *scripts, int32_t capacity,
    606                             UErrorCode *pErrorCode) {
    607     uint32_t scriptX;
    608     int32_t length;
    609     const uint16_t *scx;
    610     uint16_t sx;
    611     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    612         return 0;
    613     }
    614     if(capacity<0 || (capacity>0 && scripts==NULL)) {
    615         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    616         return 0;
    617     }
    618     scriptX=u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_X_MASK;
    619     if(scriptX<UPROPS_SCRIPT_X_WITH_COMMON) {
    620         if(capacity==0) {
    621             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    622         } else {
    623             scripts[0]=(UScriptCode)scriptX;
    624         }
    625         return 1;
    626     }
    627 
    628     scx=scriptExtensions+(scriptX&UPROPS_SCRIPT_MASK);
    629     if(scriptX>=UPROPS_SCRIPT_X_WITH_OTHER) {
    630         scx=scriptExtensions+scx[1];
    631     }
    632     length=0;
    633     do {
    634         sx=*scx++;
    635         if(length<capacity) {
    636             scripts[length]=(UScriptCode)(sx&0x7fff);
    637         }
    638         ++length;
    639     } while(sx<0x8000);
    640     if(length>capacity) {
    641         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    642     }
    643     return length;
    644 }
    645 
    646 U_CAPI UBlockCode U_EXPORT2
    647 ublock_getCode(UChar32 c) {
    648     return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT);
    649 }
    650 
    651 /* property starts for UnicodeSet ------------------------------------------- */
    652 
    653 static UBool U_CALLCONV
    654 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    655     /* add the start code point to the USet */
    656     const USetAdder *sa=(const USetAdder *)context;
    657     sa->add(sa->set, start);
    658     (void)end;
    659     (void)value;
    660     return TRUE;
    661 }
    662 
    663 #define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1)
    664 
    665 U_CFUNC void U_EXPORT2
    666 uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    667     if(U_FAILURE(*pErrorCode)) {
    668         return;
    669     }
    670 
    671     /* add the start code point of each same-value range of the main trie */
    672     utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
    673 
    674     /* add code points with hardcoded properties, plus the ones following them */
    675 
    676     /* add for u_isblank() */
    677     USET_ADD_CP_AND_NEXT(sa, TAB);
    678 
    679     /* add for IS_THAT_CONTROL_SPACE() */
    680     sa->add(sa->set, CR+1); /* range TAB..CR */
    681     sa->add(sa->set, 0x1c);
    682     sa->add(sa->set, 0x1f+1);
    683     USET_ADD_CP_AND_NEXT(sa, NL);
    684 
    685     /* add for u_isIDIgnorable() what was not added above */
    686     sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
    687     sa->add(sa->set, HAIRSP);
    688     sa->add(sa->set, RLM+1);
    689     sa->add(sa->set, INHSWAP);
    690     sa->add(sa->set, NOMDIG+1);
    691     USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
    692 
    693     /* add no-break spaces for u_isWhitespace() what was not added above */
    694     USET_ADD_CP_AND_NEXT(sa, NBSP);
    695     USET_ADD_CP_AND_NEXT(sa, FIGURESP);
    696     USET_ADD_CP_AND_NEXT(sa, NNBSP);
    697 
    698     /* add for u_digit() */
    699     sa->add(sa->set, U_a);
    700     sa->add(sa->set, U_z+1);
    701     sa->add(sa->set, U_A);
    702     sa->add(sa->set, U_Z+1);
    703     sa->add(sa->set, U_FW_a);
    704     sa->add(sa->set, U_FW_z+1);
    705     sa->add(sa->set, U_FW_A);
    706     sa->add(sa->set, U_FW_Z+1);
    707 
    708     /* add for u_isxdigit() */
    709     sa->add(sa->set, U_f+1);
    710     sa->add(sa->set, U_F+1);
    711     sa->add(sa->set, U_FW_f+1);
    712     sa->add(sa->set, U_FW_F+1);
    713 
    714     /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
    715     sa->add(sa->set, WJ); /* range WJ..NOMDIG */
    716     sa->add(sa->set, 0xfff0);
    717     sa->add(sa->set, 0xfffb+1);
    718     sa->add(sa->set, 0xe0000);
    719     sa->add(sa->set, 0xe0fff+1);
    720 
    721     /* add for UCHAR_GRAPHEME_BASE and others */
    722     USET_ADD_CP_AND_NEXT(sa, CGJ);
    723 }
    724 
    725 U_CFUNC void U_EXPORT2
    726 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    727     if(U_FAILURE(*pErrorCode)) {
    728         return;
    729     }
    730 
    731     /* add the start code point of each same-value range of the properties vectors trie */
    732     utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
    733 }
    734