Home | History | Annotate | Download | only in common
      1 /*
      2 ********************************************************************************
      3 *   Copyright (C) 1996-2010, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 ********************************************************************************
      6 *
      7 * File UCHAR.C
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   04/02/97    aliu        Creation.
     13 *   4/15/99     Madhu       Updated all the function definitions for C Implementation
     14 *   5/20/99     Madhu       Added the function u_getVersion()
     15 *   8/19/1999   srl         Upgraded scripts to Unicode3.0
     16 *   11/11/1999  weiv        added u_isalnum(), cleaned comments
     17 *   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion.
     18 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
     19 ******************************************************************************
     20 */
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/uchar.h"
     24 #include "unicode/uscript.h"
     25 #include "unicode/udata.h"
     26 #include "umutex.h"
     27 #include "cmemory.h"
     28 #include "ucln_cmn.h"
     29 #include "utrie2.h"
     30 #include "udataswp.h"
     31 #include "uprops.h"
     32 
     33 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     34 
     35 /* dynamically loaded Unicode character properties -------------------------- */
     36 
     37 #define UCHAR_HARDCODE_DATA 1
     38 
     39 #if UCHAR_HARDCODE_DATA
     40 
     41 /* uchar_props_data.c is machine-generated by genprops --csource */
     42 #include "uchar_props_data.c"
     43 
     44 #else
     45 
     46 /*
     47  * loaded uprops.dat -
     48  * for a description of the file format, see icu/source/tools/genprops/store.c
     49  */
     50 static const char DATA_NAME[] = "uprops";
     51 static const char DATA_TYPE[] = "icu";
     52 
     53 static UDataMemory *propsData=NULL;
     54 static UErrorCode dataErrorCode=U_ZERO_ERROR;
     55 
     56 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
     57 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     58 
     59 static UTrie propsTrie={ 0 }, propsVectorsTrie={ 0 };
     60 static const uint32_t *pData32=NULL, *propsVectors=NULL;
     61 static int32_t countPropsVectors=0, propsVectorsColumns=0;
     62 
     63 static int8_t havePropsData=0;     /*  == 0   ->  Data has not been loaded.
     64                                     *   < 0   ->  Error occured attempting to load data.
     65                                     *   > 0   ->  Data has been successfully loaded.
     66                                     */
     67 
     68 /* index values loaded from uprops.dat */
     69 static int32_t indexes[UPROPS_INDEX_COUNT];
     70 
     71 static UBool U_CALLCONV
     72 isAcceptable(void *context,
     73              const char *type, const char *name,
     74              const UDataInfo *pInfo) {
     75     if(
     76         pInfo->size>=20 &&
     77         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     78         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     79         pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
     80         pInfo->dataFormat[1]==0x50 &&
     81         pInfo->dataFormat[2]==0x72 &&
     82         pInfo->dataFormat[3]==0x6f &&
     83         pInfo->formatVersion[0]==4 &&
     84         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     85         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     86     ) {
     87         uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     88         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     89         return TRUE;
     90     } else {
     91         return FALSE;
     92     }
     93 }
     94 
     95 static UBool U_CALLCONV uchar_cleanup(void)
     96 {
     97     if (propsData) {
     98         udata_close(propsData);
     99         propsData=NULL;
    100     }
    101     pData32=NULL;
    102     propsVectors=NULL;
    103     countPropsVectors=0;
    104     uprv_memset(dataVersion, 0, U_MAX_VERSION_LENGTH);
    105     dataErrorCode=U_ZERO_ERROR;
    106     havePropsData=0;
    107 
    108     return TRUE;
    109 }
    110 
    111 struct UCharProps {
    112     UDataMemory *propsData;
    113     UTrie propsTrie, propsVectorsTrie;
    114     const uint32_t *pData32;
    115 };
    116 typedef struct UCharProps UCharProps;
    117 
    118 /* open uprops.icu */
    119 static void
    120 _openProps(UCharProps *ucp, UErrorCode *pErrorCode) {
    121     const uint32_t *p;
    122     int32_t length;
    123 
    124     ucp->propsData=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
    125     if(U_FAILURE(*pErrorCode)) {
    126         return;
    127     }
    128 
    129     ucp->pData32=p=(const uint32_t *)udata_getMemory(ucp->propsData);
    130 
    131     /* unserialize the trie; it is directly after the int32_t indexes[UPROPS_INDEX_COUNT] */
    132     length=(int32_t)p[UPROPS_PROPS32_INDEX]*4;
    133     length=utrie_unserialize(&ucp->propsTrie, (const uint8_t *)(p+UPROPS_INDEX_COUNT), length-64, pErrorCode);
    134     if(U_FAILURE(*pErrorCode)) {
    135         return;
    136     }
    137 
    138     /* unserialize the properties vectors trie */
    139     length=(int32_t)(p[UPROPS_ADDITIONAL_VECTORS_INDEX]-p[UPROPS_ADDITIONAL_TRIE_INDEX])*4;
    140     if(length>0) {
    141         length=utrie_unserialize(&ucp->propsVectorsTrie, (const uint8_t *)(p+p[UPROPS_ADDITIONAL_TRIE_INDEX]), length, pErrorCode);
    142     }
    143     if(length<=0 || U_FAILURE(*pErrorCode)) {
    144         /*
    145          * length==0:
    146          * Allow the properties vectors trie to be missing -
    147          * also requires propsVectorsColumns=indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]
    148          * to be zero so that this trie is never accessed.
    149          */
    150         uprv_memset(&ucp->propsVectorsTrie, 0, sizeof(ucp->propsVectorsTrie));
    151     }
    152 }
    153 
    154 #endif
    155 
    156 #if !UCHAR_HARDCODE_DATA
    157 static int8_t
    158 uprv_loadPropsData(UErrorCode *pErrorCode) {
    159     /* load Unicode character properties data from file if necessary */
    160 
    161     /*
    162      * This lazy intialization with double-checked locking (without mutex protection for
    163      * haveNormData==0) is transiently unsafe under certain circumstances.
    164      * Check the readme and use u_init() if necessary.
    165      */
    166     if(havePropsData==0) {
    167         UCharProps ucp={ NULL };
    168 
    169         if(U_FAILURE(*pErrorCode)) {
    170             return havePropsData;
    171         }
    172 
    173         /* open the data outside the mutex block */
    174         _openProps(&ucp, pErrorCode);
    175 
    176         if(U_SUCCESS(*pErrorCode)) {
    177             /* in the mutex block, set the data for this process */
    178             umtx_lock(NULL);
    179             if(propsData==NULL) {
    180                 propsData=ucp.propsData;
    181                 ucp.propsData=NULL;
    182                 pData32=ucp.pData32;
    183                 ucp.pData32=NULL;
    184                 uprv_memcpy(&propsTrie, &ucp.propsTrie, sizeof(propsTrie));
    185                 uprv_memcpy(&propsVectorsTrie, &ucp.propsVectorsTrie, sizeof(propsVectorsTrie));
    186             }
    187 
    188             /* initialize some variables */
    189             uprv_memcpy(indexes, pData32, sizeof(indexes));
    190 
    191             /* additional properties */
    192             if(indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]!=0) {
    193                 propsVectors=pData32+indexes[UPROPS_ADDITIONAL_VECTORS_INDEX];
    194                 countPropsVectors=indexes[UPROPS_RESERVED_INDEX]-indexes[UPROPS_ADDITIONAL_VECTORS_INDEX];
    195                 propsVectorsColumns=indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX];
    196             }
    197 
    198             havePropsData=1;
    199             umtx_unlock(NULL);
    200         } else {
    201             dataErrorCode=*pErrorCode;
    202             havePropsData=-1;
    203         }
    204         ucln_common_registerCleanup(UCLN_COMMON_UCHAR, uchar_cleanup);
    205 
    206         /* if a different thread set it first, then close the extra data */
    207         udata_close(ucp.propsData); /* NULL if it was set correctly */
    208     }
    209 
    210     return havePropsData;
    211 }
    212 
    213 static int8_t
    214 loadPropsData(void) {
    215     UErrorCode   errorCode = U_ZERO_ERROR;
    216     int8_t       retVal    = uprv_loadPropsData(&errorCode);
    217     return retVal;
    218 }
    219 
    220 #endif
    221 
    222 /* constants and macros for access to the data ------------------------------ */
    223 
    224 /* getting a uint32_t properties word from the data */
    225 #if UCHAR_HARDCODE_DATA
    226 
    227 #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c));
    228 
    229 #else
    230 
    231 #define HAVE_DATA (havePropsData>0 || loadPropsData()>0)
    232 #define GET_PROPS_UNSAFE(c, result) \
    233     UTRIE_GET16(&propsTrie, c, result);
    234 #define GET_PROPS(c, result) \
    235     if(HAVE_DATA) { \
    236         GET_PROPS_UNSAFE(c, result); \
    237     } else { \
    238         (result)=0; \
    239     }
    240 
    241 #endif
    242 
    243 U_CFUNC UBool
    244 uprv_haveProperties(UErrorCode *pErrorCode) {
    245     if(U_FAILURE(*pErrorCode)) {
    246         return FALSE;
    247     }
    248 #if !UCHAR_HARDCODE_DATA
    249     if(havePropsData==0) {
    250         uprv_loadPropsData(pErrorCode);
    251     }
    252     if(havePropsData<0) {
    253         *pErrorCode=dataErrorCode;
    254         return FALSE;
    255     }
    256 #endif
    257     return TRUE;
    258 }
    259 
    260 /* API functions ------------------------------------------------------------ */
    261 
    262 /* Gets the Unicode character's general category.*/
    263 U_CAPI int8_t U_EXPORT2
    264 u_charType(UChar32 c) {
    265     uint32_t props;
    266     GET_PROPS(c, props);
    267     return (int8_t)GET_CATEGORY(props);
    268 }
    269 
/*
 * Enumerate all code points with their general categories.
 *
 * This struct bundles the caller's range callback with the caller's context
 * pointer so that both can be passed through the single context argument of
 * utrie2_enum(); see u_enumCharTypes() and _enumTypeRange().
 */
struct _EnumTypeCallback {
    UCharEnumTypeRange *enumRange;  /* the caller's per-range callback */
    const void *context;            /* passed back unchanged to enumRange */
};
    275 
    276 static uint32_t U_CALLCONV
    277 _enumTypeValue(const void *context, uint32_t value) {
    278     return GET_CATEGORY(value);
    279 }
    280 
    281 static UBool U_CALLCONV
    282 _enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    283     /* just cast the value to UCharCategory */
    284     return ((struct _EnumTypeCallback *)context)->
    285         enumRange(((struct _EnumTypeCallback *)context)->context,
    286                   start, end+1, (UCharCategory)value);
    287 }
    288 
    289 U_CAPI void U_EXPORT2
    290 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
    291     struct _EnumTypeCallback callback;
    292 
    293     if(enumRange==NULL
    294 #if !UCHAR_HARDCODE_DATA
    295         || !HAVE_DATA
    296 #endif
    297     ) {
    298         return;
    299     }
    300 
    301     callback.enumRange=enumRange;
    302     callback.context=context;
    303     utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
    304 }
    305 
    306 /* Checks if ch is a lower case letter.*/
    307 U_CAPI UBool U_EXPORT2
    308 u_islower(UChar32 c) {
    309     uint32_t props;
    310     GET_PROPS(c, props);
    311     return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER);
    312 }
    313 
    314 /* Checks if ch is an upper case letter.*/
    315 U_CAPI UBool U_EXPORT2
    316 u_isupper(UChar32 c) {
    317     uint32_t props;
    318     GET_PROPS(c, props);
    319     return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER);
    320 }
    321 
    322 /* Checks if ch is a title case letter; usually upper case letters.*/
    323 U_CAPI UBool U_EXPORT2
    324 u_istitle(UChar32 c) {
    325     uint32_t props;
    326     GET_PROPS(c, props);
    327     return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER);
    328 }
    329 
    330 /* Checks if ch is a decimal digit. */
    331 U_CAPI UBool U_EXPORT2
    332 u_isdigit(UChar32 c) {
    333     uint32_t props;
    334     GET_PROPS(c, props);
    335     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
    336 }
    337 
    338 U_CAPI UBool U_EXPORT2
    339 u_isxdigit(UChar32 c) {
    340     uint32_t props;
    341 
    342     /* check ASCII and Fullwidth ASCII a-fA-F */
    343     if(
    344         (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
    345         (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
    346     ) {
    347         return TRUE;
    348     }
    349 
    350     GET_PROPS(c, props);
    351     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
    352 }
    353 
    354 /* Checks if the Unicode character is a letter.*/
    355 U_CAPI UBool U_EXPORT2
    356 u_isalpha(UChar32 c) {
    357     uint32_t props;
    358     GET_PROPS(c, props);
    359     return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0);
    360 }
    361 
    362 U_CAPI UBool U_EXPORT2
    363 u_isUAlphabetic(UChar32 c) {
    364     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0;
    365 }
    366 
    367 /* Checks if c is a letter or a decimal digit */
    368 U_CAPI UBool U_EXPORT2
    369 u_isalnum(UChar32 c) {
    370     uint32_t props;
    371     GET_PROPS(c, props);
    372     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
    373 }
    374 
    375 /**
    376  * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
    377  * @internal
    378  */
    379 U_CFUNC UBool
    380 u_isalnumPOSIX(UChar32 c) {
    381     return (UBool)(u_isUAlphabetic(c) || u_isdigit(c));
    382 }
    383 
    384 /* Checks if ch is a unicode character with assigned character type.*/
    385 U_CAPI UBool U_EXPORT2
    386 u_isdefined(UChar32 c) {
    387     uint32_t props;
    388     GET_PROPS(c, props);
    389     return (UBool)(GET_CATEGORY(props)!=0);
    390 }
    391 
    392 /* Checks if the Unicode character is a base form character that can take a diacritic.*/
    393 U_CAPI UBool U_EXPORT2
    394 u_isbase(UChar32 c) {
    395     uint32_t props;
    396     GET_PROPS(c, props);
    397     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0);
    398 }
    399 
    400 /* Checks if the Unicode character is a control character.*/
    401 U_CAPI UBool U_EXPORT2
    402 u_iscntrl(UChar32 c) {
    403     uint32_t props;
    404     GET_PROPS(c, props);
    405     return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0);
    406 }
    407 
    408 U_CAPI UBool U_EXPORT2
    409 u_isISOControl(UChar32 c) {
    410     return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f);
    411 }
    412 
    413 /* Some control characters that are used as space. */
    414 #define IS_THAT_CONTROL_SPACE(c) \
    415     (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
    416 
    417 /* Java has decided that U+0085 New Line is not whitespace any more. */
    418 #define IS_THAT_ASCII_CONTROL_SPACE(c) \
    419     (c<=0x1f && c>=TAB && (c<=CR || c>=0x1c))
    420 
    421 /* Checks if the Unicode character is a space character.*/
    422 U_CAPI UBool U_EXPORT2
    423 u_isspace(UChar32 c) {
    424     uint32_t props;
    425     GET_PROPS(c, props);
    426     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c));
    427 }
    428 
    429 U_CAPI UBool U_EXPORT2
    430 u_isJavaSpaceChar(UChar32 c) {
    431     uint32_t props;
    432     GET_PROPS(c, props);
    433     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0);
    434 }
    435 
    436 /* Checks if the Unicode character is a whitespace character.*/
    437 U_CAPI UBool U_EXPORT2
    438 u_isWhitespace(UChar32 c) {
    439     uint32_t props;
    440     GET_PROPS(c, props);
    441     return (UBool)(
    442                 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 &&
    443                     c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */
    444                 IS_THAT_ASCII_CONTROL_SPACE(c)
    445            );
    446 }
    447 
    448 U_CAPI UBool U_EXPORT2
    449 u_isblank(UChar32 c) {
    450     if((uint32_t)c<=0x9f) {
    451         return c==9 || c==0x20; /* TAB or SPACE */
    452     } else {
    453         /* Zs */
    454         uint32_t props;
    455         GET_PROPS(c, props);
    456         return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
    457     }
    458 }
    459 
    460 U_CAPI UBool U_EXPORT2
    461 u_isUWhiteSpace(UChar32 c) {
    462     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0;
    463 }
    464 
    465 /* Checks if the Unicode character is printable.*/
    466 U_CAPI UBool U_EXPORT2
    467 u_isprint(UChar32 c) {
    468     uint32_t props;
    469     GET_PROPS(c, props);
    470     /* comparing ==0 returns FALSE for the categories mentioned */
    471     return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0);
    472 }
    473 
    474 /**
    475  * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
    476  * Implements UCHAR_POSIX_PRINT.
    477  * @internal
    478  */
    479 U_CFUNC UBool
    480 u_isprintPOSIX(UChar32 c) {
    481     uint32_t props;
    482     GET_PROPS(c, props);
    483     /*
    484      * The only cntrl character in graph+blank is TAB (in blank).
    485      * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
    486      */
    487     return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c));
    488 }
    489 
    490 U_CAPI UBool U_EXPORT2
    491 u_isgraph(UChar32 c) {
    492     uint32_t props;
    493     GET_PROPS(c, props);
    494     /* comparing ==0 returns FALSE for the categories mentioned */
    495     return (UBool)((CAT_MASK(props)&
    496                     (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
    497                    ==0);
    498 }
    499 
    500 /**
    501  * Checks if c is in
    502  * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
    503  * with space=\p{Whitespace} and Control=Cc.
    504  * Implements UCHAR_POSIX_GRAPH.
    505  * @internal
    506  */
    507 U_CFUNC UBool
    508 u_isgraphPOSIX(UChar32 c) {
    509     uint32_t props;
    510     GET_PROPS(c, props);
    511     /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
    512     /* comparing ==0 returns FALSE for the categories mentioned */
    513     return (UBool)((CAT_MASK(props)&
    514                     (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
    515                    ==0);
    516 }
    517 
    518 U_CAPI UBool U_EXPORT2
    519 u_ispunct(UChar32 c) {
    520     uint32_t props;
    521     GET_PROPS(c, props);
    522     return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0);
    523 }
    524 
    525 /* Checks if the Unicode character can start a Unicode identifier.*/
    526 U_CAPI UBool U_EXPORT2
    527 u_isIDStart(UChar32 c) {
    528     /* same as u_isalpha() */
    529     uint32_t props;
    530     GET_PROPS(c, props);
    531     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
    532 }
    533 
    534 /* Checks if the Unicode character can be a Unicode identifier part other than starting the
    535  identifier.*/
    536 U_CAPI UBool U_EXPORT2
    537 u_isIDPart(UChar32 c) {
    538     uint32_t props;
    539     GET_PROPS(c, props);
    540     return (UBool)(
    541            (CAT_MASK(props)&
    542             (U_GC_ND_MASK|U_GC_NL_MASK|
    543              U_GC_L_MASK|
    544              U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK)
    545            )!=0 ||
    546            u_isIDIgnorable(c));
    547 }
    548 
    549 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
    550 U_CAPI UBool U_EXPORT2
    551 u_isIDIgnorable(UChar32 c) {
    552     if(c<=0x9f) {
    553         return u_isISOControl(c) && !IS_THAT_ASCII_CONTROL_SPACE(c);
    554     } else {
    555         uint32_t props;
    556         GET_PROPS(c, props);
    557         return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
    558     }
    559 }
    560 
    561 /*Checks if the Unicode character can start a Java identifier.*/
    562 U_CAPI UBool U_EXPORT2
    563 u_isJavaIDStart(UChar32 c) {
    564     uint32_t props;
    565     GET_PROPS(c, props);
    566     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0);
    567 }
    568 
    569 /*Checks if the Unicode character can be a Java identifier part other than starting the
    570  * identifier.
    571  */
    572 U_CAPI UBool U_EXPORT2
    573 u_isJavaIDPart(UChar32 c) {
    574     uint32_t props;
    575     GET_PROPS(c, props);
    576     return (UBool)(
    577            (CAT_MASK(props)&
    578             (U_GC_ND_MASK|U_GC_NL_MASK|
    579              U_GC_L_MASK|
    580              U_GC_SC_MASK|U_GC_PC_MASK|
    581              U_GC_MC_MASK|U_GC_MN_MASK)
    582            )!=0 ||
    583            u_isIDIgnorable(c));
    584 }
    585 
    586 U_CAPI int32_t U_EXPORT2
    587 u_charDigitValue(UChar32 c) {
    588     uint32_t props;
    589     int32_t value;
    590     GET_PROPS(c, props);
    591     value=(int32_t)GET_NUMERIC_TYPE_VALUE(props)-UPROPS_NTV_DECIMAL_START;
    592     if(value<=9) {
    593         return value;
    594     } else {
    595         return -1;
    596     }
    597 }
    598 
    599 U_CAPI double U_EXPORT2
    600 u_getNumericValue(UChar32 c) {
    601     uint32_t props;
    602     int32_t ntv;
    603     GET_PROPS(c, props);
    604     ntv=(int32_t)GET_NUMERIC_TYPE_VALUE(props);
    605 
    606     if(ntv==UPROPS_NTV_NONE) {
    607         return U_NO_NUMERIC_VALUE;
    608     } else if(ntv<UPROPS_NTV_DIGIT_START) {
    609         /* decimal digit */
    610         return ntv-UPROPS_NTV_DECIMAL_START;
    611     } else if(ntv<UPROPS_NTV_NUMERIC_START) {
    612         /* other digit */
    613         return ntv-UPROPS_NTV_DIGIT_START;
    614     } else if(ntv<UPROPS_NTV_FRACTION_START) {
    615         /* small integer */
    616         return ntv-UPROPS_NTV_NUMERIC_START;
    617     } else if(ntv<UPROPS_NTV_LARGE_START) {
    618         /* fraction */
    619         int32_t numerator=(ntv>>4)-12;
    620         int32_t denominator=(ntv&0xf)+1;
    621         return (double)numerator/denominator;
    622     } else if(ntv<UPROPS_NTV_RESERVED_START) {
    623         /* large, single-significant-digit integer */
    624         double numValue;
    625         int32_t mant=(ntv>>5)-14;
    626         int32_t exp=(ntv&0x1f)+2;
    627         numValue=mant;
    628 
    629         /* multiply by 10^exp without math.h */
    630         while(exp>=4) {
    631             numValue*=10000.;
    632             exp-=4;
    633         }
    634         switch(exp) {
    635         case 3:
    636             numValue*=1000.;
    637             break;
    638         case 2:
    639             numValue*=100.;
    640             break;
    641         case 1:
    642             numValue*=10.;
    643             break;
    644         case 0:
    645         default:
    646             break;
    647         }
    648 
    649         return numValue;
    650     } else {
    651         /* reserved */
    652         return U_NO_NUMERIC_VALUE;
    653     }
    654 }
    655 
    656 U_CAPI int32_t U_EXPORT2
    657 u_digit(UChar32 ch, int8_t radix) {
    658     int8_t value;
    659     if((uint8_t)(radix-2)<=(36-2)) {
    660         value=(int8_t)u_charDigitValue(ch);
    661         if(value<0) {
    662             /* ch is not a decimal digit, try latin letters */
    663             if(ch>=0x61 && ch<=0x7A) {
    664                 value=(int8_t)(ch-0x57);  /* ch - 'a' + 10 */
    665             } else if(ch>=0x41 && ch<=0x5A) {
    666                 value=(int8_t)(ch-0x37);  /* ch - 'A' + 10 */
    667             } else if(ch>=0xFF41 && ch<=0xFF5A) {
    668                 value=(int8_t)(ch-0xFF37);  /* fullwidth ASCII a-z */
    669             } else if(ch>=0xFF21 && ch<=0xFF3A) {
    670                 value=(int8_t)(ch-0xFF17);  /* fullwidth ASCII A-Z */
    671             }
    672         }
    673     } else {
    674         value=-1;   /* invalid radix */
    675     }
    676     return (int8_t)((value<radix) ? value : -1);
    677 }
    678 
    679 U_CAPI UChar32 U_EXPORT2
    680 u_forDigit(int32_t digit, int8_t radix) {
    681     if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) {
    682         return 0;
    683     } else if(digit<10) {
    684         return (UChar32)(0x30+digit);
    685     } else {
    686         return (UChar32)((0x61-10)+digit);
    687     }
    688 }
    689 
    690 /* miscellaneous, and support for uprops.c ---------------------------------- */
    691 
    692 U_CAPI void U_EXPORT2
    693 u_getUnicodeVersion(UVersionInfo versionArray) {
    694     if(versionArray!=NULL) {
    695         uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH);
    696     }
    697 }
    698 
    699 U_CFUNC uint32_t
    700 u_getUnicodeProperties(UChar32 c, int32_t column) {
    701     uint16_t vecIndex;
    702 
    703     if(column==-1) {
    704         uint32_t props;
    705         GET_PROPS(c, props);
    706         return props;
    707     } else if(
    708 #if !UCHAR_HARDCODE_DATA
    709                !HAVE_DATA || countPropsVectors==0 ||
    710 #endif
    711                column<0 || column>=propsVectorsColumns
    712     ) {
    713         return 0;
    714     } else {
    715         vecIndex=UTRIE2_GET16(&propsVectorsTrie, c);
    716         return propsVectors[vecIndex+column];
    717     }
    718 }
    719 
    720 U_CFUNC int32_t
    721 uprv_getMaxValues(int32_t column) {
    722 #if !UCHAR_HARDCODE_DATA
    723     if(HAVE_DATA) {
    724 #endif
    725         switch(column) {
    726         case 0:
    727             return indexes[UPROPS_MAX_VALUES_INDEX];
    728         case 2:
    729             return indexes[UPROPS_MAX_VALUES_2_INDEX];
    730         default:
    731             return 0;
    732         }
    733 #if !UCHAR_HARDCODE_DATA
    734     } else {
    735         return 0;
    736     }
    737 #endif
    738 }
    739 
    740 U_CAPI void U_EXPORT2
    741 u_charAge(UChar32 c, UVersionInfo versionArray) {
    742     if(versionArray!=NULL) {
    743         uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT;
    744         versionArray[0]=(uint8_t)(version>>4);
    745         versionArray[1]=(uint8_t)(version&0xf);
    746         versionArray[2]=versionArray[3]=0;
    747     }
    748 }
    749 
    750 U_CAPI UScriptCode U_EXPORT2
    751 uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
    752     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    753         return USCRIPT_INVALID_CODE;
    754     }
    755     if((uint32_t)c>0x10ffff) {
    756         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    757         return USCRIPT_INVALID_CODE;
    758     }
    759 
    760     return (UScriptCode)(u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_MASK);
    761 }
    762 
    763 U_CAPI UBlockCode U_EXPORT2
    764 ublock_getCode(UChar32 c) {
    765     return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT);
    766 }
    767 
    768 /* property starts for UnicodeSet ------------------------------------------- */
    769 
    770 static UBool U_CALLCONV
    771 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    772     /* add the start code point to the USet */
    773     const USetAdder *sa=(const USetAdder *)context;
    774     sa->add(sa->set, start);
    775     return TRUE;
    776 }
    777 
    778 #define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1)
    779 
    780 U_CFUNC void U_EXPORT2
    781 uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    782     if(U_FAILURE(*pErrorCode)) {
    783         return;
    784     }
    785 
    786 #if !UCHAR_HARDCODE_DATA
    787     if(!HAVE_DATA) {
    788         *pErrorCode=dataErrorCode;
    789         return;
    790     }
    791 #endif
    792 
    793     /* add the start code point of each same-value range of the main trie */
    794     utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
    795 
    796     /* add code points with hardcoded properties, plus the ones following them */
    797 
    798     /* add for u_isblank() */
    799     USET_ADD_CP_AND_NEXT(sa, TAB);
    800 
    801     /* add for IS_THAT_CONTROL_SPACE() */
    802     sa->add(sa->set, CR+1); /* range TAB..CR */
    803     sa->add(sa->set, 0x1c);
    804     sa->add(sa->set, 0x1f+1);
    805     USET_ADD_CP_AND_NEXT(sa, NL);
    806 
    807     /* add for u_isIDIgnorable() what was not added above */
    808     sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
    809     sa->add(sa->set, HAIRSP);
    810     sa->add(sa->set, RLM+1);
    811     sa->add(sa->set, INHSWAP);
    812     sa->add(sa->set, NOMDIG+1);
    813     USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
    814 
    815     /* add no-break spaces for u_isWhitespace() what was not added above */
    816     USET_ADD_CP_AND_NEXT(sa, NBSP);
    817     USET_ADD_CP_AND_NEXT(sa, FIGURESP);
    818     USET_ADD_CP_AND_NEXT(sa, NNBSP);
    819 
    820     /* add for u_digit() */
    821     sa->add(sa->set, U_a);
    822     sa->add(sa->set, U_z+1);
    823     sa->add(sa->set, U_A);
    824     sa->add(sa->set, U_Z+1);
    825     sa->add(sa->set, U_FW_a);
    826     sa->add(sa->set, U_FW_z+1);
    827     sa->add(sa->set, U_FW_A);
    828     sa->add(sa->set, U_FW_Z+1);
    829 
    830     /* add for u_isxdigit() */
    831     sa->add(sa->set, U_f+1);
    832     sa->add(sa->set, U_F+1);
    833     sa->add(sa->set, U_FW_f+1);
    834     sa->add(sa->set, U_FW_F+1);
    835 
    836     /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
    837     sa->add(sa->set, WJ); /* range WJ..NOMDIG */
    838     sa->add(sa->set, 0xfff0);
    839     sa->add(sa->set, 0xfffb+1);
    840     sa->add(sa->set, 0xe0000);
    841     sa->add(sa->set, 0xe0fff+1);
    842 
    843     /* add for UCHAR_GRAPHEME_BASE and others */
    844     USET_ADD_CP_AND_NEXT(sa, CGJ);
    845 }
    846 
    847 U_CFUNC void U_EXPORT2
    848 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    849     if(U_FAILURE(*pErrorCode)) {
    850         return;
    851     }
    852 
    853 #if !UCHAR_HARDCODE_DATA
    854     if(!HAVE_DATA) {
    855         *pErrorCode=dataErrorCode;
    856         return;
    857     }
    858 #endif
    859 
    860     /* add the start code point of each same-value range of the properties vectors trie */
    861     if(propsVectorsColumns>0) {
    862         /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
    863         utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
    864     }
    865 }
    866