Home | History | Annotate | Download | only in common
      1 /*
      2 ********************************************************************************
      3 *   Copyright (C) 1996-2008, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 ********************************************************************************
      6 *
      7 * File UCHAR.C
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   04/02/97    aliu        Creation.
     13 *   4/15/99     Madhu       Updated all the function definitions for C Implementation
     14 *   5/20/99     Madhu       Added the function u_getVersion()
     15 *   8/19/1999   srl         Upgraded scripts to Unicode3.0
     16 *   11/11/1999  weiv        added u_isalnum(), cleaned comments
     17 *   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion.
     18 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
     19 ******************************************************************************
     20 */
     21 
     22 #include "unicode/utypes.h"
     23 #include "unicode/uchar.h"
     24 #include "unicode/uscript.h"
     25 #include "unicode/udata.h"
     26 #include "umutex.h"
     27 #include "cmemory.h"
     28 #include "ucln_cmn.h"
     29 #include "utrie2.h"
     30 #include "udataswp.h"
     31 #include "unormimp.h" /* JAMO_L_BASE etc. */
     32 #include "uprops.h"
     33 
     34 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     35 
     36 /* dynamically loaded Unicode character properties -------------------------- */
     37 
     38 #define UCHAR_HARDCODE_DATA 1
     39 
     40 #if UCHAR_HARDCODE_DATA
     41 
     42 /* uchar_props_data.c is machine-generated by genprops --csource */
     43 #include "uchar_props_data.c"
     44 
     45 #else
     46 
     47 /*
     48  * loaded uprops.dat -
     49  * for a description of the file format, see icu/source/tools/genprops/store.c
     50  */
     51 static const char DATA_NAME[] = "uprops";
     52 static const char DATA_TYPE[] = "icu";
     53 
     54 static UDataMemory *propsData=NULL;
     55 static UErrorCode dataErrorCode=U_ZERO_ERROR;
     56 
     57 static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
     58 static UVersionInfo dataVersion={ 0, 0, 0, 0 };
     59 
     60 static UTrie propsTrie={ 0 }, propsVectorsTrie={ 0 };
     61 static const uint32_t *pData32=NULL, *propsVectors=NULL;
     62 static int32_t countPropsVectors=0, propsVectorsColumns=0;
     63 
     64 static int8_t havePropsData=0;     /*  == 0   ->  Data has not been loaded.
     65                                     *   < 0   ->  Error occured attempting to load data.
     66                                     *   > 0   ->  Data has been successfully loaded.
     67                                     */
     68 
     69 /* index values loaded from uprops.dat */
     70 static int32_t indexes[UPROPS_INDEX_COUNT];
     71 
     72 static UBool U_CALLCONV
     73 isAcceptable(void *context,
     74              const char *type, const char *name,
     75              const UDataInfo *pInfo) {
     76     if(
     77         pInfo->size>=20 &&
     78         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
     79         pInfo->charsetFamily==U_CHARSET_FAMILY &&
     80         pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
     81         pInfo->dataFormat[1]==0x50 &&
     82         pInfo->dataFormat[2]==0x72 &&
     83         pInfo->dataFormat[3]==0x6f &&
     84         pInfo->formatVersion[0]==4 &&
     85         pInfo->formatVersion[2]==UTRIE_SHIFT &&
     86         pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
     87     ) {
     88         uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
     89         uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
     90         return TRUE;
     91     } else {
     92         return FALSE;
     93     }
     94 }
     95 
     96 static UBool U_CALLCONV uchar_cleanup(void)
     97 {
     98     if (propsData) {
     99         udata_close(propsData);
    100         propsData=NULL;
    101     }
    102     pData32=NULL;
    103     propsVectors=NULL;
    104     countPropsVectors=0;
    105     uprv_memset(dataVersion, 0, U_MAX_VERSION_LENGTH);
    106     dataErrorCode=U_ZERO_ERROR;
    107     havePropsData=0;
    108 
    109     return TRUE;
    110 }
    111 
    112 struct UCharProps {
    113     UDataMemory *propsData;
    114     UTrie propsTrie, propsVectorsTrie;
    115     const uint32_t *pData32;
    116 };
    117 typedef struct UCharProps UCharProps;
    118 
    119 /* open uprops.icu */
    120 static void
    121 _openProps(UCharProps *ucp, UErrorCode *pErrorCode) {
    122     const uint32_t *p;
    123     int32_t length;
    124 
    125     ucp->propsData=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
    126     if(U_FAILURE(*pErrorCode)) {
    127         return;
    128     }
    129 
    130     ucp->pData32=p=(const uint32_t *)udata_getMemory(ucp->propsData);
    131 
    132     /* unserialize the trie; it is directly after the int32_t indexes[UPROPS_INDEX_COUNT] */
    133     length=(int32_t)p[UPROPS_PROPS32_INDEX]*4;
    134     length=utrie_unserialize(&ucp->propsTrie, (const uint8_t *)(p+UPROPS_INDEX_COUNT), length-64, pErrorCode);
    135     if(U_FAILURE(*pErrorCode)) {
    136         return;
    137     }
    138 
    139     /* unserialize the properties vectors trie */
    140     length=(int32_t)(p[UPROPS_ADDITIONAL_VECTORS_INDEX]-p[UPROPS_ADDITIONAL_TRIE_INDEX])*4;
    141     if(length>0) {
    142         length=utrie_unserialize(&ucp->propsVectorsTrie, (const uint8_t *)(p+p[UPROPS_ADDITIONAL_TRIE_INDEX]), length, pErrorCode);
    143     }
    144     if(length<=0 || U_FAILURE(*pErrorCode)) {
    145         /*
    146          * length==0:
    147          * Allow the properties vectors trie to be missing -
    148          * also requires propsVectorsColumns=indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX]
    149          * to be zero so that this trie is never accessed.
    150          */
    151         uprv_memset(&ucp->propsVectorsTrie, 0, sizeof(ucp->propsVectorsTrie));
    152     }
    153 }
    154 
    155 #endif
    156 
    157 #if !UCHAR_HARDCODE_DATA
    158 static int8_t
    159 uprv_loadPropsData(UErrorCode *pErrorCode) {
    160     /* load Unicode character properties data from file if necessary */
    161 
    162     /*
    163      * This lazy intialization with double-checked locking (without mutex protection for
    164      * haveNormData==0) is transiently unsafe under certain circumstances.
    165      * Check the readme and use u_init() if necessary.
    166      */
    167     if(havePropsData==0) {
    168         UCharProps ucp={ NULL };
    169 
    170         if(U_FAILURE(*pErrorCode)) {
    171             return havePropsData;
    172         }
    173 
    174         /* open the data outside the mutex block */
    175         _openProps(&ucp, pErrorCode);
    176 
    177         if(U_SUCCESS(*pErrorCode)) {
    178             /* in the mutex block, set the data for this process */
    179             umtx_lock(NULL);
    180             if(propsData==NULL) {
    181                 propsData=ucp.propsData;
    182                 ucp.propsData=NULL;
    183                 pData32=ucp.pData32;
    184                 ucp.pData32=NULL;
    185                 uprv_memcpy(&propsTrie, &ucp.propsTrie, sizeof(propsTrie));
    186                 uprv_memcpy(&propsVectorsTrie, &ucp.propsVectorsTrie, sizeof(propsVectorsTrie));
    187             }
    188 
    189             /* initialize some variables */
    190             uprv_memcpy(indexes, pData32, sizeof(indexes));
    191 
    192             /* additional properties */
    193             if(indexes[UPROPS_ADDITIONAL_VECTORS_INDEX]!=0) {
    194                 propsVectors=pData32+indexes[UPROPS_ADDITIONAL_VECTORS_INDEX];
    195                 countPropsVectors=indexes[UPROPS_RESERVED_INDEX]-indexes[UPROPS_ADDITIONAL_VECTORS_INDEX];
    196                 propsVectorsColumns=indexes[UPROPS_ADDITIONAL_VECTORS_COLUMNS_INDEX];
    197             }
    198 
    199             havePropsData=1;
    200             umtx_unlock(NULL);
    201         } else {
    202             dataErrorCode=*pErrorCode;
    203             havePropsData=-1;
    204         }
    205         ucln_common_registerCleanup(UCLN_COMMON_UCHAR, uchar_cleanup);
    206 
    207         /* if a different thread set it first, then close the extra data */
    208         udata_close(ucp.propsData); /* NULL if it was set correctly */
    209     }
    210 
    211     return havePropsData;
    212 }
    213 
    214 static int8_t
    215 loadPropsData(void) {
    216     UErrorCode   errorCode = U_ZERO_ERROR;
    217     int8_t       retVal    = uprv_loadPropsData(&errorCode);
    218     return retVal;
    219 }
    220 
    221 #endif
    222 
    223 /* constants and macros for access to the data ------------------------------ */
    224 
    225 /* getting a uint32_t properties word from the data */
    226 #if UCHAR_HARDCODE_DATA
    227 
    228 #define GET_PROPS(c, result) ((result)=UTRIE2_GET16(&propsTrie, c));
    229 
    230 #else
    231 
    232 #define HAVE_DATA (havePropsData>0 || loadPropsData()>0)
    233 #define GET_PROPS_UNSAFE(c, result) \
    234     UTRIE_GET16(&propsTrie, c, result);
    235 #define GET_PROPS(c, result) \
    236     if(HAVE_DATA) { \
    237         GET_PROPS_UNSAFE(c, result); \
    238     } else { \
    239         (result)=0; \
    240     }
    241 
    242 #endif
    243 
    244 U_CFUNC UBool
    245 uprv_haveProperties(UErrorCode *pErrorCode) {
    246     if(U_FAILURE(*pErrorCode)) {
    247         return FALSE;
    248     }
    249 #if !UCHAR_HARDCODE_DATA
    250     if(havePropsData==0) {
    251         uprv_loadPropsData(pErrorCode);
    252     }
    253     if(havePropsData<0) {
    254         *pErrorCode=dataErrorCode;
    255         return FALSE;
    256     }
    257 #endif
    258     return TRUE;
    259 }
    260 
    261 /* API functions ------------------------------------------------------------ */
    262 
    263 /* Gets the Unicode character's general category.*/
    264 U_CAPI int8_t U_EXPORT2
    265 u_charType(UChar32 c) {
    266     uint32_t props;
    267     GET_PROPS(c, props);
    268     return (int8_t)GET_CATEGORY(props);
    269 }
    270 
    271 /* Enumerate all code points with their general categories. */
    272 struct _EnumTypeCallback {
    273     UCharEnumTypeRange *enumRange;
    274     const void *context;
    275 };
    276 
    277 static uint32_t U_CALLCONV
    278 _enumTypeValue(const void *context, uint32_t value) {
    279     return GET_CATEGORY(value);
    280 }
    281 
    282 static UBool U_CALLCONV
    283 _enumTypeRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    284     /* just cast the value to UCharCategory */
    285     return ((struct _EnumTypeCallback *)context)->
    286         enumRange(((struct _EnumTypeCallback *)context)->context,
    287                   start, end+1, (UCharCategory)value);
    288 }
    289 
    290 U_CAPI void U_EXPORT2
    291 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context) {
    292     struct _EnumTypeCallback callback;
    293 
    294     if(enumRange==NULL
    295 #if !UCHAR_HARDCODE_DATA
    296         || !HAVE_DATA
    297 #endif
    298     ) {
    299         return;
    300     }
    301 
    302     callback.enumRange=enumRange;
    303     callback.context=context;
    304     utrie2_enum(&propsTrie, _enumTypeValue, _enumTypeRange, &callback);
    305 }
    306 
    307 /* Checks if ch is a lower case letter.*/
    308 U_CAPI UBool U_EXPORT2
    309 u_islower(UChar32 c) {
    310     uint32_t props;
    311     GET_PROPS(c, props);
    312     return (UBool)(GET_CATEGORY(props)==U_LOWERCASE_LETTER);
    313 }
    314 
    315 /* Checks if ch is an upper case letter.*/
    316 U_CAPI UBool U_EXPORT2
    317 u_isupper(UChar32 c) {
    318     uint32_t props;
    319     GET_PROPS(c, props);
    320     return (UBool)(GET_CATEGORY(props)==U_UPPERCASE_LETTER);
    321 }
    322 
    323 /* Checks if ch is a title case letter; usually upper case letters.*/
    324 U_CAPI UBool U_EXPORT2
    325 u_istitle(UChar32 c) {
    326     uint32_t props;
    327     GET_PROPS(c, props);
    328     return (UBool)(GET_CATEGORY(props)==U_TITLECASE_LETTER);
    329 }
    330 
    331 /* Checks if ch is a decimal digit. */
    332 U_CAPI UBool U_EXPORT2
    333 u_isdigit(UChar32 c) {
    334     uint32_t props;
    335     GET_PROPS(c, props);
    336     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
    337 }
    338 
    339 U_CAPI UBool U_EXPORT2
    340 u_isxdigit(UChar32 c) {
    341     uint32_t props;
    342 
    343     /* check ASCII and Fullwidth ASCII a-fA-F */
    344     if(
    345         (c<=0x66 && c>=0x41 && (c<=0x46 || c>=0x61)) ||
    346         (c>=0xff21 && c<=0xff46 && (c<=0xff26 || c>=0xff41))
    347     ) {
    348         return TRUE;
    349     }
    350 
    351     GET_PROPS(c, props);
    352     return (UBool)(GET_CATEGORY(props)==U_DECIMAL_DIGIT_NUMBER);
    353 }
    354 
    355 /* Checks if the Unicode character is a letter.*/
    356 U_CAPI UBool U_EXPORT2
    357 u_isalpha(UChar32 c) {
    358     uint32_t props;
    359     GET_PROPS(c, props);
    360     return (UBool)((CAT_MASK(props)&U_GC_L_MASK)!=0);
    361 }
    362 
    363 U_CAPI UBool U_EXPORT2
    364 u_isUAlphabetic(UChar32 c) {
    365     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_ALPHABETIC))!=0;
    366 }
    367 
    368 /* Checks if c is a letter or a decimal digit */
    369 U_CAPI UBool U_EXPORT2
    370 u_isalnum(UChar32 c) {
    371     uint32_t props;
    372     GET_PROPS(c, props);
    373     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_ND_MASK))!=0);
    374 }
    375 
    376 /**
    377  * Checks if c is alphabetic, or a decimal digit; implements UCHAR_POSIX_ALNUM.
    378  * @internal
    379  */
    380 U_CFUNC UBool
    381 u_isalnumPOSIX(UChar32 c) {
    382     return (UBool)(u_isUAlphabetic(c) || u_isdigit(c));
    383 }
    384 
    385 /* Checks if ch is a unicode character with assigned character type.*/
    386 U_CAPI UBool U_EXPORT2
    387 u_isdefined(UChar32 c) {
    388     uint32_t props;
    389     GET_PROPS(c, props);
    390     return (UBool)(GET_CATEGORY(props)!=0);
    391 }
    392 
    393 /* Checks if the Unicode character is a base form character that can take a diacritic.*/
    394 U_CAPI UBool U_EXPORT2
    395 u_isbase(UChar32 c) {
    396     uint32_t props;
    397     GET_PROPS(c, props);
    398     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_N_MASK|U_GC_MC_MASK|U_GC_ME_MASK))!=0);
    399 }
    400 
    401 /* Checks if the Unicode character is a control character.*/
    402 U_CAPI UBool U_EXPORT2
    403 u_iscntrl(UChar32 c) {
    404     uint32_t props;
    405     GET_PROPS(c, props);
    406     return (UBool)((CAT_MASK(props)&(U_GC_CC_MASK|U_GC_CF_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK))!=0);
    407 }
    408 
    409 U_CAPI UBool U_EXPORT2
    410 u_isISOControl(UChar32 c) {
    411     return (uint32_t)c<=0x9f && (c<=0x1f || c>=0x7f);
    412 }
    413 
    414 /* Some control characters that are used as space. */
    415 #define IS_THAT_CONTROL_SPACE(c) \
    416     (c<=0x9f && ((c>=TAB && c<=CR) || (c>=0x1c && c <=0x1f) || c==NL))
    417 
    418 /* Checks if the Unicode character is a space character.*/
    419 U_CAPI UBool U_EXPORT2
    420 u_isspace(UChar32 c) {
    421     uint32_t props;
    422     GET_PROPS(c, props);
    423     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0 || IS_THAT_CONTROL_SPACE(c));
    424 }
    425 
    426 U_CAPI UBool U_EXPORT2
    427 u_isJavaSpaceChar(UChar32 c) {
    428     uint32_t props;
    429     GET_PROPS(c, props);
    430     return (UBool)((CAT_MASK(props)&U_GC_Z_MASK)!=0);
    431 }
    432 
    433 /* Checks if the Unicode character is a whitespace character.*/
    434 U_CAPI UBool U_EXPORT2
    435 u_isWhitespace(UChar32 c) {
    436     uint32_t props;
    437     GET_PROPS(c, props);
    438     return (UBool)(
    439                 ((CAT_MASK(props)&U_GC_Z_MASK)!=0 &&
    440                     c!=NBSP && c!=FIGURESP && c!=NNBSP) || /* exclude no-break spaces */
    441                 IS_THAT_CONTROL_SPACE(c)
    442            );
    443 }
    444 
    445 U_CAPI UBool U_EXPORT2
    446 u_isblank(UChar32 c) {
    447     if((uint32_t)c<=0x9f) {
    448         return c==9 || c==0x20; /* TAB or SPACE */
    449     } else {
    450         /* Zs */
    451         uint32_t props;
    452         GET_PROPS(c, props);
    453         return (UBool)(GET_CATEGORY(props)==U_SPACE_SEPARATOR);
    454     }
    455 }
    456 
    457 U_CAPI UBool U_EXPORT2
    458 u_isUWhiteSpace(UChar32 c) {
    459     return (u_getUnicodeProperties(c, 1)&U_MASK(UPROPS_WHITE_SPACE))!=0;
    460 }
    461 
    462 /* Checks if the Unicode character is printable.*/
    463 U_CAPI UBool U_EXPORT2
    464 u_isprint(UChar32 c) {
    465     uint32_t props;
    466     GET_PROPS(c, props);
    467     /* comparing ==0 returns FALSE for the categories mentioned */
    468     return (UBool)((CAT_MASK(props)&U_GC_C_MASK)==0);
    469 }
    470 
    471 /**
    472  * Checks if c is in \p{graph}\p{blank} - \p{cntrl}.
    473  * Implements UCHAR_POSIX_PRINT.
    474  * @internal
    475  */
    476 U_CFUNC UBool
    477 u_isprintPOSIX(UChar32 c) {
    478     uint32_t props;
    479     GET_PROPS(c, props);
    480     /*
    481      * The only cntrl character in graph+blank is TAB (in blank).
    482      * Here we implement (blank-TAB)=Zs instead of calling u_isblank().
    483      */
    484     return (UBool)((GET_CATEGORY(props)==U_SPACE_SEPARATOR) || u_isgraphPOSIX(c));
    485 }
    486 
    487 U_CAPI UBool U_EXPORT2
    488 u_isgraph(UChar32 c) {
    489     uint32_t props;
    490     GET_PROPS(c, props);
    491     /* comparing ==0 returns FALSE for the categories mentioned */
    492     return (UBool)((CAT_MASK(props)&
    493                     (U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
    494                    ==0);
    495 }
    496 
    497 /**
    498  * Checks if c is in
    499  * [^\p{space}\p{gc=Control}\p{gc=Surrogate}\p{gc=Unassigned}]
    500  * with space=\p{Whitespace} and Control=Cc.
    501  * Implements UCHAR_POSIX_GRAPH.
    502  * @internal
    503  */
    504 U_CFUNC UBool
    505 u_isgraphPOSIX(UChar32 c) {
    506     uint32_t props;
    507     GET_PROPS(c, props);
    508     /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */
    509     /* comparing ==0 returns FALSE for the categories mentioned */
    510     return (UBool)((CAT_MASK(props)&
    511                     (U_GC_CC_MASK|U_GC_CS_MASK|U_GC_CN_MASK|U_GC_Z_MASK))
    512                    ==0);
    513 }
    514 
    515 U_CAPI UBool U_EXPORT2
    516 u_ispunct(UChar32 c) {
    517     uint32_t props;
    518     GET_PROPS(c, props);
    519     return (UBool)((CAT_MASK(props)&U_GC_P_MASK)!=0);
    520 }
    521 
    522 /* Checks if the Unicode character can start a Unicode identifier.*/
    523 U_CAPI UBool U_EXPORT2
    524 u_isIDStart(UChar32 c) {
    525     /* same as u_isalpha() */
    526     uint32_t props;
    527     GET_PROPS(c, props);
    528     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_NL_MASK))!=0);
    529 }
    530 
    531 /* Checks if the Unicode character can be a Unicode identifier part other than starting the
    532  identifier.*/
    533 U_CAPI UBool U_EXPORT2
    534 u_isIDPart(UChar32 c) {
    535     uint32_t props;
    536     GET_PROPS(c, props);
    537     return (UBool)(
    538            (CAT_MASK(props)&
    539             (U_GC_ND_MASK|U_GC_NL_MASK|
    540              U_GC_L_MASK|
    541              U_GC_PC_MASK|U_GC_MC_MASK|U_GC_MN_MASK)
    542            )!=0 ||
    543            u_isIDIgnorable(c));
    544 }
    545 
    546 /*Checks if the Unicode character can be ignorable in a Java or Unicode identifier.*/
    547 U_CAPI UBool U_EXPORT2
    548 u_isIDIgnorable(UChar32 c) {
    549     if(c<=0x9f) {
    550         return u_isISOControl(c) && !IS_THAT_CONTROL_SPACE(c);
    551     } else {
    552         uint32_t props;
    553         GET_PROPS(c, props);
    554         return (UBool)(GET_CATEGORY(props)==U_FORMAT_CHAR);
    555     }
    556 }
    557 
    558 /*Checks if the Unicode character can start a Java identifier.*/
    559 U_CAPI UBool U_EXPORT2
    560 u_isJavaIDStart(UChar32 c) {
    561     uint32_t props;
    562     GET_PROPS(c, props);
    563     return (UBool)((CAT_MASK(props)&(U_GC_L_MASK|U_GC_SC_MASK|U_GC_PC_MASK))!=0);
    564 }
    565 
    566 /*Checks if the Unicode character can be a Java identifier part other than starting the
    567  * identifier.
    568  */
    569 U_CAPI UBool U_EXPORT2
    570 u_isJavaIDPart(UChar32 c) {
    571     uint32_t props;
    572     GET_PROPS(c, props);
    573     return (UBool)(
    574            (CAT_MASK(props)&
    575             (U_GC_ND_MASK|U_GC_NL_MASK|
    576              U_GC_L_MASK|
    577              U_GC_SC_MASK|U_GC_PC_MASK|
    578              U_GC_MC_MASK|U_GC_MN_MASK)
    579            )!=0 ||
    580            u_isIDIgnorable(c));
    581 }
    582 
    583 U_CAPI int32_t U_EXPORT2
    584 u_charDigitValue(UChar32 c) {
    585     uint32_t props;
    586     GET_PROPS(c, props);
    587 
    588     if(GET_NUMERIC_TYPE(props)==1) {
    589         return GET_NUMERIC_VALUE(props);
    590     } else {
    591         return -1;
    592     }
    593 }
    594 
    595 U_CAPI double U_EXPORT2
    596 u_getNumericValue(UChar32 c) {
    597     uint32_t props, numericType, numericValue;
    598     GET_PROPS(c, props);
    599     numericType=GET_NUMERIC_TYPE(props);
    600 
    601     if(numericType==0 || numericType>=UPROPS_NT_COUNT) {
    602         return U_NO_NUMERIC_VALUE;
    603     }
    604 
    605     numericValue=GET_NUMERIC_VALUE(props);
    606 
    607     if(numericType<U_NT_COUNT) {
    608         /* normal type, the value is stored directly */
    609         return numericValue;
    610     } else if(numericType==UPROPS_NT_FRACTION) {
    611         /* fraction value */
    612         int32_t numerator;
    613         uint32_t denominator;
    614 
    615         numerator=(int32_t)numericValue>>UPROPS_FRACTION_NUM_SHIFT;
    616         denominator=(numericValue&UPROPS_FRACTION_DEN_MASK)+UPROPS_FRACTION_DEN_OFFSET;
    617 
    618         if(numerator==0) {
    619             numerator=-1;
    620         }
    621         return (double)numerator/(double)denominator;
    622     } else /* numericType==UPROPS_NT_LARGE */ {
    623         /* large value with exponent */
    624         double numValue;
    625         int32_t mant, exp;
    626 
    627         mant=(int32_t)numericValue>>UPROPS_LARGE_MANT_SHIFT;
    628         exp=(int32_t)numericValue&UPROPS_LARGE_EXP_MASK;
    629         if(mant==0) {
    630             mant=1;
    631             exp+=UPROPS_LARGE_EXP_OFFSET_EXTRA;
    632         } else if(mant>9) {
    633             return U_NO_NUMERIC_VALUE; /* reserved mantissa value */
    634         } else {
    635             exp+=UPROPS_LARGE_EXP_OFFSET;
    636         }
    637 
    638         numValue=mant;
    639 
    640         /* multiply by 10^exp without math.h */
    641         while(exp>=4) {
    642             numValue*=10000.;
    643             exp-=4;
    644         }
    645         switch(exp) {
    646         case 3:
    647             numValue*=1000.;
    648             break;
    649         case 2:
    650             numValue*=100.;
    651             break;
    652         case 1:
    653             numValue*=10.;
    654             break;
    655         case 0:
    656         default:
    657             break;
    658         }
    659 
    660         return numValue;
    661     }
    662 }
    663 
    664 /* ICU 3.4: bidi/shaping properties moved to ubidi_props.c */
    665 
    666 /* ICU 2.1: u_getCombiningClass() moved to unorm.cpp */
    667 
    668 U_CAPI int32_t U_EXPORT2
    669 u_digit(UChar32 ch, int8_t radix) {
    670     int8_t value;
    671     if((uint8_t)(radix-2)<=(36-2)) {
    672         value=(int8_t)u_charDigitValue(ch);
    673         if(value<0) {
    674             /* ch is not a decimal digit, try latin letters */
    675             if(ch>=0x61 && ch<=0x7A) {
    676                 value=(int8_t)(ch-0x57);  /* ch - 'a' + 10 */
    677             } else if(ch>=0x41 && ch<=0x5A) {
    678                 value=(int8_t)(ch-0x37);  /* ch - 'A' + 10 */
    679             } else if(ch>=0xFF41 && ch<=0xFF5A) {
    680                 value=(int8_t)(ch-0xFF37);  /* fullwidth ASCII a-z */
    681             } else if(ch>=0xFF21 && ch<=0xFF3A) {
    682                 value=(int8_t)(ch-0xFF17);  /* fullwidth ASCII A-Z */
    683             }
    684         }
    685     } else {
    686         value=-1;   /* invalid radix */
    687     }
    688     return (int8_t)((value<radix) ? value : -1);
    689 }
    690 
    691 U_CAPI UChar32 U_EXPORT2
    692 u_forDigit(int32_t digit, int8_t radix) {
    693     if((uint8_t)(radix-2)>(36-2) || (uint32_t)digit>=(uint32_t)radix) {
    694         return 0;
    695     } else if(digit<10) {
    696         return (UChar32)(0x30+digit);
    697     } else {
    698         return (UChar32)((0x61-10)+digit);
    699     }
    700 }
    701 
    702 /* miscellaneous, and support for uprops.c ---------------------------------- */
    703 
    704 U_CAPI void U_EXPORT2
    705 u_getUnicodeVersion(UVersionInfo versionArray) {
    706     if(versionArray!=NULL) {
    707         uprv_memcpy(versionArray, dataVersion, U_MAX_VERSION_LENGTH);
    708     }
    709 }
    710 
    711 U_CFUNC uint32_t
    712 u_getUnicodeProperties(UChar32 c, int32_t column) {
    713     uint16_t vecIndex;
    714 
    715     if(column==-1) {
    716         uint32_t props;
    717         GET_PROPS(c, props);
    718         return props;
    719     } else if(
    720 #if !UCHAR_HARDCODE_DATA
    721                !HAVE_DATA || countPropsVectors==0 ||
    722 #endif
    723                column<0 || column>=propsVectorsColumns
    724     ) {
    725         return 0;
    726     } else {
    727         vecIndex=UTRIE2_GET16(&propsVectorsTrie, c);
    728         return propsVectors[vecIndex+column];
    729     }
    730 }
    731 
    732 U_CFUNC int32_t
    733 uprv_getMaxValues(int32_t column) {
    734 #if !UCHAR_HARDCODE_DATA
    735     if(HAVE_DATA) {
    736 #endif
    737         switch(column) {
    738         case 0:
    739             return indexes[UPROPS_MAX_VALUES_INDEX];
    740         case 2:
    741             return indexes[UPROPS_MAX_VALUES_2_INDEX];
    742         default:
    743             return 0;
    744         }
    745 #if !UCHAR_HARDCODE_DATA
    746     } else {
    747         return 0;
    748     }
    749 #endif
    750 }
    751 
    752 /*
    753  * get Hangul Syllable Type
    754  * implemented here so that uchar.c (uhst_addPropertyStarts())
    755  * does not depend on uprops.c (u_getIntPropertyValue(c, UCHAR_HANGUL_SYLLABLE_TYPE))
    756  */
    757 U_CFUNC UHangulSyllableType
    758 uchar_getHST(UChar32 c) {
    759     /* purely algorithmic; hardcode known characters, check for assigned new ones */
    760     if(c<JAMO_L_BASE) {
    761         /* U_HST_NOT_APPLICABLE */
    762     } else if(c<=0x11ff) {
    763         /* Jamo range */
    764         if(c<=0x115f) {
    765             /* Jamo L range, HANGUL CHOSEONG ... */
    766             if(c==0x115f || c<=0x1159 || u_charType(c)==U_OTHER_LETTER) {
    767                 return U_HST_LEADING_JAMO;
    768             }
    769         } else if(c<=0x11a7) {
    770             /* Jamo V range, HANGUL JUNGSEONG ... */
    771             if(c<=0x11a2 || u_charType(c)==U_OTHER_LETTER) {
    772                 return U_HST_VOWEL_JAMO;
    773             }
    774         } else {
    775             /* Jamo T range */
    776             if(c<=0x11f9 || u_charType(c)==U_OTHER_LETTER) {
    777                 return U_HST_TRAILING_JAMO;
    778             }
    779         }
    780     } else if((c-=HANGUL_BASE)<0) {
    781         /* U_HST_NOT_APPLICABLE */
    782     } else if(c<HANGUL_COUNT) {
    783         /* Hangul syllable */
    784         return c%JAMO_T_COUNT==0 ? U_HST_LV_SYLLABLE : U_HST_LVT_SYLLABLE;
    785     }
    786     return U_HST_NOT_APPLICABLE;
    787 }
    788 
    789 U_CAPI void U_EXPORT2
    790 u_charAge(UChar32 c, UVersionInfo versionArray) {
    791     if(versionArray!=NULL) {
    792         uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT;
    793         versionArray[0]=(uint8_t)(version>>4);
    794         versionArray[1]=(uint8_t)(version&0xf);
    795         versionArray[2]=versionArray[3]=0;
    796     }
    797 }
    798 
    799 U_CAPI UScriptCode U_EXPORT2
    800 uscript_getScript(UChar32 c, UErrorCode *pErrorCode) {
    801     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
    802         return USCRIPT_INVALID_CODE;
    803     }
    804     if((uint32_t)c>0x10ffff) {
    805         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
    806         return USCRIPT_INVALID_CODE;
    807     }
    808 
    809     return (UScriptCode)(u_getUnicodeProperties(c, 0)&UPROPS_SCRIPT_MASK);
    810 }
    811 
    812 U_CAPI UBlockCode U_EXPORT2
    813 ublock_getCode(UChar32 c) {
    814     return (UBlockCode)((u_getUnicodeProperties(c, 0)&UPROPS_BLOCK_MASK)>>UPROPS_BLOCK_SHIFT);
    815 }
    816 
    817 /* property starts for UnicodeSet ------------------------------------------- */
    818 
    819 /* for Hangul_Syllable_Type */
    820 U_CFUNC void U_EXPORT2
    821 uhst_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    822     UChar32 c;
    823     int32_t value, value2;
    824 
    825     if(U_FAILURE(*pErrorCode)) {
    826         return;
    827     }
    828 
    829 #if !UCHAR_HARDCODE_DATA
    830     if(!HAVE_DATA) {
    831         *pErrorCode=dataErrorCode;
    832         return;
    833     }
    834 #endif
    835 
    836     /* add code points with hardcoded properties, plus the ones following them */
    837 
    838     /*
    839      * Add Jamo type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE.
    840      * First, we add fixed boundaries for the blocks of Jamos.
    841      * Then we check in loops to see where the current Unicode version
    842      * actually stops assigning such Jamos. We start each loop
    843      * at the end of the per-Jamo-block assignments in Unicode 4 or earlier.
    844      * (These have not changed since Unicode 2.)
    845      */
    846     sa->add(sa->set, 0x1100);
    847     value=U_HST_LEADING_JAMO;
    848     for(c=0x115a; c<=0x115f; ++c) {
    849         value2=uchar_getHST(c);
    850         if(value!=value2) {
    851             value=value2;
    852             sa->add(sa->set, c);
    853         }
    854     }
    855 
    856     sa->add(sa->set, 0x1160);
    857     value=U_HST_VOWEL_JAMO;
    858     for(c=0x11a3; c<=0x11a7; ++c) {
    859         value2=uchar_getHST(c);
    860         if(value!=value2) {
    861             value=value2;
    862             sa->add(sa->set, c);
    863         }
    864     }
    865 
    866     sa->add(sa->set, 0x11a8);
    867     value=U_HST_TRAILING_JAMO;
    868     for(c=0x11fa; c<=0x11ff; ++c) {
    869         value2=uchar_getHST(c);
    870         if(value!=value2) {
    871             value=value2;
    872             sa->add(sa->set, c);
    873         }
    874     }
    875 
    876     /* Add Hangul type boundaries for UCHAR_HANGUL_SYLLABLE_TYPE. */
    877     for(c=HANGUL_BASE; c<(HANGUL_BASE+HANGUL_COUNT); c+=JAMO_T_COUNT) {
    878         sa->add(sa->set, c);
    879         sa->add(sa->set, c+1);
    880     }
    881     sa->add(sa->set, c);
    882 }
    883 
    884 static UBool U_CALLCONV
    885 _enumPropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
    886     /* add the start code point to the USet */
    887     const USetAdder *sa=(const USetAdder *)context;
    888     sa->add(sa->set, start);
    889     return TRUE;
    890 }
    891 
    892 #define USET_ADD_CP_AND_NEXT(sa, cp) sa->add(sa->set, cp); sa->add(sa->set, cp+1)
    893 
    894 U_CFUNC void U_EXPORT2
    895 uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    896     if(U_FAILURE(*pErrorCode)) {
    897         return;
    898     }
    899 
    900 #if !UCHAR_HARDCODE_DATA
    901     if(!HAVE_DATA) {
    902         *pErrorCode=dataErrorCode;
    903         return;
    904     }
    905 #endif
    906 
    907     /* add the start code point of each same-value range of the main trie */
    908     utrie2_enum(&propsTrie, NULL, _enumPropertyStartsRange, sa);
    909 
    910     /* add code points with hardcoded properties, plus the ones following them */
    911 
    912     /* add for u_isblank() */
    913     USET_ADD_CP_AND_NEXT(sa, TAB);
    914 
    915     /* add for IS_THAT_CONTROL_SPACE() */
    916     sa->add(sa->set, CR+1); /* range TAB..CR */
    917     sa->add(sa->set, 0x1c);
    918     sa->add(sa->set, 0x1f+1);
    919     USET_ADD_CP_AND_NEXT(sa, NL);
    920 
    921     /* add for u_isIDIgnorable() what was not added above */
    922     sa->add(sa->set, DEL); /* range DEL..NBSP-1, NBSP added below */
    923     sa->add(sa->set, HAIRSP);
    924     sa->add(sa->set, RLM+1);
    925     sa->add(sa->set, INHSWAP);
    926     sa->add(sa->set, NOMDIG+1);
    927     USET_ADD_CP_AND_NEXT(sa, ZWNBSP);
    928 
    929     /* add no-break spaces for u_isWhitespace() what was not added above */
    930     USET_ADD_CP_AND_NEXT(sa, NBSP);
    931     USET_ADD_CP_AND_NEXT(sa, FIGURESP);
    932     USET_ADD_CP_AND_NEXT(sa, NNBSP);
    933 
    934     /* add for u_digit() */
    935     sa->add(sa->set, U_a);
    936     sa->add(sa->set, U_z+1);
    937     sa->add(sa->set, U_A);
    938     sa->add(sa->set, U_Z+1);
    939     sa->add(sa->set, U_FW_a);
    940     sa->add(sa->set, U_FW_z+1);
    941     sa->add(sa->set, U_FW_A);
    942     sa->add(sa->set, U_FW_Z+1);
    943 
    944     /* add for u_isxdigit() */
    945     sa->add(sa->set, U_f+1);
    946     sa->add(sa->set, U_F+1);
    947     sa->add(sa->set, U_FW_f+1);
    948     sa->add(sa->set, U_FW_F+1);
    949 
    950     /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */
    951     sa->add(sa->set, WJ); /* range WJ..NOMDIG */
    952     sa->add(sa->set, 0xfff0);
    953     sa->add(sa->set, 0xfffb+1);
    954     sa->add(sa->set, 0xe0000);
    955     sa->add(sa->set, 0xe0fff+1);
    956 
    957     /* add for UCHAR_GRAPHEME_BASE and others */
    958     USET_ADD_CP_AND_NEXT(sa, CGJ);
    959 }
    960 
    961 U_CFUNC void U_EXPORT2
    962 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) {
    963     if(U_FAILURE(*pErrorCode)) {
    964         return;
    965     }
    966 
    967 #if !UCHAR_HARDCODE_DATA
    968     if(!HAVE_DATA) {
    969         *pErrorCode=dataErrorCode;
    970         return;
    971     }
    972 #endif
    973 
    974     /* add the start code point of each same-value range of the properties vectors trie */
    975     if(propsVectorsColumns>0) {
    976         /* if propsVectorsColumns==0 then the properties vectors trie may not be there at all */
    977         utrie2_enum(&propsVectorsTrie, NULL, _enumPropertyStartsRange, sa);
    978     }
    979 }
    980