Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 * Copyright (c) 2002-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 * Author: Alan Liu
      7 * Created: October 30 2002
      8 * Since: ICU 2.4
      9 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
     10 **********************************************************************
     11 */
     12 #include "propname.h"
     13 #include "unicode/uchar.h"
     14 #include "unicode/udata.h"
     15 #include "unicode/uscript.h"
     16 #include "umutex.h"
     17 #include "cmemory.h"
     18 #include "cstring.h"
     19 #include "uarrsort.h"
     20 #include "uinvchar.h"
     21 
     22 #define INCLUDED_FROM_PROPNAME_CPP
     23 #include "propname_data.h"
     24 
     25 U_CDECL_BEGIN
     26 
     27 /**
     28  * Get the next non-ignorable ASCII character from a property name
     29  * and lowercases it.
     30  * @return ((advance count for the name)<<8)|character
     31  */
     32 static inline int32_t
     33 getASCIIPropertyNameChar(const char *name) {
     34     int32_t i;
     35     char c;
     36 
     37     /* Ignore delimiters '-', '_', and ASCII White_Space */
     38     for(i=0;
     39         (c=name[i++])==0x2d || c==0x5f ||
     40         c==0x20 || (0x09<=c && c<=0x0d);
     41     ) {}
     42 
     43     if(c!=0) {
     44         return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
     45     } else {
     46         return i<<8;
     47     }
     48 }
     49 
     50 /**
     51  * Get the next non-ignorable EBCDIC character from a property name
     52  * and lowercases it.
     53  * @return ((advance count for the name)<<8)|character
     54  */
     55 static inline int32_t
     56 getEBCDICPropertyNameChar(const char *name) {
     57     int32_t i;
     58     char c;
     59 
     60     /* Ignore delimiters '-', '_', and EBCDIC White_Space */
     61     for(i=0;
     62         (c=name[i++])==0x60 || c==0x6d ||
     63         c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
     64     ) {}
     65 
     66     if(c!=0) {
     67         return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
     68     } else {
     69         return i<<8;
     70     }
     71 }
     72 
     73 /**
     74  * Unicode property names and property value names are compared "loosely".
     75  *
     76  * UCD.html 4.0.1 says:
     77  *   For all property names, property value names, and for property values for
     78  *   Enumerated, Binary, or Catalog properties, use the following
     79  *   loose matching rule:
     80  *
     81  *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
     82  *
     83  * This function does just that, for (char *) name strings.
     84  * It is almost identical to ucnv_compareNames() but also ignores
     85  * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
     86  *
     87  * @internal
     88  */
     89 
     90 U_CAPI int32_t U_EXPORT2
     91 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
     92     int32_t rc, r1, r2;
     93 
     94     for(;;) {
     95         r1=getASCIIPropertyNameChar(name1);
     96         r2=getASCIIPropertyNameChar(name2);
     97 
     98         /* If we reach the ends of both strings then they match */
     99         if(((r1|r2)&0xff)==0) {
    100             return 0;
    101         }
    102 
    103         /* Compare the lowercased characters */
    104         if(r1!=r2) {
    105             rc=(r1&0xff)-(r2&0xff);
    106             if(rc!=0) {
    107                 return rc;
    108             }
    109         }
    110 
    111         name1+=r1>>8;
    112         name2+=r2>>8;
    113     }
    114 }
    115 
    116 U_CAPI int32_t U_EXPORT2
    117 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    118     int32_t rc, r1, r2;
    119 
    120     for(;;) {
    121         r1=getEBCDICPropertyNameChar(name1);
    122         r2=getEBCDICPropertyNameChar(name2);
    123 
    124         /* If we reach the ends of both strings then they match */
    125         if(((r1|r2)&0xff)==0) {
    126             return 0;
    127         }
    128 
    129         /* Compare the lowercased characters */
    130         if(r1!=r2) {
    131             rc=(r1&0xff)-(r2&0xff);
    132             if(rc!=0) {
    133                 return rc;
    134             }
    135         }
    136 
    137         name1+=r1>>8;
    138         name2+=r2>>8;
    139     }
    140 }
    141 
    142 U_CDECL_END
    143 
    144 U_NAMESPACE_BEGIN
    145 
    146 int32_t PropNameData::findProperty(int32_t property) {
    147     int32_t i=1;  // valueMaps index, initially after numRanges
    148     for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
    149         // Read and skip the start and limit of this range.
    150         int32_t start=valueMaps[i];
    151         int32_t limit=valueMaps[i+1];
    152         i+=2;
    153         if(property<start) {
    154             break;
    155         }
    156         if(property<limit) {
    157             return i+(property-start)*2;
    158         }
    159         i+=(limit-start)*2;  // Skip all entries for this range.
    160     }
    161     return 0;
    162 }
    163 
    164 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
    165     if(valueMapIndex==0) {
    166         return 0;  // The property does not have named values.
    167     }
    168     ++valueMapIndex;  // Skip the BytesTrie offset.
    169     int32_t numRanges=valueMaps[valueMapIndex++];
    170     if(numRanges<0x10) {
    171         // Ranges of values.
    172         for(; numRanges>0; --numRanges) {
    173             // Read and skip the start and limit of this range.
    174             int32_t start=valueMaps[valueMapIndex];
    175             int32_t limit=valueMaps[valueMapIndex+1];
    176             valueMapIndex+=2;
    177             if(value<start) {
    178                 break;
    179             }
    180             if(value<limit) {
    181                 return valueMaps[valueMapIndex+value-start];
    182             }
    183             valueMapIndex+=limit-start;  // Skip all entries for this range.
    184         }
    185     } else {
    186         // List of values.
    187         int32_t valuesStart=valueMapIndex;
    188         int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
    189         do {
    190             int32_t v=valueMaps[valueMapIndex];
    191             if(value<v) {
    192                 break;
    193             }
    194             if(value==v) {
    195                 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
    196             }
    197         } while(++valueMapIndex<nameGroupOffsetsStart);
    198     }
    199     return 0;
    200 }
    201 
    202 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
    203     int32_t numNames=*nameGroup++;
    204     if(nameIndex<0 || numNames<=nameIndex) {
    205         return NULL;
    206     }
    207     // Skip nameIndex names.
    208     for(; nameIndex>0; --nameIndex) {
    209         nameGroup=uprv_strchr(nameGroup, 0)+1;
    210     }
    211     if(*nameGroup==0) {
    212         return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
    213     }
    214     return nameGroup;
    215 }
    216 
    217 UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
    218     if(name==NULL) {
    219         return FALSE;
    220     }
    221     UStringTrieResult result=USTRINGTRIE_NO_VALUE;
    222     char c;
    223     while((c=*name++)!=0) {
    224         c=uprv_invCharToLowercaseAscii(c);
    225         // Ignore delimiters '-', '_', and ASCII White_Space.
    226         if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
    227             continue;
    228         }
    229         if(!USTRINGTRIE_HAS_NEXT(result)) {
    230             return FALSE;
    231         }
    232         result=trie.next((uint8_t)c);
    233     }
    234     return USTRINGTRIE_HAS_VALUE(result);
    235 }
    236 
    237 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
    238     int32_t valueMapIndex=findProperty(property);
    239     if(valueMapIndex==0) {
    240         return NULL;  // Not a known property.
    241     }
    242     return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
    243 }
    244 
    245 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
    246     int32_t valueMapIndex=findProperty(property);
    247     if(valueMapIndex==0) {
    248         return NULL;  // Not a known property.
    249     }
    250     int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
    251     if(nameGroupOffset==0) {
    252         return NULL;
    253     }
    254     return getName(nameGroups+nameGroupOffset, nameChoice);
    255 }
    256 
    257 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
    258     BytesTrie trie(bytesTries+bytesTrieOffset);
    259     if(containsName(trie, alias)) {
    260         return trie.getValue();
    261     } else {
    262         return UCHAR_INVALID_CODE;
    263     }
    264 }
    265 
    266 int32_t PropNameData::getPropertyEnum(const char *alias) {
    267     return getPropertyOrValueEnum(0, alias);
    268 }
    269 
    270 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
    271     int32_t valueMapIndex=findProperty(property);
    272     if(valueMapIndex==0) {
    273         return UCHAR_INVALID_CODE;  // Not a known property.
    274     }
    275     valueMapIndex=valueMaps[valueMapIndex+1];
    276     if(valueMapIndex==0) {
    277         return UCHAR_INVALID_CODE;  // The property does not have named values.
    278     }
    279     // valueMapIndex is the start of the property's valueMap,
    280     // where the first word is the BytesTrie offset.
    281     return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    282 }
    283 U_NAMESPACE_END
    284 
    285 //----------------------------------------------------------------------
    286 // Public API implementation
    287 
    288 U_CAPI const char* U_EXPORT2
    289 u_getPropertyName(UProperty property,
    290                   UPropertyNameChoice nameChoice) {
    291     U_NAMESPACE_USE
    292     return PropNameData::getPropertyName(property, nameChoice);
    293 }
    294 
    295 U_CAPI UProperty U_EXPORT2
    296 u_getPropertyEnum(const char* alias) {
    297     U_NAMESPACE_USE
    298     return (UProperty)PropNameData::getPropertyEnum(alias);
    299 }
    300 
    301 U_CAPI const char* U_EXPORT2
    302 u_getPropertyValueName(UProperty property,
    303                        int32_t value,
    304                        UPropertyNameChoice nameChoice) {
    305     U_NAMESPACE_USE
    306     return PropNameData::getPropertyValueName(property, value, nameChoice);
    307 }
    308 
    309 U_CAPI int32_t U_EXPORT2
    310 u_getPropertyValueEnum(UProperty property,
    311                        const char* alias) {
    312     U_NAMESPACE_USE
    313     return PropNameData::getPropertyValueEnum(property, alias);
    314 }
    315 
    316 U_CAPI const char*  U_EXPORT2
    317 uscript_getName(UScriptCode scriptCode){
    318     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
    319                                   U_LONG_PROPERTY_NAME);
    320 }
    321 
    322 U_CAPI const char*  U_EXPORT2
    323 uscript_getShortName(UScriptCode scriptCode){
    324     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
    325                                   U_SHORT_PROPERTY_NAME);
    326 }
    327