Home | History | Annotate | Download | only in common
      1 /*
      2 **********************************************************************
      3 * Copyright (c) 2002-2011, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 * Author: Alan Liu
      7 * Created: October 30 2002
      8 * Since: ICU 2.4
      9 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
     10 **********************************************************************
     11 */
     12 #include "propname.h"
     13 #include "unicode/uchar.h"
     14 #include "unicode/udata.h"
     15 #include "unicode/uscript.h"
     16 #include "umutex.h"
     17 #include "cmemory.h"
     18 #include "cstring.h"
     19 #include "ucln_cmn.h"
     20 #include "uarrsort.h"
     21 #include "uinvchar.h"
     22 
     23 #define INCLUDED_FROM_PROPNAME_CPP
     24 #include "propname_data.h"
     25 
     26 U_CDECL_BEGIN
     27 
     28 /**
     29  * Get the next non-ignorable ASCII character from a property name
     30  * and lowercases it.
     31  * @return ((advance count for the name)<<8)|character
     32  */
     33 static inline int32_t
     34 getASCIIPropertyNameChar(const char *name) {
     35     int32_t i;
     36     char c;
     37 
     38     /* Ignore delimiters '-', '_', and ASCII White_Space */
     39     for(i=0;
     40         (c=name[i++])==0x2d || c==0x5f ||
     41         c==0x20 || (0x09<=c && c<=0x0d);
     42     ) {}
     43 
     44     if(c!=0) {
     45         return (i<<8)|(uint8_t)uprv_asciitolower((char)c);
     46     } else {
     47         return i<<8;
     48     }
     49 }
     50 
     51 /**
     52  * Get the next non-ignorable EBCDIC character from a property name
     53  * and lowercases it.
     54  * @return ((advance count for the name)<<8)|character
     55  */
     56 static inline int32_t
     57 getEBCDICPropertyNameChar(const char *name) {
     58     int32_t i;
     59     char c;
     60 
     61     /* Ignore delimiters '-', '_', and EBCDIC White_Space */
     62     for(i=0;
     63         (c=name[i++])==0x60 || c==0x6d ||
     64         c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d;
     65     ) {}
     66 
     67     if(c!=0) {
     68         return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c);
     69     } else {
     70         return i<<8;
     71     }
     72 }
     73 
     74 /**
     75  * Unicode property names and property value names are compared "loosely".
     76  *
     77  * UCD.html 4.0.1 says:
     78  *   For all property names, property value names, and for property values for
     79  *   Enumerated, Binary, or Catalog properties, use the following
     80  *   loose matching rule:
     81  *
     82  *   LM3. Ignore case, whitespace, underscore ('_'), and hyphens.
     83  *
     84  * This function does just that, for (char *) name strings.
     85  * It is almost identical to ucnv_compareNames() but also ignores
     86  * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC).
     87  *
     88  * @internal
     89  */
     90 
     91 U_CAPI int32_t U_EXPORT2
     92 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) {
     93     int32_t rc, r1, r2;
     94 
     95     for(;;) {
     96         r1=getASCIIPropertyNameChar(name1);
     97         r2=getASCIIPropertyNameChar(name2);
     98 
     99         /* If we reach the ends of both strings then they match */
    100         if(((r1|r2)&0xff)==0) {
    101             return 0;
    102         }
    103 
    104         /* Compare the lowercased characters */
    105         if(r1!=r2) {
    106             rc=(r1&0xff)-(r2&0xff);
    107             if(rc!=0) {
    108                 return rc;
    109             }
    110         }
    111 
    112         name1+=r1>>8;
    113         name2+=r2>>8;
    114     }
    115 }
    116 
    117 U_CAPI int32_t U_EXPORT2
    118 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) {
    119     int32_t rc, r1, r2;
    120 
    121     for(;;) {
    122         r1=getEBCDICPropertyNameChar(name1);
    123         r2=getEBCDICPropertyNameChar(name2);
    124 
    125         /* If we reach the ends of both strings then they match */
    126         if(((r1|r2)&0xff)==0) {
    127             return 0;
    128         }
    129 
    130         /* Compare the lowercased characters */
    131         if(r1!=r2) {
    132             rc=(r1&0xff)-(r2&0xff);
    133             if(rc!=0) {
    134                 return rc;
    135             }
    136         }
    137 
    138         name1+=r1>>8;
    139         name2+=r2>>8;
    140     }
    141 }
    142 
    143 U_CDECL_END
    144 
    145 U_NAMESPACE_BEGIN
    146 
    147 int32_t PropNameData::findProperty(int32_t property) {
    148     int32_t i=1;  // valueMaps index, initially after numRanges
    149     for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) {
    150         // Read and skip the start and limit of this range.
    151         int32_t start=valueMaps[i];
    152         int32_t limit=valueMaps[i+1];
    153         i+=2;
    154         if(property<start) {
    155             break;
    156         }
    157         if(property<limit) {
    158             return i+(property-start)*2;
    159         }
    160         i+=(limit-start)*2;  // Skip all entries for this range.
    161     }
    162     return 0;
    163 }
    164 
    165 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) {
    166     if(valueMapIndex==0) {
    167         return 0;  // The property does not have named values.
    168     }
    169     ++valueMapIndex;  // Skip the BytesTrie offset.
    170     int32_t numRanges=valueMaps[valueMapIndex++];
    171     if(numRanges<0x10) {
    172         // Ranges of values.
    173         for(; numRanges>0; --numRanges) {
    174             // Read and skip the start and limit of this range.
    175             int32_t start=valueMaps[valueMapIndex];
    176             int32_t limit=valueMaps[valueMapIndex+1];
    177             valueMapIndex+=2;
    178             if(value<start) {
    179                 break;
    180             }
    181             if(value<limit) {
    182                 return valueMaps[valueMapIndex+value-start];
    183             }
    184             valueMapIndex+=limit-start;  // Skip all entries for this range.
    185         }
    186     } else {
    187         // List of values.
    188         int32_t valuesStart=valueMapIndex;
    189         int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
    190         do {
    191             int32_t v=valueMaps[valueMapIndex];
    192             if(value<v) {
    193                 break;
    194             }
    195             if(value==v) {
    196                 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
    197             }
    198         } while(++valueMapIndex<nameGroupOffsetsStart);
    199     }
    200     return 0;
    201 }
    202 
    203 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) {
    204     int32_t numNames=*nameGroup++;
    205     if(nameIndex<0 || numNames<=nameIndex) {
    206         return NULL;
    207     }
    208     // Skip nameIndex names.
    209     for(; nameIndex>0; --nameIndex) {
    210         nameGroup=uprv_strchr(nameGroup, 0)+1;
    211     }
    212     if(*nameGroup==0) {
    213         return NULL;  // no name (Property[Value]Aliases.txt has "n/a")
    214     }
    215     return nameGroup;
    216 }
    217 
    218 UBool PropNameData::containsName(BytesTrie &trie, const char *name) {
    219     if(name==NULL) {
    220         return FALSE;
    221     }
    222     UStringTrieResult result=USTRINGTRIE_NO_VALUE;
    223     char c;
    224     while((c=*name++)!=0) {
    225         c=uprv_invCharToLowercaseAscii(c);
    226         // Ignore delimiters '-', '_', and ASCII White_Space.
    227         if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) {
    228             continue;
    229         }
    230         if(!USTRINGTRIE_HAS_NEXT(result)) {
    231             return FALSE;
    232         }
    233         result=trie.next((uint8_t)c);
    234     }
    235     return USTRINGTRIE_HAS_VALUE(result);
    236 }
    237 
    238 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) {
    239     int32_t valueMapIndex=findProperty(property);
    240     if(valueMapIndex==0) {
    241         return NULL;  // Not a known property.
    242     }
    243     return getName(nameGroups+valueMaps[valueMapIndex], nameChoice);
    244 }
    245 
    246 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) {
    247     int32_t valueMapIndex=findProperty(property);
    248     if(valueMapIndex==0) {
    249         return NULL;  // Not a known property.
    250     }
    251     int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
    252     if(nameGroupOffset==0) {
    253         return NULL;
    254     }
    255     return getName(nameGroups+nameGroupOffset, nameChoice);
    256 }
    257 
    258 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) {
    259     BytesTrie trie(bytesTries+bytesTrieOffset);
    260     if(containsName(trie, alias)) {
    261         return trie.getValue();
    262     } else {
    263         return UCHAR_INVALID_CODE;
    264     }
    265 }
    266 
    267 int32_t PropNameData::getPropertyEnum(const char *alias) {
    268     return getPropertyOrValueEnum(0, alias);
    269 }
    270 
    271 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) {
    272     int32_t valueMapIndex=findProperty(property);
    273     if(valueMapIndex==0) {
    274         return UCHAR_INVALID_CODE;  // Not a known property.
    275     }
    276     valueMapIndex=valueMaps[valueMapIndex+1];
    277     if(valueMapIndex==0) {
    278         return UCHAR_INVALID_CODE;  // The property does not have named values.
    279     }
    280     // valueMapIndex is the start of the property's valueMap,
    281     // where the first word is the BytesTrie offset.
    282     return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    283 }
    284 U_NAMESPACE_END
    285 
    286 //----------------------------------------------------------------------
    287 // Public API implementation
    288 
    289 U_CAPI const char* U_EXPORT2
    290 u_getPropertyName(UProperty property,
    291                   UPropertyNameChoice nameChoice) {
    292     U_NAMESPACE_USE
    293     return PropNameData::getPropertyName(property, nameChoice);
    294 }
    295 
    296 U_CAPI UProperty U_EXPORT2
    297 u_getPropertyEnum(const char* alias) {
    298     U_NAMESPACE_USE
    299     return (UProperty)PropNameData::getPropertyEnum(alias);
    300 }
    301 
    302 U_CAPI const char* U_EXPORT2
    303 u_getPropertyValueName(UProperty property,
    304                        int32_t value,
    305                        UPropertyNameChoice nameChoice) {
    306     U_NAMESPACE_USE
    307     return PropNameData::getPropertyValueName(property, value, nameChoice);
    308 }
    309 
    310 U_CAPI int32_t U_EXPORT2
    311 u_getPropertyValueEnum(UProperty property,
    312                        const char* alias) {
    313     U_NAMESPACE_USE
    314     return PropNameData::getPropertyValueEnum(property, alias);
    315 }
    316 
    317 U_CAPI const char*  U_EXPORT2
    318 uscript_getName(UScriptCode scriptCode){
    319     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
    320                                   U_LONG_PROPERTY_NAME);
    321 }
    322 
    323 U_CAPI const char*  U_EXPORT2
    324 uscript_getShortName(UScriptCode scriptCode){
    325     return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode,
    326                                   U_SHORT_PROPERTY_NAME);
    327 }
    328