1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Alan Liu 7 * Created: October 30 2002 8 * Since: ICU 2.4 9 * 2010nov19 Markus Scherer Rewrite for formatVersion 2. 10 ********************************************************************** 11 */ 12 #include "propname.h" 13 #include "unicode/uchar.h" 14 #include "unicode/udata.h" 15 #include "unicode/uscript.h" 16 #include "umutex.h" 17 #include "cmemory.h" 18 #include "cstring.h" 19 #include "ucln_cmn.h" 20 #include "uarrsort.h" 21 #include "uinvchar.h" 22 23 #define INCLUDED_FROM_PROPNAME_CPP 24 #include "propname_data.h" 25 26 U_CDECL_BEGIN 27 28 /** 29 * Get the next non-ignorable ASCII character from a property name 30 * and lowercases it. 31 * @return ((advance count for the name)<<8)|character 32 */ 33 static inline int32_t 34 getASCIIPropertyNameChar(const char *name) { 35 int32_t i; 36 char c; 37 38 /* Ignore delimiters '-', '_', and ASCII White_Space */ 39 for(i=0; 40 (c=name[i++])==0x2d || c==0x5f || 41 c==0x20 || (0x09<=c && c<=0x0d); 42 ) {} 43 44 if(c!=0) { 45 return (i<<8)|(uint8_t)uprv_asciitolower((char)c); 46 } else { 47 return i<<8; 48 } 49 } 50 51 /** 52 * Get the next non-ignorable EBCDIC character from a property name 53 * and lowercases it. 54 * @return ((advance count for the name)<<8)|character 55 */ 56 static inline int32_t 57 getEBCDICPropertyNameChar(const char *name) { 58 int32_t i; 59 char c; 60 61 /* Ignore delimiters '-', '_', and EBCDIC White_Space */ 62 for(i=0; 63 (c=name[i++])==0x60 || c==0x6d || 64 c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; 65 ) {} 66 67 if(c!=0) { 68 return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); 69 } else { 70 return i<<8; 71 } 72 } 73 74 /** 75 * Unicode property names and property value names are compared "loosely". 76 * 77 * UCD.html 4.0.1 says: 78 * For all property names, property value names, and for property values for 79 * Enumerated, Binary, or Catalog properties, use the following 80 * loose matching rule: 81 * 82 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. 83 * 84 * This function does just that, for (char *) name strings. 85 * It is almost identical to ucnv_compareNames() but also ignores 86 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). 87 * 88 * @internal 89 */ 90 91 U_CAPI int32_t U_EXPORT2 92 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { 93 int32_t rc, r1, r2; 94 95 for(;;) { 96 r1=getASCIIPropertyNameChar(name1); 97 r2=getASCIIPropertyNameChar(name2); 98 99 /* If we reach the ends of both strings then they match */ 100 if(((r1|r2)&0xff)==0) { 101 return 0; 102 } 103 104 /* Compare the lowercased characters */ 105 if(r1!=r2) { 106 rc=(r1&0xff)-(r2&0xff); 107 if(rc!=0) { 108 return rc; 109 } 110 } 111 112 name1+=r1>>8; 113 name2+=r2>>8; 114 } 115 } 116 117 U_CAPI int32_t U_EXPORT2 118 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { 119 int32_t rc, r1, r2; 120 121 for(;;) { 122 r1=getEBCDICPropertyNameChar(name1); 123 r2=getEBCDICPropertyNameChar(name2); 124 125 /* If we reach the ends of both strings then they match */ 126 if(((r1|r2)&0xff)==0) { 127 return 0; 128 } 129 130 /* Compare the lowercased characters */ 131 if(r1!=r2) { 132 rc=(r1&0xff)-(r2&0xff); 133 if(rc!=0) { 134 return rc; 135 } 136 } 137 138 name1+=r1>>8; 139 name2+=r2>>8; 140 } 141 } 142 143 U_CDECL_END 144 145 U_NAMESPACE_BEGIN 146 147 int32_t PropNameData::findProperty(int32_t property) { 148 int32_t i=1; // valueMaps index, initially after numRanges 149 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) { 150 // Read and skip the start and limit of this range. 151 int32_t start=valueMaps[i]; 152 int32_t limit=valueMaps[i+1]; 153 i+=2; 154 if(property<start) { 155 break; 156 } 157 if(property<limit) { 158 return i+(property-start)*2; 159 } 160 i+=(limit-start)*2; // Skip all entries for this range. 161 } 162 return 0; 163 } 164 165 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) { 166 if(valueMapIndex==0) { 167 return 0; // The property does not have named values. 168 } 169 ++valueMapIndex; // Skip the BytesTrie offset. 170 int32_t numRanges=valueMaps[valueMapIndex++]; 171 if(numRanges<0x10) { 172 // Ranges of values. 173 for(; numRanges>0; --numRanges) { 174 // Read and skip the start and limit of this range. 175 int32_t start=valueMaps[valueMapIndex]; 176 int32_t limit=valueMaps[valueMapIndex+1]; 177 valueMapIndex+=2; 178 if(value<start) { 179 break; 180 } 181 if(value<limit) { 182 return valueMaps[valueMapIndex+value-start]; 183 } 184 valueMapIndex+=limit-start; // Skip all entries for this range. 185 } 186 } else { 187 // List of values. 188 int32_t valuesStart=valueMapIndex; 189 int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10; 190 do { 191 int32_t v=valueMaps[valueMapIndex]; 192 if(value<v) { 193 break; 194 } 195 if(value==v) { 196 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart]; 197 } 198 } while(++valueMapIndex<nameGroupOffsetsStart); 199 } 200 return 0; 201 } 202 203 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) { 204 int32_t numNames=*nameGroup++; 205 if(nameIndex<0 || numNames<=nameIndex) { 206 return NULL; 207 } 208 // Skip nameIndex names. 209 for(; nameIndex>0; --nameIndex) { 210 nameGroup=uprv_strchr(nameGroup, 0)+1; 211 } 212 if(*nameGroup==0) { 213 return NULL; // no name (Property[Value]Aliases.txt has "n/a") 214 } 215 return nameGroup; 216 } 217 218 UBool PropNameData::containsName(BytesTrie &trie, const char *name) { 219 if(name==NULL) { 220 return FALSE; 221 } 222 UStringTrieResult result=USTRINGTRIE_NO_VALUE; 223 char c; 224 while((c=*name++)!=0) { 225 c=uprv_invCharToLowercaseAscii(c); 226 // Ignore delimiters '-', '_', and ASCII White_Space. 227 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) { 228 continue; 229 } 230 if(!USTRINGTRIE_HAS_NEXT(result)) { 231 return FALSE; 232 } 233 result=trie.next((uint8_t)c); 234 } 235 return USTRINGTRIE_HAS_VALUE(result); 236 } 237 238 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) { 239 int32_t valueMapIndex=findProperty(property); 240 if(valueMapIndex==0) { 241 return NULL; // Not a known property. 242 } 243 return getName(nameGroups+valueMaps[valueMapIndex], nameChoice); 244 } 245 246 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) { 247 int32_t valueMapIndex=findProperty(property); 248 if(valueMapIndex==0) { 249 return NULL; // Not a known property. 250 } 251 int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value); 252 if(nameGroupOffset==0) { 253 return NULL; 254 } 255 return getName(nameGroups+nameGroupOffset, nameChoice); 256 } 257 258 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) { 259 BytesTrie trie(bytesTries+bytesTrieOffset); 260 if(containsName(trie, alias)) { 261 return trie.getValue(); 262 } else { 263 return UCHAR_INVALID_CODE; 264 } 265 } 266 267 int32_t PropNameData::getPropertyEnum(const char *alias) { 268 return getPropertyOrValueEnum(0, alias); 269 } 270 271 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) { 272 int32_t valueMapIndex=findProperty(property); 273 if(valueMapIndex==0) { 274 return UCHAR_INVALID_CODE; // Not a known property. 275 } 276 valueMapIndex=valueMaps[valueMapIndex+1]; 277 if(valueMapIndex==0) { 278 return UCHAR_INVALID_CODE; // The property does not have named values. 279 } 280 // valueMapIndex is the start of the property's valueMap, 281 // where the first word is the BytesTrie offset. 282 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); 283 } 284 U_NAMESPACE_END 285 286 //---------------------------------------------------------------------- 287 // Public API implementation 288 289 U_CAPI const char* U_EXPORT2 290 u_getPropertyName(UProperty property, 291 UPropertyNameChoice nameChoice) { 292 U_NAMESPACE_USE 293 return PropNameData::getPropertyName(property, nameChoice); 294 } 295 296 U_CAPI UProperty U_EXPORT2 297 u_getPropertyEnum(const char* alias) { 298 U_NAMESPACE_USE 299 return (UProperty)PropNameData::getPropertyEnum(alias); 300 } 301 302 U_CAPI const char* U_EXPORT2 303 u_getPropertyValueName(UProperty property, 304 int32_t value, 305 UPropertyNameChoice nameChoice) { 306 U_NAMESPACE_USE 307 return PropNameData::getPropertyValueName(property, value, nameChoice); 308 } 309 310 U_CAPI int32_t U_EXPORT2 311 u_getPropertyValueEnum(UProperty property, 312 const char* alias) { 313 U_NAMESPACE_USE 314 return PropNameData::getPropertyValueEnum(property, alias); 315 } 316 317 U_CAPI const char* U_EXPORT2 318 uscript_getName(UScriptCode scriptCode){ 319 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, 320 U_LONG_PROPERTY_NAME); 321 } 322 323 U_CAPI const char* U_EXPORT2 324 uscript_getShortName(UScriptCode scriptCode){ 325 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, 326 U_SHORT_PROPERTY_NAME); 327 } 328