1 /* 2 ********************************************************************** 3 * Copyright (c) 2002-2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * Author: Alan Liu 7 * Created: October 30 2002 8 * Since: ICU 2.4 9 * 2010nov19 Markus Scherer Rewrite for formatVersion 2. 10 ********************************************************************** 11 */ 12 #include "propname.h" 13 #include "unicode/uchar.h" 14 #include "unicode/udata.h" 15 #include "umutex.h" 16 #include "cmemory.h" 17 #include "cstring.h" 18 #include "ucln_cmn.h" 19 #include "uarrsort.h" 20 #include "uinvchar.h" 21 22 #define INCLUDED_FROM_PROPNAME_CPP 23 #include "propname_data.h" 24 25 U_CDECL_BEGIN 26 27 /** 28 * Get the next non-ignorable ASCII character from a property name 29 * and lowercases it. 30 * @return ((advance count for the name)<<8)|character 31 */ 32 static inline int32_t 33 getASCIIPropertyNameChar(const char *name) { 34 int32_t i; 35 char c; 36 37 /* Ignore delimiters '-', '_', and ASCII White_Space */ 38 for(i=0; 39 (c=name[i++])==0x2d || c==0x5f || 40 c==0x20 || (0x09<=c && c<=0x0d); 41 ) {} 42 43 if(c!=0) { 44 return (i<<8)|(uint8_t)uprv_asciitolower((char)c); 45 } else { 46 return i<<8; 47 } 48 } 49 50 /** 51 * Get the next non-ignorable EBCDIC character from a property name 52 * and lowercases it. 53 * @return ((advance count for the name)<<8)|character 54 */ 55 static inline int32_t 56 getEBCDICPropertyNameChar(const char *name) { 57 int32_t i; 58 char c; 59 60 /* Ignore delimiters '-', '_', and EBCDIC White_Space */ 61 for(i=0; 62 (c=name[i++])==0x60 || c==0x6d || 63 c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; 64 ) {} 65 66 if(c!=0) { 67 return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); 68 } else { 69 return i<<8; 70 } 71 } 72 73 /** 74 * Unicode property names and property value names are compared "loosely". 75 * 76 * UCD.html 4.0.1 says: 77 * For all property names, property value names, and for property values for 78 * Enumerated, Binary, or Catalog properties, use the following 79 * loose matching rule: 80 * 81 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. 82 * 83 * This function does just that, for (char *) name strings. 84 * It is almost identical to ucnv_compareNames() but also ignores 85 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). 86 * 87 * @internal 88 */ 89 90 U_CAPI int32_t U_EXPORT2 91 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { 92 int32_t rc, r1, r2; 93 94 for(;;) { 95 r1=getASCIIPropertyNameChar(name1); 96 r2=getASCIIPropertyNameChar(name2); 97 98 /* If we reach the ends of both strings then they match */ 99 if(((r1|r2)&0xff)==0) { 100 return 0; 101 } 102 103 /* Compare the lowercased characters */ 104 if(r1!=r2) { 105 rc=(r1&0xff)-(r2&0xff); 106 if(rc!=0) { 107 return rc; 108 } 109 } 110 111 name1+=r1>>8; 112 name2+=r2>>8; 113 } 114 } 115 116 U_CAPI int32_t U_EXPORT2 117 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { 118 int32_t rc, r1, r2; 119 120 for(;;) { 121 r1=getEBCDICPropertyNameChar(name1); 122 r2=getEBCDICPropertyNameChar(name2); 123 124 /* If we reach the ends of both strings then they match */ 125 if(((r1|r2)&0xff)==0) { 126 return 0; 127 } 128 129 /* Compare the lowercased characters */ 130 if(r1!=r2) { 131 rc=(r1&0xff)-(r2&0xff); 132 if(rc!=0) { 133 return rc; 134 } 135 } 136 137 name1+=r1>>8; 138 name2+=r2>>8; 139 } 140 } 141 142 U_CDECL_END 143 144 U_NAMESPACE_BEGIN 145 146 int32_t PropNameData::findProperty(int32_t property) { 147 int32_t i=1; // valueMaps index, initially after numRanges 148 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) { 149 // Read and skip the start and limit of this range. 150 int32_t start=valueMaps[i]; 151 int32_t limit=valueMaps[i+1]; 152 i+=2; 153 if(property<start) { 154 break; 155 } 156 if(property<limit) { 157 return i+(property-start)*2; 158 } 159 i+=(limit-start)*2; // Skip all entries for this range. 160 } 161 return 0; 162 } 163 164 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) { 165 if(valueMapIndex==0) { 166 return 0; // The property does not have named values. 167 } 168 ++valueMapIndex; // Skip the BytesTrie offset. 169 int32_t numRanges=valueMaps[valueMapIndex++]; 170 if(numRanges<0x10) { 171 // Ranges of values. 172 for(; numRanges>0; --numRanges) { 173 // Read and skip the start and limit of this range. 174 int32_t start=valueMaps[valueMapIndex]; 175 int32_t limit=valueMaps[valueMapIndex+1]; 176 valueMapIndex+=2; 177 if(value<start) { 178 break; 179 } 180 if(value<limit) { 181 return valueMaps[valueMapIndex+value-start]; 182 } 183 valueMapIndex+=limit-start; // Skip all entries for this range. 184 } 185 } else { 186 // List of values. 187 int32_t valuesStart=valueMapIndex; 188 int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10; 189 do { 190 int32_t v=valueMaps[valueMapIndex]; 191 if(value<v) { 192 break; 193 } 194 if(value==v) { 195 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart]; 196 } 197 } while(++valueMapIndex<nameGroupOffsetsStart); 198 } 199 return 0; 200 } 201 202 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) { 203 int32_t numNames=*nameGroup++; 204 if(nameIndex<0 || numNames<=nameIndex) { 205 return NULL; 206 } 207 // Skip nameIndex names. 208 for(; nameIndex>0; --nameIndex) { 209 nameGroup=uprv_strchr(nameGroup, 0)+1; 210 } 211 if(*nameGroup==0) { 212 return NULL; // no name (Property[Value]Aliases.txt has "n/a") 213 } 214 return nameGroup; 215 } 216 217 UBool PropNameData::containsName(BytesTrie &trie, const char *name) { 218 if(name==NULL) { 219 return FALSE; 220 } 221 UStringTrieResult result=USTRINGTRIE_NO_VALUE; 222 char c; 223 while((c=*name++)!=0) { 224 c=uprv_invCharToLowercaseAscii(c); 225 // Ignore delimiters '-', '_', and ASCII White_Space. 226 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) { 227 continue; 228 } 229 if(!USTRINGTRIE_HAS_NEXT(result)) { 230 return FALSE; 231 } 232 result=trie.next((uint8_t)c); 233 } 234 return USTRINGTRIE_HAS_VALUE(result); 235 } 236 237 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) { 238 int32_t valueMapIndex=findProperty(property); 239 if(valueMapIndex==0) { 240 return NULL; // Not a known property. 241 } 242 return getName(nameGroups+valueMaps[valueMapIndex], nameChoice); 243 } 244 245 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) { 246 int32_t valueMapIndex=findProperty(property); 247 if(valueMapIndex==0) { 248 return NULL; // Not a known property. 249 } 250 int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value); 251 if(nameGroupOffset==0) { 252 return NULL; 253 } 254 return getName(nameGroups+nameGroupOffset, nameChoice); 255 } 256 257 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) { 258 BytesTrie trie(bytesTries+bytesTrieOffset); 259 if(containsName(trie, alias)) { 260 return trie.getValue(); 261 } else { 262 return UCHAR_INVALID_CODE; 263 } 264 } 265 266 int32_t PropNameData::getPropertyEnum(const char *alias) { 267 return getPropertyOrValueEnum(0, alias); 268 } 269 270 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) { 271 int32_t valueMapIndex=findProperty(property); 272 if(valueMapIndex==0) { 273 return UCHAR_INVALID_CODE; // Not a known property. 274 } 275 valueMapIndex=valueMaps[valueMapIndex+1]; 276 if(valueMapIndex==0) { 277 return UCHAR_INVALID_CODE; // The property does not have named values. 278 } 279 // valueMapIndex is the start of the property's valueMap, 280 // where the first word is the BytesTrie offset. 281 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); 282 } 283 U_NAMESPACE_END 284 285 //---------------------------------------------------------------------- 286 // Public API implementation 287 288 U_CAPI const char* U_EXPORT2 289 u_getPropertyName(UProperty property, 290 UPropertyNameChoice nameChoice) { 291 U_NAMESPACE_USE 292 return PropNameData::getPropertyName(property, nameChoice); 293 } 294 295 U_CAPI UProperty U_EXPORT2 296 u_getPropertyEnum(const char* alias) { 297 U_NAMESPACE_USE 298 return (UProperty)PropNameData::getPropertyEnum(alias); 299 } 300 301 U_CAPI const char* U_EXPORT2 302 u_getPropertyValueName(UProperty property, 303 int32_t value, 304 UPropertyNameChoice nameChoice) { 305 U_NAMESPACE_USE 306 return PropNameData::getPropertyValueName(property, value, nameChoice); 307 } 308 309 U_CAPI int32_t U_EXPORT2 310 u_getPropertyValueEnum(UProperty property, 311 const char* alias) { 312 U_NAMESPACE_USE 313 return PropNameData::getPropertyValueEnum(property, alias); 314 } 315