1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (c) 2002-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Author: Alan Liu 9 * Created: October 30 2002 10 * Since: ICU 2.4 11 * 2010nov19 Markus Scherer Rewrite for formatVersion 2. 12 ********************************************************************** 13 */ 14 #include "propname.h" 15 #include "unicode/uchar.h" 16 #include "unicode/udata.h" 17 #include "unicode/uscript.h" 18 #include "umutex.h" 19 #include "cmemory.h" 20 #include "cstring.h" 21 #include "uarrsort.h" 22 #include "uinvchar.h" 23 24 #define INCLUDED_FROM_PROPNAME_CPP 25 #include "propname_data.h" 26 27 U_CDECL_BEGIN 28 29 /** 30 * Get the next non-ignorable ASCII character from a property name 31 * and lowercases it. 32 * @return ((advance count for the name)<<8)|character 33 */ 34 static inline int32_t 35 getASCIIPropertyNameChar(const char *name) { 36 int32_t i; 37 char c; 38 39 /* Ignore delimiters '-', '_', and ASCII White_Space */ 40 for(i=0; 41 (c=name[i++])==0x2d || c==0x5f || 42 c==0x20 || (0x09<=c && c<=0x0d); 43 ) {} 44 45 if(c!=0) { 46 return (i<<8)|(uint8_t)uprv_asciitolower((char)c); 47 } else { 48 return i<<8; 49 } 50 } 51 52 /** 53 * Get the next non-ignorable EBCDIC character from a property name 54 * and lowercases it. 55 * @return ((advance count for the name)<<8)|character 56 */ 57 static inline int32_t 58 getEBCDICPropertyNameChar(const char *name) { 59 int32_t i; 60 char c; 61 62 /* Ignore delimiters '-', '_', and EBCDIC White_Space */ 63 for(i=0; 64 (c=name[i++])==0x60 || c==0x6d || 65 c==0x40 || c==0x05 || c==0x15 || c==0x25 || c==0x0b || c==0x0c || c==0x0d; 66 ) {} 67 68 if(c!=0) { 69 return (i<<8)|(uint8_t)uprv_ebcdictolower((char)c); 70 } else { 71 return i<<8; 72 } 73 } 74 75 /** 76 * Unicode property names and property value names are compared "loosely". 77 * 78 * UCD.html 4.0.1 says: 79 * For all property names, property value names, and for property values for 80 * Enumerated, Binary, or Catalog properties, use the following 81 * loose matching rule: 82 * 83 * LM3. Ignore case, whitespace, underscore ('_'), and hyphens. 84 * 85 * This function does just that, for (char *) name strings. 86 * It is almost identical to ucnv_compareNames() but also ignores 87 * C0 White_Space characters (U+0009..U+000d, and U+0085 on EBCDIC). 88 * 89 * @internal 90 */ 91 92 U_CAPI int32_t U_EXPORT2 93 uprv_compareASCIIPropertyNames(const char *name1, const char *name2) { 94 int32_t rc, r1, r2; 95 96 for(;;) { 97 r1=getASCIIPropertyNameChar(name1); 98 r2=getASCIIPropertyNameChar(name2); 99 100 /* If we reach the ends of both strings then they match */ 101 if(((r1|r2)&0xff)==0) { 102 return 0; 103 } 104 105 /* Compare the lowercased characters */ 106 if(r1!=r2) { 107 rc=(r1&0xff)-(r2&0xff); 108 if(rc!=0) { 109 return rc; 110 } 111 } 112 113 name1+=r1>>8; 114 name2+=r2>>8; 115 } 116 } 117 118 U_CAPI int32_t U_EXPORT2 119 uprv_compareEBCDICPropertyNames(const char *name1, const char *name2) { 120 int32_t rc, r1, r2; 121 122 for(;;) { 123 r1=getEBCDICPropertyNameChar(name1); 124 r2=getEBCDICPropertyNameChar(name2); 125 126 /* If we reach the ends of both strings then they match */ 127 if(((r1|r2)&0xff)==0) { 128 return 0; 129 } 130 131 /* Compare the lowercased characters */ 132 if(r1!=r2) { 133 rc=(r1&0xff)-(r2&0xff); 134 if(rc!=0) { 135 return rc; 136 } 137 } 138 139 name1+=r1>>8; 140 name2+=r2>>8; 141 } 142 } 143 144 U_CDECL_END 145 146 U_NAMESPACE_BEGIN 147 148 int32_t PropNameData::findProperty(int32_t property) { 149 int32_t i=1; // valueMaps index, initially after numRanges 150 for(int32_t numRanges=valueMaps[0]; numRanges>0; --numRanges) { 151 // Read and skip the start and limit of this range. 152 int32_t start=valueMaps[i]; 153 int32_t limit=valueMaps[i+1]; 154 i+=2; 155 if(property<start) { 156 break; 157 } 158 if(property<limit) { 159 return i+(property-start)*2; 160 } 161 i+=(limit-start)*2; // Skip all entries for this range. 162 } 163 return 0; 164 } 165 166 int32_t PropNameData::findPropertyValueNameGroup(int32_t valueMapIndex, int32_t value) { 167 if(valueMapIndex==0) { 168 return 0; // The property does not have named values. 169 } 170 ++valueMapIndex; // Skip the BytesTrie offset. 171 int32_t numRanges=valueMaps[valueMapIndex++]; 172 if(numRanges<0x10) { 173 // Ranges of values. 174 for(; numRanges>0; --numRanges) { 175 // Read and skip the start and limit of this range. 176 int32_t start=valueMaps[valueMapIndex]; 177 int32_t limit=valueMaps[valueMapIndex+1]; 178 valueMapIndex+=2; 179 if(value<start) { 180 break; 181 } 182 if(value<limit) { 183 return valueMaps[valueMapIndex+value-start]; 184 } 185 valueMapIndex+=limit-start; // Skip all entries for this range. 186 } 187 } else { 188 // List of values. 189 int32_t valuesStart=valueMapIndex; 190 int32_t nameGroupOffsetsStart=valueMapIndex+numRanges-0x10; 191 do { 192 int32_t v=valueMaps[valueMapIndex]; 193 if(value<v) { 194 break; 195 } 196 if(value==v) { 197 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart]; 198 } 199 } while(++valueMapIndex<nameGroupOffsetsStart); 200 } 201 return 0; 202 } 203 204 const char *PropNameData::getName(const char *nameGroup, int32_t nameIndex) { 205 int32_t numNames=*nameGroup++; 206 if(nameIndex<0 || numNames<=nameIndex) { 207 return NULL; 208 } 209 // Skip nameIndex names. 210 for(; nameIndex>0; --nameIndex) { 211 nameGroup=uprv_strchr(nameGroup, 0)+1; 212 } 213 if(*nameGroup==0) { 214 return NULL; // no name (Property[Value]Aliases.txt has "n/a") 215 } 216 return nameGroup; 217 } 218 219 UBool PropNameData::containsName(BytesTrie &trie, const char *name) { 220 if(name==NULL) { 221 return FALSE; 222 } 223 UStringTrieResult result=USTRINGTRIE_NO_VALUE; 224 char c; 225 while((c=*name++)!=0) { 226 c=uprv_invCharToLowercaseAscii(c); 227 // Ignore delimiters '-', '_', and ASCII White_Space. 228 if(c==0x2d || c==0x5f || c==0x20 || (0x09<=c && c<=0x0d)) { 229 continue; 230 } 231 if(!USTRINGTRIE_HAS_NEXT(result)) { 232 return FALSE; 233 } 234 result=trie.next((uint8_t)c); 235 } 236 return USTRINGTRIE_HAS_VALUE(result); 237 } 238 239 const char *PropNameData::getPropertyName(int32_t property, int32_t nameChoice) { 240 int32_t valueMapIndex=findProperty(property); 241 if(valueMapIndex==0) { 242 return NULL; // Not a known property. 243 } 244 return getName(nameGroups+valueMaps[valueMapIndex], nameChoice); 245 } 246 247 const char *PropNameData::getPropertyValueName(int32_t property, int32_t value, int32_t nameChoice) { 248 int32_t valueMapIndex=findProperty(property); 249 if(valueMapIndex==0) { 250 return NULL; // Not a known property. 251 } 252 int32_t nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value); 253 if(nameGroupOffset==0) { 254 return NULL; 255 } 256 return getName(nameGroups+nameGroupOffset, nameChoice); 257 } 258 259 int32_t PropNameData::getPropertyOrValueEnum(int32_t bytesTrieOffset, const char *alias) { 260 BytesTrie trie(bytesTries+bytesTrieOffset); 261 if(containsName(trie, alias)) { 262 return trie.getValue(); 263 } else { 264 return UCHAR_INVALID_CODE; 265 } 266 } 267 268 int32_t PropNameData::getPropertyEnum(const char *alias) { 269 return getPropertyOrValueEnum(0, alias); 270 } 271 272 int32_t PropNameData::getPropertyValueEnum(int32_t property, const char *alias) { 273 int32_t valueMapIndex=findProperty(property); 274 if(valueMapIndex==0) { 275 return UCHAR_INVALID_CODE; // Not a known property. 276 } 277 valueMapIndex=valueMaps[valueMapIndex+1]; 278 if(valueMapIndex==0) { 279 return UCHAR_INVALID_CODE; // The property does not have named values. 280 } 281 // valueMapIndex is the start of the property's valueMap, 282 // where the first word is the BytesTrie offset. 283 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); 284 } 285 U_NAMESPACE_END 286 287 //---------------------------------------------------------------------- 288 // Public API implementation 289 290 U_CAPI const char* U_EXPORT2 291 u_getPropertyName(UProperty property, 292 UPropertyNameChoice nameChoice) { 293 U_NAMESPACE_USE 294 return PropNameData::getPropertyName(property, nameChoice); 295 } 296 297 U_CAPI UProperty U_EXPORT2 298 u_getPropertyEnum(const char* alias) { 299 U_NAMESPACE_USE 300 return (UProperty)PropNameData::getPropertyEnum(alias); 301 } 302 303 U_CAPI const char* U_EXPORT2 304 u_getPropertyValueName(UProperty property, 305 int32_t value, 306 UPropertyNameChoice nameChoice) { 307 U_NAMESPACE_USE 308 return PropNameData::getPropertyValueName(property, value, nameChoice); 309 } 310 311 U_CAPI int32_t U_EXPORT2 312 u_getPropertyValueEnum(UProperty property, 313 const char* alias) { 314 U_NAMESPACE_USE 315 return PropNameData::getPropertyValueEnum(property, alias); 316 } 317 318 U_CAPI const char* U_EXPORT2 319 uscript_getName(UScriptCode scriptCode){ 320 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, 321 U_LONG_PROPERTY_NAME); 322 } 323 324 U_CAPI const char* U_EXPORT2 325 uscript_getShortName(UScriptCode scriptCode){ 326 return u_getPropertyValueName(UCHAR_SCRIPT, scriptCode, 327 U_SHORT_PROPERTY_NAME); 328 } 329