/* GENERATED SOURCE. DO NOT MODIFY. */
// 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
 **********************************************************************
 * Copyright (c) 2002-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 **********************************************************************
 * Author: Alan Liu
 * Created: November 5 2002
 * Since: ICU 2.4
 * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
 **********************************************************************
 */

package android.icu.impl;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.MissingResourceException;

import android.icu.lang.UProperty;
import android.icu.util.BytesTrie;

/**
 * Wrapper for the pnames.icu binary data file.  This data file is
 * imported from icu4c.  It contains property and property value
 * aliases from the UCD files PropertyAliases.txt and
 * PropertyValueAliases.txt.  The file is built by the icu4c tool
 * genpname.  It must be an ASCII big-endian file to be
 * usable in icu4j.
 *
 * This class performs two functions.
 *
 * (1) It can import the flat binary data into usable objects.
 *
 * (2) It provides an API to access the tree of objects.
 *
 * Needless to say, this class is tightly coupled to the binary format
 * of icu4c's pnames.icu file.
 *
 * Each time a UPropertyAliases is constructed, the pnames.icu file is
 * read, parsed, and data structures assembled.  The constructor is
 * private; clients use the shared {@link #INSTANCE}.
 *
 * @author Alan Liu
 * @hide Only a subset of ICU is exposed in Android
 */
public final class UPropertyAliases {
    // Byte offsets from the start of the data, after the generic header.
    private static final int IX_VALUE_MAPS_OFFSET=0;
    private static final int IX_BYTE_TRIES_OFFSET=1;
    private static final int IX_NAME_GROUPS_OFFSET=2;
    private static final int IX_RESERVED3_OFFSET=3;
    // private static final int IX_RESERVED4_OFFSET=4;
    // private static final int IX_TOTAL_SIZE=5;

    // Other values.
    // private static final int IX_MAX_NAME_LENGTH=6;
    // private static final int IX_RESERVED7=7;
    // private static final int IX_COUNT=8;

    //----------------------------------------------------------------
    // Runtime data.  This is an unflattened representation of the
    // data in pnames.icu.

    private int[] valueMaps;
    private byte[] bytesTries;
    private String nameGroups;

    /** Accepts only data whose first version byte is 2 (formatVersion 2). */
    private static final class IsAcceptable implements ICUBinary.Authenticate {
        @Override
        public boolean isDataVersionAcceptable(byte version[]) {
            return version[0]==2;
        }
    }
    private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
    private static final int DATA_FORMAT=0x706E616D;  // "pnam"

    /**
     * Reads the header, the indexes, and the three data sections
     * (valueMaps, bytesTries, nameGroups) from the buffer, in that order.
     *
     * @param bytes the pnames.icu data, positioned at the generic header
     * @throws IOException if there are fewer than 8 indexes; header validation
     *         is delegated to ICUBinary.readHeader
     */
    private void load(ByteBuffer bytes) throws IOException {
        //dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
        ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
        // The first index is the byte offset of the valueMaps, which start right
        // after the indexes, so it also tells us how many indexes there are.
        int indexesLength=bytes.getInt()/4;  // inIndexes[IX_VALUE_MAPS_OFFSET]/4
        if(indexesLength<8) {  // formatVersion 2 initially has 8 indexes
            throw new IOException("pnames.icu: not enough indexes");
        }
        int[] inIndexes=new int[indexesLength];
        inIndexes[0]=indexesLength*4;
        for(int i=1; i<indexesLength; ++i) {
            inIndexes[i]=bytes.getInt();
        }

        // Read the valueMaps.
        int offset=inIndexes[IX_VALUE_MAPS_OFFSET];
        int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET];
        int numInts=(nextOffset-offset)/4;
        valueMaps=ICUBinary.getInts(bytes, numInts, 0);

        // Read the bytesTries.
        offset=nextOffset;
        nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET];
        int numBytes=nextOffset-offset;
        bytesTries=new byte[numBytes];
        bytes.get(bytesTries);

        // Read the nameGroups and turn them from ASCII bytes into a Java String.
        offset=nextOffset;
        nextOffset=inIndexes[IX_RESERVED3_OFFSET];
        numBytes=nextOffset-offset;
        StringBuilder sb=new StringBuilder(numBytes);
        for(int i=0; i<numBytes; ++i) {
            sb.append((char)bytes.get());
        }
        nameGroups=sb.toString();
    }

    /**
     * Loads pnames.icu via ICUBinary.getRequiredData and parses it.
     *
     * @throws IOException if parsing the data fails
     */
    private UPropertyAliases() throws IOException {
        ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");
        load(bytes);
    }

    /**
     * Finds the valueMaps entry for a property enum.
     *
     * valueMaps[0] is the number of property ranges.  Each range is stored as
     * start, limit, and then two ints per property in [start, limit).
     *
     * @param property the property enum
     * @return the valueMaps index of the property's two-int entry,
     *         or 0 if the property is not in any range
     */
    private int findProperty(int property) {
        int i=1;  // valueMaps index, initially after numRanges
        for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) {
            // Read and skip the start and limit of this range.
            int start=valueMaps[i];
            int limit=valueMaps[i+1];
            i+=2;
            if(property<start) {
                break;
            }
            if(property<limit) {
                return i+(property-start)*2;
            }
            i+=(limit-start)*2;  // Skip all entries for this range.
        }
        return 0;
    }

    /**
     * Finds the name group offset for a value of a property.
     *
     * The value map starts with the BytesTrie offset, followed by a count word.
     * A count below 0x10 is a number of [start, limit) ranges, each followed by
     * one name group offset per value.  Otherwise (count-0x10) sorted values are
     * listed, followed by the same number of name group offsets.
     *
     * @param valueMapIndex index of the property's value map in valueMaps,
     *        or 0 if the property has no named values
     * @param value the property value enum
     * @return the offset of the value's name group in nameGroups, or 0 if not found
     */
    private int findPropertyValueNameGroup(int valueMapIndex, int value) {
        if(valueMapIndex==0) {
            return 0;  // The property does not have named values.
        }
        ++valueMapIndex;  // Skip the BytesTrie offset.
        int numRanges=valueMaps[valueMapIndex++];
        if(numRanges<0x10) {
            // Ranges of values.
            for(; numRanges>0; --numRanges) {
                // Read and skip the start and limit of this range.
                int start=valueMaps[valueMapIndex];
                int limit=valueMaps[valueMapIndex+1];
                valueMapIndex+=2;
                if(value<start) {
                    break;
                }
                if(value<limit) {
                    return valueMaps[valueMapIndex+value-start];
                }
                valueMapIndex+=limit-start;  // Skip all entries for this range.
            }
        } else {
            // List of values.
            int valuesStart=valueMapIndex;
            int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
            do {
                int v=valueMaps[valueMapIndex];
                if(value<v) {
                    break;
                }
                if(value==v) {
                    return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
                }
            } while(++valueMapIndex<nameGroupOffsetsStart);
        }
        return 0;
    }

    /**
     * Returns one name out of a name group.
     *
     * A name group is a count character followed by that many
     * NUL-terminated names.
     *
     * @param nameGroupsIndex offset of the name group in nameGroups
     * @param nameIndex which name of the group to return
     * @return the name, or null if the selected name is empty
     * @throws IllegalIcuArgumentException if nameIndex is negative or
     *         not less than the number of names in the group
     */
    private String getName(int nameGroupsIndex, int nameIndex) {
        int numNames=nameGroups.charAt(nameGroupsIndex++);
        if(nameIndex<0 || numNames<=nameIndex) {
            throw new IllegalIcuArgumentException("Invalid property (value) name choice");
        }
        // Skip nameIndex names.
        for(; nameIndex>0; --nameIndex) {
            while(0!=nameGroups.charAt(nameGroupsIndex++)) {}
        }
        // Find the end of this name.
        int nameStart=nameGroupsIndex;
        while(0!=nameGroups.charAt(nameGroupsIndex)) {
            ++nameGroupsIndex;
        }
        if(nameStart==nameGroupsIndex) {
            return null;  // no name (Property[Value]Aliases.txt has "n/a")
        }
        return nameGroups.substring(nameStart, nameGroupsIndex);
    }

    /** Maps 'A'..'Z' to 'a'..'z' and returns every other value unchanged. */
    private static int asciiToLowercase(int c) {
        return 'A'<=c && c<='Z' ? c+0x20 : c;
    }

    /**
     * Walks the trie with the loosely-normalized name: delimiters are skipped
     * and ASCII letters are lowercased before each trie step.
     *
     * @param trie the trie to advance; left at the state reached by the name
     * @param name the alias to look up
     * @return true if the trie has a value after consuming the name
     */
    private boolean containsName(BytesTrie trie, CharSequence name) {
        BytesTrie.Result result=BytesTrie.Result.NO_VALUE;
        for(int i=0; i<name.length(); ++i) {
            int c=name.charAt(i);
            // Ignore delimiters '-', '_', and ASCII White_Space.
            if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) {
                continue;
            }
            if(!result.hasNext()) {
                return false;
            }
            c=asciiToLowercase(c);
            result=trie.next(c);
        }
        return result.hasValue();
    }

    //----------------------------------------------------------------
    // Public API

    public static final UPropertyAliases INSTANCE;

    static {
        try {
            INSTANCE = new UPropertyAliases();
        } catch(IOException e) {
            ///CLOVER:OFF
            MissingResourceException mre = new MissingResourceException(
                    "Could not construct UPropertyAliases. Missing pnames.icu", "", "");
            mre.initCause(e);
            throw mre;
            ///CLOVER:ON
        }
    }

    /**
     * Returns a property name given a property enum.
     * Multiple names may be available for each property;
     * the nameChoice selects among them.
     *
     * @return the selected name, or null if that name is empty in the data
     * @throws IllegalArgumentException if the property enum is not found
     * @throws IllegalIcuArgumentException if nameChoice is out of range
     */
    public String getPropertyName(int property, int nameChoice) {
        int valueMapIndex=findProperty(property);
        if(valueMapIndex==0) {
            throw new IllegalArgumentException(
                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
        }
        return getName(valueMaps[valueMapIndex], nameChoice);
    }

    /**
     * Returns a value name given a property enum and a value enum.
     * Multiple names may be available for each value;
     * the nameChoice selects among them.
     *
     * @return the selected name, or null if that name is empty in the data
     * @throws IllegalArgumentException if the property enum is not found,
     *         or if no name group is found for the value
     * @throws IllegalIcuArgumentException if nameChoice is out of range
     */
    public String getPropertyValueName(int property, int value, int nameChoice) {
        int valueMapIndex=findProperty(property);
        if(valueMapIndex==0) {
            throw new IllegalArgumentException(
                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
        }
        int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
        if(nameGroupOffset==0) {
            throw new IllegalArgumentException(
                    "Property "+property+" (0x"+Integer.toHexString(property)+
                    ") does not have named values");
        }
        return getName(nameGroupOffset, nameChoice);
    }

    /**
     * Looks up an alias in the BytesTrie that starts at the given offset
     * in bytesTries.
     *
     * @return the trie value for the alias, or UProperty.UNDEFINED if not found
     */
    private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
        BytesTrie trie=new BytesTrie(bytesTries, bytesTrieOffset);
        if(containsName(trie, alias)) {
            return trie.getValue();
        } else {
            return UProperty.UNDEFINED;
        }
    }

    /**
     * Returns a property enum given one of its property names.
     * If the property name is not known, this method returns
     * UProperty.UNDEFINED.
     */
    public int getPropertyEnum(CharSequence alias) {
        return getPropertyOrValueEnum(0, alias);
    }

    /**
     * Returns a value enum given a property enum and one of its value names.
     *
     * @return the value enum, or UProperty.UNDEFINED if the name is not known
     * @throws IllegalArgumentException if the property enum is not found
     *         or the property does not have named values
     */
    public int getPropertyValueEnum(int property, CharSequence alias) {
        int valueMapIndex=findProperty(property);
        if(valueMapIndex==0) {
            throw new IllegalArgumentException(
                    "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
        }
        valueMapIndex=valueMaps[valueMapIndex+1];
        if(valueMapIndex==0) {
            throw new IllegalArgumentException(
                    "Property "+property+" (0x"+Integer.toHexString(property)+
                    ") does not have named values");
        }
        // valueMapIndex is the start of the property's valueMap,
        // where the first word is the BytesTrie offset.
        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    }

    /**
     * Returns a value enum given a property enum and one of its value names. Does not throw.
     * @return value enum, or UProperty.UNDEFINED if not defined for that property
     */
    public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
        int valueMapIndex=findProperty(property);
        if(valueMapIndex==0) {
            return UProperty.UNDEFINED;
        }
        valueMapIndex=valueMaps[valueMapIndex+1];
        if(valueMapIndex==0) {
            return UProperty.UNDEFINED;
        }
        // valueMapIndex is the start of the property's valueMap,
        // where the first word is the BytesTrie offset.
        return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    }

    /**
     * Compare two property names, returning a negative value, zero, or a
     * positive value.  The comparison is that described as "loose" matching
     * in the Property*Aliases.txt files: the delimiters '-', '_' and ASCII
     * white space are ignored, and ASCII letters compare case-insensitively.
     *
     * A string that ends first sorts before the longer one, including when
     * the longer string continues with U+0000.
     */
    public static int compare(String stra, String strb) {
        // Note: This implementation is derived from
        // uprv_comparePropertyNames.  It can probably be improved.
        int istra=0, istrb=0, rc;
        int cstra=0, cstrb=0;
        for (;;) {
            /* Ignore delimiters '-', '_', and ASCII White_Space */
            while (istra<stra.length()) {
                cstra = stra.charAt(istra);
                switch (cstra) {
                case '-':  case '_':  case ' ':  case '\t':
                case '\n': case 0xb/*\v*/: case '\f': case '\r':
                    ++istra;
                    continue;
                }
                break;
            }

            while (istrb<strb.length()) {
                cstrb = strb.charAt(istrb);
                switch (cstrb) {
                case '-':  case '_':  case ' ':  case '\t':
                case '\n': case 0xb/*\v*/: case '\f': case '\r':
                    ++istrb;
                    continue;
                }
                break;
            }

            /* If we reach the ends of both strings then they match */
            boolean endstra = istra==stra.length();
            boolean endstrb = istrb==strb.length();
            if (endstra) {
                if (endstrb) return 0;
                cstra = 0;
            } else if (endstrb) {
                cstrb = 0;
            }

            rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
            if (rc != 0) {
                return rc;
            }

            // rc==0 with exactly one string exhausted means the other string has
            // an embedded U+0000 here.  Unlike the C original, Java strings are
            // not NUL-terminated, so decide now: advancing would push the
            // exhausted index past its length, the == end test above would never
            // fire again, and the loop would run until the index overflows.
            if (endstra) {
                return -1;
            }
            if (endstrb) {
                return 1;
            }

            ++istra;
            ++istrb;
        }
    }
}