1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ********************************************************************** 5 * Copyright (c) 2002-2015, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * Author: Alan Liu 9 * Created: November 5 2002 10 * Since: ICU 2.4 11 * 2010nov19 Markus Scherer Rewrite for formatVersion 2. 12 ********************************************************************** 13 */ 14 15 package com.ibm.icu.impl; 16 17 import java.io.IOException; 18 import java.nio.ByteBuffer; 19 import java.util.MissingResourceException; 20 21 import com.ibm.icu.lang.UProperty; 22 import com.ibm.icu.util.BytesTrie; 23 24 /** 25 * Wrapper for the pnames.icu binary data file. This data file is 26 * imported from icu4c. It contains property and property value 27 * aliases from the UCD files PropertyAliases.txt and 28 * PropertyValueAliases.txt. The file is built by the icu4c tool 29 * genpname. It must be an ASCII big-endian file to be 30 * usable in icu4j. 31 * 32 * This class performs two functions. 33 * 34 * (1) It can import the flat binary data into usable objects. 35 * 36 * (2) It provides an API to access the tree of objects. 37 * 38 * Needless to say, this class is tightly coupled to the binary format 39 * of icu4c's pnames.icu file. 40 * 41 * Each time a UPropertyAliases is constructed, the pnames.icu file is 42 * read, parsed, and data structures assembled. Clients should create one 43 * singleton instance and cache it. 44 * 45 * @author Alan Liu 46 * @since ICU 2.4 47 */ 48 public final class UPropertyAliases { 49 // Byte offsets from the start of the data, after the generic header. 50 private static final int IX_VALUE_MAPS_OFFSET=0; 51 private static final int IX_BYTE_TRIES_OFFSET=1; 52 private static final int IX_NAME_GROUPS_OFFSET=2; 53 private static final int IX_RESERVED3_OFFSET=3; 54 // private static final int IX_RESERVED4_OFFSET=4; 55 // private static final int IX_TOTAL_SIZE=5; 56 57 // Other values. 58 // private static final int IX_MAX_NAME_LENGTH=6; 59 // private static final int IX_RESERVED7=7; 60 // private static final int IX_COUNT=8; 61 62 //---------------------------------------------------------------- 63 // Runtime data. This is an unflattened representation of the 64 // data in pnames.icu. 65 66 private int[] valueMaps; 67 private byte[] bytesTries; 68 private String nameGroups; 69 70 private static final class IsAcceptable implements ICUBinary.Authenticate { 71 // @Override when we switch to Java 6 72 @Override 73 public boolean isDataVersionAcceptable(byte version[]) { 74 return version[0]==2; 75 } 76 } 77 private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable(); 78 private static final int DATA_FORMAT=0x706E616D; // "pnam" 79 80 private void load(ByteBuffer bytes) throws IOException { 81 //dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE); 82 ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE); 83 int indexesLength=bytes.getInt()/4; // inIndexes[IX_VALUE_MAPS_OFFSET]/4 84 if(indexesLength<8) { // formatVersion 2 initially has 8 indexes 85 throw new IOException("pnames.icu: not enough indexes"); 86 } 87 int[] inIndexes=new int[indexesLength]; 88 inIndexes[0]=indexesLength*4; 89 for(int i=1; i<indexesLength; ++i) { 90 inIndexes[i]=bytes.getInt(); 91 } 92 93 // Read the valueMaps. 94 int offset=inIndexes[IX_VALUE_MAPS_OFFSET]; 95 int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET]; 96 int numInts=(nextOffset-offset)/4; 97 valueMaps=ICUBinary.getInts(bytes, numInts, 0); 98 99 // Read the bytesTries. 100 offset=nextOffset; 101 nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET]; 102 int numBytes=nextOffset-offset; 103 bytesTries=new byte[numBytes]; 104 bytes.get(bytesTries); 105 106 // Read the nameGroups and turn them from ASCII bytes into a Java String. 107 offset=nextOffset; 108 nextOffset=inIndexes[IX_RESERVED3_OFFSET]; 109 numBytes=nextOffset-offset; 110 StringBuilder sb=new StringBuilder(numBytes); 111 for(int i=0; i<numBytes; ++i) { 112 sb.append((char)bytes.get()); 113 } 114 nameGroups=sb.toString(); 115 } 116 117 private UPropertyAliases() throws IOException { 118 ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu"); 119 load(bytes); 120 } 121 122 private int findProperty(int property) { 123 int i=1; // valueMaps index, initially after numRanges 124 for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) { 125 // Read and skip the start and limit of this range. 126 int start=valueMaps[i]; 127 int limit=valueMaps[i+1]; 128 i+=2; 129 if(property<start) { 130 break; 131 } 132 if(property<limit) { 133 return i+(property-start)*2; 134 } 135 i+=(limit-start)*2; // Skip all entries for this range. 136 } 137 return 0; 138 } 139 140 private int findPropertyValueNameGroup(int valueMapIndex, int value) { 141 if(valueMapIndex==0) { 142 return 0; // The property does not have named values. 143 } 144 ++valueMapIndex; // Skip the BytesTrie offset. 145 int numRanges=valueMaps[valueMapIndex++]; 146 if(numRanges<0x10) { 147 // Ranges of values. 148 for(; numRanges>0; --numRanges) { 149 // Read and skip the start and limit of this range. 150 int start=valueMaps[valueMapIndex]; 151 int limit=valueMaps[valueMapIndex+1]; 152 valueMapIndex+=2; 153 if(value<start) { 154 break; 155 } 156 if(value<limit) { 157 return valueMaps[valueMapIndex+value-start]; 158 } 159 valueMapIndex+=limit-start; // Skip all entries for this range. 160 } 161 } else { 162 // List of values. 163 int valuesStart=valueMapIndex; 164 int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10; 165 do { 166 int v=valueMaps[valueMapIndex]; 167 if(value<v) { 168 break; 169 } 170 if(value==v) { 171 return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart]; 172 } 173 } while(++valueMapIndex<nameGroupOffsetsStart); 174 } 175 return 0; 176 } 177 178 private String getName(int nameGroupsIndex, int nameIndex) { 179 int numNames=nameGroups.charAt(nameGroupsIndex++); 180 if(nameIndex<0 || numNames<=nameIndex) { 181 throw new IllegalIcuArgumentException("Invalid property (value) name choice"); 182 } 183 // Skip nameIndex names. 184 for(; nameIndex>0; --nameIndex) { 185 while(0!=nameGroups.charAt(nameGroupsIndex++)) {} 186 } 187 // Find the end of this name. 188 int nameStart=nameGroupsIndex; 189 while(0!=nameGroups.charAt(nameGroupsIndex)) { 190 ++nameGroupsIndex; 191 } 192 if(nameStart==nameGroupsIndex) { 193 return null; // no name (Property[Value]Aliases.txt has "n/a") 194 } 195 return nameGroups.substring(nameStart, nameGroupsIndex); 196 } 197 198 private static int asciiToLowercase(int c) { 199 return 'A'<=c && c<='Z' ? c+0x20 : c; 200 } 201 202 private boolean containsName(BytesTrie trie, CharSequence name) { 203 BytesTrie.Result result=BytesTrie.Result.NO_VALUE; 204 for(int i=0; i<name.length(); ++i) { 205 int c=name.charAt(i); 206 // Ignore delimiters '-', '_', and ASCII White_Space. 207 if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) { 208 continue; 209 } 210 if(!result.hasNext()) { 211 return false; 212 } 213 c=asciiToLowercase(c); 214 result=trie.next(c); 215 } 216 return result.hasValue(); 217 } 218 219 //---------------------------------------------------------------- 220 // Public API 221 222 public static final UPropertyAliases INSTANCE; 223 224 static { 225 try { 226 INSTANCE = new UPropertyAliases(); 227 } catch(IOException e) { 228 ///CLOVER:OFF 229 MissingResourceException mre = new MissingResourceException( 230 "Could not construct UPropertyAliases. Missing pnames.icu", "", ""); 231 mre.initCause(e); 232 throw mre; 233 ///CLOVER:ON 234 } 235 } 236 237 /** 238 * Returns a property name given a property enum. 239 * Multiple names may be available for each property; 240 * the nameChoice selects among them. 241 */ 242 public String getPropertyName(int property, int nameChoice) { 243 int valueMapIndex=findProperty(property); 244 if(valueMapIndex==0) { 245 throw new IllegalArgumentException( 246 "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")"); 247 } 248 return getName(valueMaps[valueMapIndex], nameChoice); 249 } 250 251 /** 252 * Returns a value name given a property enum and a value enum. 253 * Multiple names may be available for each value; 254 * the nameChoice selects among them. 255 */ 256 public String getPropertyValueName(int property, int value, int nameChoice) { 257 int valueMapIndex=findProperty(property); 258 if(valueMapIndex==0) { 259 throw new IllegalArgumentException( 260 "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")"); 261 } 262 int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value); 263 if(nameGroupOffset==0) { 264 throw new IllegalArgumentException( 265 "Property "+property+" (0x"+Integer.toHexString(property)+ 266 ") does not have named values"); 267 } 268 return getName(nameGroupOffset, nameChoice); 269 } 270 271 private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) { 272 BytesTrie trie=new BytesTrie(bytesTries, bytesTrieOffset); 273 if(containsName(trie, alias)) { 274 return trie.getValue(); 275 } else { 276 return UProperty.UNDEFINED; 277 } 278 } 279 280 /** 281 * Returns a property enum given one of its property names. 282 * If the property name is not known, this method returns 283 * UProperty.UNDEFINED. 284 */ 285 public int getPropertyEnum(CharSequence alias) { 286 return getPropertyOrValueEnum(0, alias); 287 } 288 289 /** 290 * Returns a value enum given a property enum and one of its value names. 291 */ 292 public int getPropertyValueEnum(int property, CharSequence alias) { 293 int valueMapIndex=findProperty(property); 294 if(valueMapIndex==0) { 295 throw new IllegalArgumentException( 296 "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")"); 297 } 298 valueMapIndex=valueMaps[valueMapIndex+1]; 299 if(valueMapIndex==0) { 300 throw new IllegalArgumentException( 301 "Property "+property+" (0x"+Integer.toHexString(property)+ 302 ") does not have named values"); 303 } 304 // valueMapIndex is the start of the property's valueMap, 305 // where the first word is the BytesTrie offset. 306 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); 307 } 308 309 /** 310 * Returns a value enum given a property enum and one of its value names. Does not throw. 311 * @return value enum, or UProperty.UNDEFINED if not defined for that property 312 */ 313 public int getPropertyValueEnumNoThrow(int property, CharSequence alias) { 314 int valueMapIndex=findProperty(property); 315 if(valueMapIndex==0) { 316 return UProperty.UNDEFINED; 317 } 318 valueMapIndex=valueMaps[valueMapIndex+1]; 319 if(valueMapIndex==0) { 320 return UProperty.UNDEFINED; 321 } 322 // valueMapIndex is the start of the property's valueMap, 323 // where the first word is the BytesTrie offset. 324 return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias); 325 } 326 327 /** 328 * Compare two property names, returning <0, 0, or >0. The 329 * comparison is that described as "loose" matching in the 330 * Property*Aliases.txt files. 331 */ 332 public static int compare(String stra, String strb) { 333 // Note: This implementation is a literal copy of 334 // uprv_comparePropertyNames. It can probably be improved. 335 int istra=0, istrb=0, rc; 336 int cstra=0, cstrb=0; 337 for (;;) { 338 /* Ignore delimiters '-', '_', and ASCII White_Space */ 339 while (istra<stra.length()) { 340 cstra = stra.charAt(istra); 341 switch (cstra) { 342 case '-': case '_': case ' ': case '\t': 343 case '\n': case 0xb/*\v*/: case '\f': case '\r': 344 ++istra; 345 continue; 346 } 347 break; 348 } 349 350 while (istrb<strb.length()) { 351 cstrb = strb.charAt(istrb); 352 switch (cstrb) { 353 case '-': case '_': case ' ': case '\t': 354 case '\n': case 0xb/*\v*/: case '\f': case '\r': 355 ++istrb; 356 continue; 357 } 358 break; 359 } 360 361 /* If we reach the ends of both strings then they match */ 362 boolean endstra = istra==stra.length(); 363 boolean endstrb = istrb==strb.length(); 364 if (endstra) { 365 if (endstrb) return 0; 366 cstra = 0; 367 } else if (endstrb) { 368 cstrb = 0; 369 } 370 371 rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb); 372 if (rc != 0) { 373 return rc; 374 } 375 376 ++istra; 377 ++istrb; 378 } 379 } 380 } 381