Home | History | Annotate | Download | only in impl
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  **********************************************************************
      6  * Copyright (c) 2002-2015, International Business Machines
      7  * Corporation and others.  All Rights Reserved.
      8  **********************************************************************
      9  * Author: Alan Liu
     10  * Created: November 5 2002
     11  * Since: ICU 2.4
     12  * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
     13  **********************************************************************
     14  */
     15 
     16 package android.icu.impl;
     17 
     18 import java.io.IOException;
     19 import java.nio.ByteBuffer;
     20 import java.util.MissingResourceException;
     21 
     22 import android.icu.lang.UProperty;
     23 import android.icu.util.BytesTrie;
     24 
     25 /**
     26  * Wrapper for the pnames.icu binary data file.  This data file is
     27  * imported from icu4c.  It contains property and property value
     28  * aliases from the UCD files PropertyAliases.txt and
     29  * PropertyValueAliases.txt.  The file is built by the icu4c tool
     30  * genpname.  It must be an ASCII big-endian file to be
     31  * usable in icu4j.
     32  *
     33  * This class performs two functions.
     34  *
     35  * (1) It can import the flat binary data into usable objects.
     36  *
     37  * (2) It provides an API to access the tree of objects.
     38  *
     39  * Needless to say, this class is tightly coupled to the binary format
     40  * of icu4c's pnames.icu file.
     41  *
     42  * Each time a UPropertyAliases is constructed, the pnames.icu file is
     43  * read, parsed, and data structures assembled.  Clients should create one
     44  * singleton instance and cache it.
     45  *
     46  * @author Alan Liu
     47  * @hide Only a subset of ICU is exposed in Android
     48  */
     49 public final class UPropertyAliases {
     50     // Byte offsets from the start of the data, after the generic header.
     51     private static final int IX_VALUE_MAPS_OFFSET=0;
     52     private static final int IX_BYTE_TRIES_OFFSET=1;
     53     private static final int IX_NAME_GROUPS_OFFSET=2;
     54     private static final int IX_RESERVED3_OFFSET=3;
     55     // private static final int IX_RESERVED4_OFFSET=4;
     56     // private static final int IX_TOTAL_SIZE=5;
     57 
     58     // Other values.
     59     // private static final int IX_MAX_NAME_LENGTH=6;
     60     // private static final int IX_RESERVED7=7;
     61     // private static final int IX_COUNT=8;
     62 
     63     //----------------------------------------------------------------
     64     // Runtime data.  This is an unflattened representation of the
     65     // data in pnames.icu.
     66 
     67     private int[] valueMaps;
     68     private byte[] bytesTries;
     69     private String nameGroups;
     70 
     71     private static final class IsAcceptable implements ICUBinary.Authenticate {
     72         // @Override when we switch to Java 6
     73         @Override
     74         public boolean isDataVersionAcceptable(byte version[]) {
     75             return version[0]==2;
     76         }
     77     }
     78     private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
     79     private static final int DATA_FORMAT=0x706E616D;  // "pnam"
     80 
     81     private void load(ByteBuffer bytes) throws IOException {
     82         //dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
     83         ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
     84         int indexesLength=bytes.getInt()/4;  // inIndexes[IX_VALUE_MAPS_OFFSET]/4
     85         if(indexesLength<8) {  // formatVersion 2 initially has 8 indexes
     86             throw new IOException("pnames.icu: not enough indexes");
     87         }
     88         int[] inIndexes=new int[indexesLength];
     89         inIndexes[0]=indexesLength*4;
     90         for(int i=1; i<indexesLength; ++i) {
     91             inIndexes[i]=bytes.getInt();
     92         }
     93 
     94         // Read the valueMaps.
     95         int offset=inIndexes[IX_VALUE_MAPS_OFFSET];
     96         int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET];
     97         int numInts=(nextOffset-offset)/4;
     98         valueMaps=ICUBinary.getInts(bytes, numInts, 0);
     99 
    100         // Read the bytesTries.
    101         offset=nextOffset;
    102         nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET];
    103         int numBytes=nextOffset-offset;
    104         bytesTries=new byte[numBytes];
    105         bytes.get(bytesTries);
    106 
    107         // Read the nameGroups and turn them from ASCII bytes into a Java String.
    108         offset=nextOffset;
    109         nextOffset=inIndexes[IX_RESERVED3_OFFSET];
    110         numBytes=nextOffset-offset;
    111         StringBuilder sb=new StringBuilder(numBytes);
    112         for(int i=0; i<numBytes; ++i) {
    113             sb.append((char)bytes.get());
    114         }
    115         nameGroups=sb.toString();
    116     }
    117 
    118     private UPropertyAliases() throws IOException {
    119         ByteBuffer bytes = ICUBinary.getRequiredData("pnames.icu");
    120         load(bytes);
    121     }
    122 
    123     private int findProperty(int property) {
    124         int i=1;  // valueMaps index, initially after numRanges
    125         for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) {
    126             // Read and skip the start and limit of this range.
    127             int start=valueMaps[i];
    128             int limit=valueMaps[i+1];
    129             i+=2;
    130             if(property<start) {
    131                 break;
    132             }
    133             if(property<limit) {
    134                 return i+(property-start)*2;
    135             }
    136             i+=(limit-start)*2;  // Skip all entries for this range.
    137         }
    138         return 0;
    139     }
    140 
    141     private int findPropertyValueNameGroup(int valueMapIndex, int value) {
    142         if(valueMapIndex==0) {
    143             return 0;  // The property does not have named values.
    144         }
    145         ++valueMapIndex;  // Skip the BytesTrie offset.
    146         int numRanges=valueMaps[valueMapIndex++];
    147         if(numRanges<0x10) {
    148             // Ranges of values.
    149             for(; numRanges>0; --numRanges) {
    150                 // Read and skip the start and limit of this range.
    151                 int start=valueMaps[valueMapIndex];
    152                 int limit=valueMaps[valueMapIndex+1];
    153                 valueMapIndex+=2;
    154                 if(value<start) {
    155                     break;
    156                 }
    157                 if(value<limit) {
    158                     return valueMaps[valueMapIndex+value-start];
    159                 }
    160                 valueMapIndex+=limit-start;  // Skip all entries for this range.
    161             }
    162         } else {
    163             // List of values.
    164             int valuesStart=valueMapIndex;
    165             int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
    166             do {
    167                 int v=valueMaps[valueMapIndex];
    168                 if(value<v) {
    169                     break;
    170                 }
    171                 if(value==v) {
    172                     return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
    173                 }
    174             } while(++valueMapIndex<nameGroupOffsetsStart);
    175         }
    176         return 0;
    177     }
    178 
    179     private String getName(int nameGroupsIndex, int nameIndex) {
    180         int numNames=nameGroups.charAt(nameGroupsIndex++);
    181         if(nameIndex<0 || numNames<=nameIndex) {
    182             throw new IllegalIcuArgumentException("Invalid property (value) name choice");
    183         }
    184         // Skip nameIndex names.
    185         for(; nameIndex>0; --nameIndex) {
    186             while(0!=nameGroups.charAt(nameGroupsIndex++)) {}
    187         }
    188         // Find the end of this name.
    189         int nameStart=nameGroupsIndex;
    190         while(0!=nameGroups.charAt(nameGroupsIndex)) {
    191             ++nameGroupsIndex;
    192         }
    193         if(nameStart==nameGroupsIndex) {
    194             return null;  // no name (Property[Value]Aliases.txt has "n/a")
    195         }
    196         return nameGroups.substring(nameStart, nameGroupsIndex);
    197     }
    198 
    199     private static int asciiToLowercase(int c) {
    200         return 'A'<=c && c<='Z' ? c+0x20 : c;
    201     }
    202 
    203     private boolean containsName(BytesTrie trie, CharSequence name) {
    204         BytesTrie.Result result=BytesTrie.Result.NO_VALUE;
    205         for(int i=0; i<name.length(); ++i) {
    206             int c=name.charAt(i);
    207             // Ignore delimiters '-', '_', and ASCII White_Space.
    208             if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) {
    209                 continue;
    210             }
    211             if(!result.hasNext()) {
    212                 return false;
    213             }
    214             c=asciiToLowercase(c);
    215             result=trie.next(c);
    216         }
    217         return result.hasValue();
    218     }
    219 
    220     //----------------------------------------------------------------
    221     // Public API
    222 
    223     public static final UPropertyAliases INSTANCE;
    224 
    225     static {
    226         try {
    227             INSTANCE = new UPropertyAliases();
    228         } catch(IOException e) {
    229             ///CLOVER:OFF
    230             MissingResourceException mre = new MissingResourceException(
    231                     "Could not construct UPropertyAliases. Missing pnames.icu", "", "");
    232             mre.initCause(e);
    233             throw mre;
    234             ///CLOVER:ON
    235         }
    236     }
    237 
    238     /**
    239      * Returns a property name given a property enum.
    240      * Multiple names may be available for each property;
    241      * the nameChoice selects among them.
    242      */
    243     public String getPropertyName(int property, int nameChoice) {
    244         int valueMapIndex=findProperty(property);
    245         if(valueMapIndex==0) {
    246             throw new IllegalArgumentException(
    247                     "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
    248         }
    249         return getName(valueMaps[valueMapIndex], nameChoice);
    250     }
    251 
    252     /**
    253      * Returns a value name given a property enum and a value enum.
    254      * Multiple names may be available for each value;
    255      * the nameChoice selects among them.
    256      */
    257     public String getPropertyValueName(int property, int value, int nameChoice) {
    258         int valueMapIndex=findProperty(property);
    259         if(valueMapIndex==0) {
    260             throw new IllegalArgumentException(
    261                     "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
    262         }
    263         int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
    264         if(nameGroupOffset==0) {
    265             throw new IllegalArgumentException(
    266                     "Property "+property+" (0x"+Integer.toHexString(property)+
    267                     ") does not have named values");
    268         }
    269         return getName(nameGroupOffset, nameChoice);
    270     }
    271 
    272     private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
    273         BytesTrie trie=new BytesTrie(bytesTries, bytesTrieOffset);
    274         if(containsName(trie, alias)) {
    275             return trie.getValue();
    276         } else {
    277             return UProperty.UNDEFINED;
    278         }
    279     }
    280 
    281     /**
    282      * Returns a property enum given one of its property names.
    283      * If the property name is not known, this method returns
    284      * UProperty.UNDEFINED.
    285      */
    286     public int getPropertyEnum(CharSequence alias) {
    287         return getPropertyOrValueEnum(0, alias);
    288     }
    289 
    290     /**
    291      * Returns a value enum given a property enum and one of its value names.
    292      */
    293     public int getPropertyValueEnum(int property, CharSequence alias) {
    294         int valueMapIndex=findProperty(property);
    295         if(valueMapIndex==0) {
    296             throw new IllegalArgumentException(
    297                     "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
    298         }
    299         valueMapIndex=valueMaps[valueMapIndex+1];
    300         if(valueMapIndex==0) {
    301             throw new IllegalArgumentException(
    302                     "Property "+property+" (0x"+Integer.toHexString(property)+
    303                     ") does not have named values");
    304         }
    305         // valueMapIndex is the start of the property's valueMap,
    306         // where the first word is the BytesTrie offset.
    307         return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    308     }
    309 
    310     /**
    311      * Returns a value enum given a property enum and one of its value names. Does not throw.
    312      * @return value enum, or UProperty.UNDEFINED if not defined for that property
    313      */
    314     public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
    315         int valueMapIndex=findProperty(property);
    316         if(valueMapIndex==0) {
    317             return UProperty.UNDEFINED;
    318         }
    319         valueMapIndex=valueMaps[valueMapIndex+1];
    320         if(valueMapIndex==0) {
    321             return UProperty.UNDEFINED;
    322         }
    323         // valueMapIndex is the start of the property's valueMap,
    324         // where the first word is the BytesTrie offset.
    325         return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    326     }
    327 
    328     /**
    329      * Compare two property names, returning <0, 0, or >0.  The
    330      * comparison is that described as "loose" matching in the
    331      * Property*Aliases.txt files.
    332      */
    333     public static int compare(String stra, String strb) {
    334         // Note: This implementation is a literal copy of
    335         // uprv_comparePropertyNames.  It can probably be improved.
    336         int istra=0, istrb=0, rc;
    337         int cstra=0, cstrb=0;
    338         for (;;) {
    339             /* Ignore delimiters '-', '_', and ASCII White_Space */
    340             while (istra<stra.length()) {
    341                 cstra = stra.charAt(istra);
    342                 switch (cstra) {
    343                 case '-':  case '_':  case ' ':  case '\t':
    344                 case '\n': case 0xb/*\v*/: case '\f': case '\r':
    345                     ++istra;
    346                     continue;
    347                 }
    348                 break;
    349             }
    350 
    351             while (istrb<strb.length()) {
    352                 cstrb = strb.charAt(istrb);
    353                 switch (cstrb) {
    354                 case '-':  case '_':  case ' ':  case '\t':
    355                 case '\n': case 0xb/*\v*/: case '\f': case '\r':
    356                     ++istrb;
    357                     continue;
    358                 }
    359                 break;
    360             }
    361 
    362             /* If we reach the ends of both strings then they match */
    363             boolean endstra = istra==stra.length();
    364             boolean endstrb = istrb==strb.length();
    365             if (endstra) {
    366                 if (endstrb) return 0;
    367                 cstra = 0;
    368             } else if (endstrb) {
    369                 cstrb = 0;
    370             }
    371 
    372             rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
    373             if (rc != 0) {
    374                 return rc;
    375             }
    376 
    377             ++istra;
    378             ++istrb;
    379         }
    380     }
    381 }
    382