Home | History | Annotate | Download | only in impl
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html#License
      3 /*
      4  **********************************************************************
      5  * Copyright (c) 2002-2015, International Business Machines
      6  * Corporation and others.  All Rights Reserved.
      7  **********************************************************************
      8  * Author: Alan Liu
      9  * Created: November 5 2002
     10  * Since: ICU 2.4
     11  * 2010nov19 Markus Scherer  Rewrite for formatVersion 2.
     12  **********************************************************************
     13  */
     14 
     15 package com.ibm.icu.impl;
     16 
     17 import java.io.IOException;
     18 import java.nio.ByteBuffer;
     19 import java.util.MissingResourceException;
     20 
     21 import com.ibm.icu.lang.UProperty;
     22 import com.ibm.icu.util.BytesTrie;
     23 
     24 /**
     25  * Wrapper for the pnames.icu binary data file.  This data file is
     26  * imported from icu4c.  It contains property and property value
     27  * aliases from the UCD files PropertyAliases.txt and
     28  * PropertyValueAliases.txt.  The file is built by the icu4c tool
     29  * genpname.  It must be an ASCII big-endian file to be
     30  * usable in icu4j.
     31  *
     32  * This class performs two functions.
     33  *
     34  * (1) It can import the flat binary data into usable objects.
     35  *
     36  * (2) It provides an API to access the tree of objects.
     37  *
     38  * Needless to say, this class is tightly coupled to the binary format
     39  * of icu4c's pnames.icu file.
     40  *
 * The pnames.icu file is read and parsed, and the data structures are
 * assembled, when a UPropertyAliases is constructed.  The constructor is
 * private; clients use the shared singleton {@link #INSTANCE}.
     44  *
     45  * @author Alan Liu
     46  * @since ICU 2.4
     47  */
     48 public final class UPropertyAliases {
    // Positions in the index array at the start of the data.  Each of these
    // entries holds a byte offset from the start of the data, after the generic header.
     50     private static final int IX_VALUE_MAPS_OFFSET=0;
     51     private static final int IX_BYTE_TRIES_OFFSET=1;
     52     private static final int IX_NAME_GROUPS_OFFSET=2;
     53     private static final int IX_RESERVED3_OFFSET=3;
     54     // private static final int IX_RESERVED4_OFFSET=4;
     55     // private static final int IX_TOTAL_SIZE=5;
     56 
     57     // Other values.
     58     // private static final int IX_MAX_NAME_LENGTH=6;
     59     // private static final int IX_RESERVED7=7;
     60     // private static final int IX_COUNT=8;
     61 
     62     //----------------------------------------------------------------
     63     // Runtime data.  This is an unflattened representation of the
     64     // data in pnames.icu.
     65 
     66     private int[] valueMaps;
     67     private byte[] bytesTries;
     68     private String nameGroups;
     69 
     70     private static final class IsAcceptable implements ICUBinary.Authenticate {
     71         // @Override when we switch to Java 6
     72         @Override
     73         public boolean isDataVersionAcceptable(byte version[]) {
     74             return version[0]==2;
     75         }
     76     }
     77     private static final IsAcceptable IS_ACCEPTABLE=new IsAcceptable();
     78     private static final int DATA_FORMAT=0x706E616D;  // "pnam"
     79 
     80     private void load(ByteBuffer bytes) throws IOException {
     81         //dataVersion=ICUBinary.readHeaderAndDataVersion(bytes, DATA_FORMAT, IS_ACCEPTABLE);
     82         ICUBinary.readHeader(bytes, DATA_FORMAT, IS_ACCEPTABLE);
     83         int indexesLength=bytes.getInt()/4;  // inIndexes[IX_VALUE_MAPS_OFFSET]/4
     84         if(indexesLength<8) {  // formatVersion 2 initially has 8 indexes
     85             throw new IOException("pnames.icu: not enough indexes");
     86         }
     87         int[] inIndexes=new int[indexesLength];
     88         inIndexes[0]=indexesLength*4;
     89         for(int i=1; i<indexesLength; ++i) {
     90             inIndexes[i]=bytes.getInt();
     91         }
     92 
     93         // Read the valueMaps.
     94         int offset=inIndexes[IX_VALUE_MAPS_OFFSET];
     95         int nextOffset=inIndexes[IX_BYTE_TRIES_OFFSET];
     96         int numInts=(nextOffset-offset)/4;
     97         valueMaps=ICUBinary.getInts(bytes, numInts, 0);
     98 
     99         // Read the bytesTries.
    100         offset=nextOffset;
    101         nextOffset=inIndexes[IX_NAME_GROUPS_OFFSET];
    102         int numBytes=nextOffset-offset;
    103         bytesTries=new byte[numBytes];
    104         bytes.get(bytesTries);
    105 
    106         // Read the nameGroups and turn them from ASCII bytes into a Java String.
    107         offset=nextOffset;
    108         nextOffset=inIndexes[IX_RESERVED3_OFFSET];
    109         numBytes=nextOffset-offset;
    110         StringBuilder sb=new StringBuilder(numBytes);
    111         for(int i=0; i<numBytes; ++i) {
    112             sb.append((char)bytes.get());
    113         }
    114         nameGroups=sb.toString();
    115     }
    116 
    /**
     * Fetches the "pnames.icu" data via ICUBinary.getRequiredData() and
     * loads it into this object.
     *
     * @throws IOException if load() fails
     */
    private UPropertyAliases() throws IOException {
        load(ICUBinary.getRequiredData("pnames.icu"));
    }
    121 
    122     private int findProperty(int property) {
    123         int i=1;  // valueMaps index, initially after numRanges
    124         for(int numRanges=valueMaps[0]; numRanges>0; --numRanges) {
    125             // Read and skip the start and limit of this range.
    126             int start=valueMaps[i];
    127             int limit=valueMaps[i+1];
    128             i+=2;
    129             if(property<start) {
    130                 break;
    131             }
    132             if(property<limit) {
    133                 return i+(property-start)*2;
    134             }
    135             i+=(limit-start)*2;  // Skip all entries for this range.
    136         }
    137         return 0;
    138     }
    139 
    140     private int findPropertyValueNameGroup(int valueMapIndex, int value) {
    141         if(valueMapIndex==0) {
    142             return 0;  // The property does not have named values.
    143         }
    144         ++valueMapIndex;  // Skip the BytesTrie offset.
    145         int numRanges=valueMaps[valueMapIndex++];
    146         if(numRanges<0x10) {
    147             // Ranges of values.
    148             for(; numRanges>0; --numRanges) {
    149                 // Read and skip the start and limit of this range.
    150                 int start=valueMaps[valueMapIndex];
    151                 int limit=valueMaps[valueMapIndex+1];
    152                 valueMapIndex+=2;
    153                 if(value<start) {
    154                     break;
    155                 }
    156                 if(value<limit) {
    157                     return valueMaps[valueMapIndex+value-start];
    158                 }
    159                 valueMapIndex+=limit-start;  // Skip all entries for this range.
    160             }
    161         } else {
    162             // List of values.
    163             int valuesStart=valueMapIndex;
    164             int nameGroupOffsetsStart=valueMapIndex+numRanges-0x10;
    165             do {
    166                 int v=valueMaps[valueMapIndex];
    167                 if(value<v) {
    168                     break;
    169                 }
    170                 if(value==v) {
    171                     return valueMaps[nameGroupOffsetsStart+valueMapIndex-valuesStart];
    172                 }
    173             } while(++valueMapIndex<nameGroupOffsetsStart);
    174         }
    175         return 0;
    176     }
    177 
    178     private String getName(int nameGroupsIndex, int nameIndex) {
    179         int numNames=nameGroups.charAt(nameGroupsIndex++);
    180         if(nameIndex<0 || numNames<=nameIndex) {
    181             throw new IllegalIcuArgumentException("Invalid property (value) name choice");
    182         }
    183         // Skip nameIndex names.
    184         for(; nameIndex>0; --nameIndex) {
    185             while(0!=nameGroups.charAt(nameGroupsIndex++)) {}
    186         }
    187         // Find the end of this name.
    188         int nameStart=nameGroupsIndex;
    189         while(0!=nameGroups.charAt(nameGroupsIndex)) {
    190             ++nameGroupsIndex;
    191         }
    192         if(nameStart==nameGroupsIndex) {
    193             return null;  // no name (Property[Value]Aliases.txt has "n/a")
    194         }
    195         return nameGroups.substring(nameStart, nameGroupsIndex);
    196     }
    197 
    198     private static int asciiToLowercase(int c) {
    199         return 'A'<=c && c<='Z' ? c+0x20 : c;
    200     }
    201 
    202     private boolean containsName(BytesTrie trie, CharSequence name) {
    203         BytesTrie.Result result=BytesTrie.Result.NO_VALUE;
    204         for(int i=0; i<name.length(); ++i) {
    205             int c=name.charAt(i);
    206             // Ignore delimiters '-', '_', and ASCII White_Space.
    207             if(c=='-' || c=='_' || c==' ' || (0x09<=c && c<=0x0d)) {
    208                 continue;
    209             }
    210             if(!result.hasNext()) {
    211                 return false;
    212             }
    213             c=asciiToLowercase(c);
    214             result=trie.next(c);
    215         }
    216         return result.hasValue();
    217     }
    218 
    219     //----------------------------------------------------------------
    220     // Public API
    221 
    /** The shared instance, created when this class is initialized. */
    public static final UPropertyAliases INSTANCE;

    // A failure to load the data is rethrown as a MissingResourceException
    // that carries the IOException as its cause.
    static {
        UPropertyAliases loaded;
        try {
            loaded = new UPropertyAliases();
        } catch(IOException e) {
            ///CLOVER:OFF
            throw (MissingResourceException) new MissingResourceException(
                    "Could not construct UPropertyAliases. Missing pnames.icu", "", "")
                    .initCause(e);
            ///CLOVER:ON
        }
        INSTANCE = loaded;
    }
    236 
    237     /**
    238      * Returns a property name given a property enum.
    239      * Multiple names may be available for each property;
    240      * the nameChoice selects among them.
    241      */
    242     public String getPropertyName(int property, int nameChoice) {
    243         int valueMapIndex=findProperty(property);
    244         if(valueMapIndex==0) {
    245             throw new IllegalArgumentException(
    246                     "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
    247         }
    248         return getName(valueMaps[valueMapIndex], nameChoice);
    249     }
    250 
    251     /**
    252      * Returns a value name given a property enum and a value enum.
    253      * Multiple names may be available for each value;
    254      * the nameChoice selects among them.
    255      */
    256     public String getPropertyValueName(int property, int value, int nameChoice) {
    257         int valueMapIndex=findProperty(property);
    258         if(valueMapIndex==0) {
    259             throw new IllegalArgumentException(
    260                     "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
    261         }
    262         int nameGroupOffset=findPropertyValueNameGroup(valueMaps[valueMapIndex+1], value);
    263         if(nameGroupOffset==0) {
    264             throw new IllegalArgumentException(
    265                     "Property "+property+" (0x"+Integer.toHexString(property)+
    266                     ") does not have named values");
    267         }
    268         return getName(nameGroupOffset, nameChoice);
    269     }
    270 
    271     private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
    272         BytesTrie trie=new BytesTrie(bytesTries, bytesTrieOffset);
    273         if(containsName(trie, alias)) {
    274             return trie.getValue();
    275         } else {
    276             return UProperty.UNDEFINED;
    277         }
    278     }
    279 
    280     /**
    281      * Returns a property enum given one of its property names.
    282      * If the property name is not known, this method returns
    283      * UProperty.UNDEFINED.
    284      */
    285     public int getPropertyEnum(CharSequence alias) {
    286         return getPropertyOrValueEnum(0, alias);
    287     }
    288 
    289     /**
    290      * Returns a value enum given a property enum and one of its value names.
    291      */
    292     public int getPropertyValueEnum(int property, CharSequence alias) {
    293         int valueMapIndex=findProperty(property);
    294         if(valueMapIndex==0) {
    295             throw new IllegalArgumentException(
    296                     "Invalid property enum "+property+" (0x"+Integer.toHexString(property)+")");
    297         }
    298         valueMapIndex=valueMaps[valueMapIndex+1];
    299         if(valueMapIndex==0) {
    300             throw new IllegalArgumentException(
    301                     "Property "+property+" (0x"+Integer.toHexString(property)+
    302                     ") does not have named values");
    303         }
    304         // valueMapIndex is the start of the property's valueMap,
    305         // where the first word is the BytesTrie offset.
    306         return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    307     }
    308 
    309     /**
    310      * Returns a value enum given a property enum and one of its value names. Does not throw.
    311      * @return value enum, or UProperty.UNDEFINED if not defined for that property
    312      */
    313     public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
    314         int valueMapIndex=findProperty(property);
    315         if(valueMapIndex==0) {
    316             return UProperty.UNDEFINED;
    317         }
    318         valueMapIndex=valueMaps[valueMapIndex+1];
    319         if(valueMapIndex==0) {
    320             return UProperty.UNDEFINED;
    321         }
    322         // valueMapIndex is the start of the property's valueMap,
    323         // where the first word is the BytesTrie offset.
    324         return getPropertyOrValueEnum(valueMaps[valueMapIndex], alias);
    325     }
    326 
    327     /**
    328      * Compare two property names, returning <0, 0, or >0.  The
    329      * comparison is that described as "loose" matching in the
    330      * Property*Aliases.txt files.
    331      */
    332     public static int compare(String stra, String strb) {
    333         // Note: This implementation is a literal copy of
    334         // uprv_comparePropertyNames.  It can probably be improved.
    335         int istra=0, istrb=0, rc;
    336         int cstra=0, cstrb=0;
    337         for (;;) {
    338             /* Ignore delimiters '-', '_', and ASCII White_Space */
    339             while (istra<stra.length()) {
    340                 cstra = stra.charAt(istra);
    341                 switch (cstra) {
    342                 case '-':  case '_':  case ' ':  case '\t':
    343                 case '\n': case 0xb/*\v*/: case '\f': case '\r':
    344                     ++istra;
    345                     continue;
    346                 }
    347                 break;
    348             }
    349 
    350             while (istrb<strb.length()) {
    351                 cstrb = strb.charAt(istrb);
    352                 switch (cstrb) {
    353                 case '-':  case '_':  case ' ':  case '\t':
    354                 case '\n': case 0xb/*\v*/: case '\f': case '\r':
    355                     ++istrb;
    356                     continue;
    357                 }
    358                 break;
    359             }
    360 
    361             /* If we reach the ends of both strings then they match */
    362             boolean endstra = istra==stra.length();
    363             boolean endstrb = istrb==strb.length();
    364             if (endstra) {
    365                 if (endstrb) return 0;
    366                 cstra = 0;
    367             } else if (endstrb) {
    368                 cstrb = 0;
    369             }
    370 
    371             rc = asciiToLowercase(cstra) - asciiToLowercase(cstrb);
    372             if (rc != 0) {
    373                 return rc;
    374             }
    375 
    376             ++istra;
    377             ++istrb;
    378         }
    379     }
    380 }
    381