Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /**
      5 *******************************************************************************
      6 * Copyright (C) 1996-2016, International Business Machines Corporation and
      7 * others. All Rights Reserved.
      8 *******************************************************************************
      9 */
     10 package android.icu.text;
     11 
     12 import android.icu.impl.coll.Collation;
     13 
     14 /**
     15  * A <code>CollationKey</code> represents a <code>String</code>
     16  * under the rules of a specific <code>Collator</code>
     17  * object. Comparing two <code>CollationKey</code>s returns the
     18  * relative order of the <code>String</code>s they represent.
     19  *
     20  * <p>Since the rule set of <code>Collator</code>s can differ, the
     21  * sort orders of the same string under two different
     22  * <code>Collator</code>s might differ.  Hence comparing
     23  * <code>CollationKey</code>s generated from different
     24  * <code>Collator</code>s can give incorrect results.
     25 
     26  * <p>Both the method
     27  * <code>CollationKey.compareTo(CollationKey)</code> and the method
     28  * <code>Collator.compare(String, String)</code> compare two strings
     29  * and returns their relative order.  The performance characteristics
     30  * of these two approaches can differ.
     31  * Note that collation keys are often less efficient than simply doing comparison.
     32  * For more details, see the ICU User Guide.
     33  *
     34  * <p>During the construction of a <code>CollationKey</code>, the
     35  * entire source string is examined and processed into a series of
     36  * bits terminated by a null, that are stored in the <code>CollationKey</code>.
     37  * When <code>CollationKey.compareTo(CollationKey)</code> executes, it
     38  * performs bitwise comparison on the bit sequences.  This can incurs
     39  * startup cost when creating the <code>CollationKey</code>, but once
     40  * the key is created, binary comparisons are fast.  This approach is
     41  * recommended when the same strings are to be compared over and over
     42  * again.
     43  *
     44  * <p>On the other hand, implementations of
     45  * <code>Collator.compare(String, String)</code> can examine and
     46  * process the strings only until the first characters differing in
     47  * order.  This approach is recommended if the strings are to be
     48  * compared only once.</p>
     49  *
     50  * <p>More information about the composition of the bit sequence can
     51  * be found in the
     52  * <a href="http://www.icu-project.org/userguide/Collate_ServiceArchitecture.html">
     53  * user guide</a>.</p>
     54  *
     55  * <p>The following example shows how <code>CollationKey</code>s can be used
     56  * to sort a list of <code>String</code>s.</p>
     57  * <blockquote>
     58  * <pre>
     59  * // Create an array of CollationKeys for the Strings to be sorted.
     60  * Collator myCollator = Collator.getInstance();
     61  * CollationKey[] keys = new CollationKey[3];
     62  * keys[0] = myCollator.getCollationKey("Tom");
     63  * keys[1] = myCollator.getCollationKey("Dick");
     64  * keys[2] = myCollator.getCollationKey("Harry");
     65  * sort( keys );
     66  * <br>
     67  * //...
     68  * <br>
     69  * // Inside body of sort routine, compare keys this way
     70  * if( keys[i].compareTo( keys[j] ) &gt; 0 )
     71  *    // swap keys[i] and keys[j]
     72  * <br>
     73  * //...
     74  * <br>
     75  * // Finally, when we've returned from sort.
     76  * System.out.println( keys[0].getSourceString() );
     77  * System.out.println( keys[1].getSourceString() );
     78  * System.out.println( keys[2].getSourceString() );
     79  * </pre>
     80  * </blockquote>
     81  * <p>
     82  * This class is not subclassable
     83  * @see Collator
     84  * @see RuleBasedCollator
     85  * @author Syn Wee Quek
     86  */
     87 public final class CollationKey implements Comparable<CollationKey>
     88 {
     89     // public inner classes -------------------------------------------------
     90 
     91     /**
     92      * Options that used in the API CollationKey.getBound() for getting a
     93      * CollationKey based on the bound mode requested.
     94      */
     95     public static final class BoundMode
     96     {
     97         /*
     98          * do not change the values assigned to the members of this enum.
     99          * Underlying code depends on them having these numbers
    100          */
    101 
    102         /**
    103          * Lower bound
    104          */
    105         public static final int LOWER = 0;
    106 
    107         /**
    108          * Upper bound that will match strings of exact size
    109          */
    110         public static final int UPPER = 1;
    111 
    112         /**
    113          * Upper bound that will match all the strings that have the same
    114          * initial substring as the given string
    115          */
    116         public static final int UPPER_LONG = 2;
    117 
    118         /**
    119          * One more than the highest normal BoundMode value.
    120          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    121          * @hide unsupported on Android
    122          */
    123         @Deprecated
    124         public static final int COUNT = 3;
    125 
    126         /**
    127          * Private Constructor
    128          */
    129         ///CLOVER:OFF
    130         private BoundMode(){}
    131         ///CLOVER:ON
    132     }
    133 
    134     // public constructor ---------------------------------------------------
    135 
    136     /**
    137      * CollationKey constructor.
    138      * This constructor is given public access, unlike the JDK version, to
    139      * allow access to users extending the Collator class. See
    140      * {@link Collator#getCollationKey(String)}.
    141      * @param source string this CollationKey is to represent
    142      * @param key array of bytes that represent the collation order of argument
    143      *            source terminated by a null
    144      * @see Collator
    145      */
    146     public CollationKey(String source, byte key[])
    147     {
    148         this(source, key, -1);
    149     }
    150 
    151     /**
    152      * Private constructor, takes a length argument so it need not be lazy-evaluated.
    153      * There must be a 00 byte at key[length] and none before.
    154      */
    155     private CollationKey(String source, byte key[], int length)
    156     {
    157         m_source_ = source;
    158         m_key_ = key;
    159         m_hashCode_ = 0;
    160         m_length_ = length;
    161     }
    162 
    163     /**
    164      * CollationKey constructor that forces key to release its internal byte
    165      * array for adoption. key will have a null byte array after this
    166      * construction.
    167      * @param source string this CollationKey is to represent
    168      * @param key RawCollationKey object that represents the collation order of
    169      *            argument source.
    170      * @see Collator
    171      * @see RawCollationKey
    172      * @hide unsupported on Android
    173      */
    174     public CollationKey(String source, RawCollationKey key)
    175     {
    176         m_source_ = source;
    177         m_length_ = key.size - 1;
    178         m_key_ = key.releaseBytes();
    179         assert m_key_[m_length_] == 0;
    180         m_hashCode_ = 0;
    181     }
    182 
    183     // public getters -------------------------------------------------------
    184 
    185     /**
    186      * Return the source string that this CollationKey represents.
    187      * @return source string that this CollationKey represents
    188      */
    189     public String getSourceString()
    190     {
    191         return m_source_;
    192     }
    193 
    194     /**
    195      * Duplicates and returns the value of this CollationKey as a sequence
    196      * of big-endian bytes terminated by a null.
    197      *
    198      * <p>If two CollationKeys can be legitimately compared, then one can
    199      * compare the byte arrays of each to obtain the same result, e.g.
    200      * <pre>
    201      * byte key1[] = collationkey1.toByteArray();
    202      * byte key2[] = collationkey2.toByteArray();
    203      * int key, targetkey;
    204      * int i = 0;
    205      * do {
    206      *       key = key1[i] &amp; 0xFF;
    207      *     targetkey = key2[i] &amp; 0xFF;
    208      *     if (key &lt; targetkey) {
    209      *         System.out.println("String 1 is less than string 2");
    210      *         return;
    211      *     }
    212      *     if (targetkey &lt; key) {
    213      *         System.out.println("String 1 is more than string 2");
    214      *     }
    215      *     i ++;
    216      * } while (key != 0 &amp;&amp; targetKey != 0);
    217      *
    218      * System.out.println("Strings are equal.");
    219      * </pre>
    220      *
    221      * @return CollationKey value in a sequence of big-endian byte bytes
    222      *         terminated by a null.
    223      */
    224     public byte[] toByteArray()
    225     {
    226         int length = getLength() + 1;
    227         byte result[] = new byte[length];
    228         System.arraycopy(m_key_, 0, result, 0, length);
    229         return result;
    230     }
    231 
    232     // public other methods -------------------------------------------------
    233 
    234     /**
    235      * Compare this CollationKey to another CollationKey.  The
    236      * collation rules of the Collator that created this key are
    237      * applied.
    238      *
    239      * <p><strong>Note:</strong> Comparison between CollationKeys
    240      * created by different Collators might return incorrect
    241      * results.  See class documentation.
    242      *
    243      * @param target target CollationKey
    244      * @return an integer value.  If the value is less than zero this CollationKey
    245      *         is less than than target, if the value is zero they are equal, and
    246      *         if the value is greater than zero this CollationKey is greater
    247      *         than target.
    248      * @exception NullPointerException is thrown if argument is null.
    249      * @see Collator#compare(String, String)
    250      */
    251     @Override
    252     public int compareTo(CollationKey target)
    253     {
    254         for (int i = 0;; ++i) {
    255             int l = m_key_[i]&0xff;
    256             int r = target.m_key_[i]&0xff;
    257             if (l < r) {
    258             return -1;
    259             } else if (l > r) {
    260             return 1;
    261             } else if (l == 0) {
    262             return 0;
    263             }
    264         }
    265     }
    266 
    267     /**
    268      * Compare this CollationKey and the specified Object for
    269      * equality.  The collation rules of the Collator that created
    270      * this key are applied.
    271      *
    272      * <p>See note in compareTo(CollationKey) for warnings about
    273      * possible incorrect results.
    274      *
    275      * @param target the object to compare to.
    276      * @return true if the two keys compare as equal, false otherwise.
    277      * @see #compareTo(CollationKey)
    278      * @exception ClassCastException is thrown when the argument is not
    279      *            a CollationKey.  NullPointerException is thrown when the argument
    280      *            is null.
    281      */
    282     @Override
    283     public boolean equals(Object target)
    284     {
    285         if (!(target instanceof CollationKey)) {
    286             return false;
    287         }
    288 
    289         return equals((CollationKey)target);
    290     }
    291 
    292     /**
    293      * Compare this CollationKey and the argument target CollationKey for
    294      * equality.
    295      * The collation
    296      * rules of the Collator object which created these objects are applied.
    297      * <p>
    298      * See note in compareTo(CollationKey) for warnings of incorrect results
    299      *
    300      * @param target the CollationKey to compare to.
    301      * @return true if two objects are equal, false otherwise.
    302      * @exception NullPointerException is thrown when the argument is null.
    303      */
    304     public boolean equals(CollationKey target)
    305     {
    306         if (this == target) {
    307             return true;
    308         }
    309         if (target == null) {
    310             return false;
    311         }
    312         CollationKey other = target;
    313         int i = 0;
    314         while (true) {
    315             if (m_key_[i] != other.m_key_[i]) {
    316                 return false;
    317             }
    318             if (m_key_[i] == 0) {
    319               break;
    320             }
    321             i ++;
    322         }
    323         return true;
    324     }
    325 
    326     /**
    327      * Returns a hash code for this CollationKey. The hash value is calculated
    328      * on the key itself, not the String from which the key was created. Thus
    329      * if x and y are CollationKeys, then x.hashCode(x) == y.hashCode()
    330      * if x.equals(y) is true. This allows language-sensitive comparison in a
    331      * hash table.
    332      *
    333      * @return the hash value.
    334      */
    335     @Override
    336     public int hashCode()
    337     {
    338         if (m_hashCode_ == 0) {
    339             if (m_key_ == null) {
    340                 m_hashCode_ = 1;
    341             }
    342             else {
    343                 int size = m_key_.length >> 1;
    344                 StringBuilder key = new StringBuilder(size);
    345                 int i = 0;
    346                 while (m_key_[i] != 0 && m_key_[i + 1] != 0) {
    347                     key.append((char)((m_key_[i] << 8) | (0xff & m_key_[i + 1])));
    348                     i += 2;
    349                 }
    350                 if (m_key_[i] != 0) {
    351                     key.append((char)(m_key_[i] << 8));
    352                 }
    353                 m_hashCode_ = key.toString().hashCode();
    354             }
    355         }
    356         return m_hashCode_;
    357     }
    358 
    359     /**
    360      * Produces a bound for the sort order of a given collation key and a
    361      * strength level. This API does not attempt to find a bound for the
    362      * CollationKey String representation, hence null will be returned in its
    363      * place.
    364      * <p>
    365      * Resulting bounds can be used to produce a range of strings that are
    366      * between upper and lower bounds. For example, if bounds are produced
    367      * for a sortkey of string "smith", strings between upper and lower
    368      * bounds with primary strength would include "Smith", "SMITH", "sMiTh".
    369      * <p>
    370      * There are two upper bounds that can be produced. If BoundMode.UPPER
    371      * is produced, strings matched would be as above. However, if a bound
    372      * is produced using BoundMode.UPPER_LONG is used, the above example will
    373      * also match "Smithsonian" and similar.
    374      * <p>
    375      * For more on usage, see example in test procedure
    376      * <a href="http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/collator/CollationAPITest.java">
    377      * src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds.
    378      * </a>
    379      * <p>
    380      * Collation keys produced may be compared using the <TT>compare</TT> API.
    381      * @param boundType Mode of bound required. It can be BoundMode.LOWER, which
    382      *              produces a lower inclusive bound, BoundMode.UPPER, that
    383      *              produces upper bound that matches strings of the same
    384      *              length or BoundMode.UPPER_LONG that matches strings that
    385      *              have the same starting substring as the source string.
    386      * @param noOfLevels Strength levels required in the resulting bound
    387      *                 (for most uses, the recommended value is PRIMARY). This
    388      *                 strength should be less than the maximum strength of
    389      *                 this CollationKey.
    390      *                 See users guide for explanation on the strength levels a
    391      *                 collation key can have.
    392      * @return the result bounded CollationKey with a valid sort order but
    393      *         a null String representation.
    394      * @exception IllegalArgumentException thrown when the strength level
    395      *            requested is higher than or equal to the strength in this
    396      *            CollationKey.
    397      *            In the case of an Exception, information
    398      *            about the maximum strength to use will be returned in the
    399      *            Exception. The user can then call getBound() again with the
    400      *            appropriate strength.
    401      * @see CollationKey
    402      * @see CollationKey.BoundMode
    403      * @see Collator#PRIMARY
    404      * @see Collator#SECONDARY
    405      * @see Collator#TERTIARY
    406      * @see Collator#QUATERNARY
    407      * @see Collator#IDENTICAL
    408      */
    409     public CollationKey getBound(int boundType, int noOfLevels)
    410     {
    411         // Scan the string until we skip enough of the key OR reach the end of
    412         // the key
    413         int offset = 0;
    414         int keystrength = Collator.PRIMARY;
    415 
    416         if (noOfLevels > Collator.PRIMARY) {
    417             while (offset < m_key_.length && m_key_[offset] != 0) {
    418                 if (m_key_[offset ++]
    419                         == Collation.LEVEL_SEPARATOR_BYTE) {
    420                     keystrength ++;
    421                     noOfLevels --;
    422                     if (noOfLevels == Collator.PRIMARY
    423                         || offset == m_key_.length || m_key_[offset] == 0) {
    424                         offset --;
    425                         break;
    426                     }
    427                 }
    428             }
    429         }
    430 
    431         if (noOfLevels > 0) {
    432             throw new IllegalArgumentException(
    433                                   "Source collation key has only "
    434                                   + keystrength
    435                                   + " strength level. Call getBound() again "
    436                                   + " with noOfLevels < " + keystrength);
    437         }
    438 
    439         // READ ME: this code assumes that the values for BoundMode variables
    440         // will not change. They are set so that the enum value corresponds to
    441         // the number of extra bytes each bound type needs.
    442         byte resultkey[] = new byte[offset + boundType + 1];
    443         System.arraycopy(m_key_, 0, resultkey, 0, offset);
    444         switch (boundType) {
    445             case BoundMode.LOWER: // = 0
    446                 // Lower bound just gets terminated. No extra bytes
    447                 break;
    448             case BoundMode.UPPER: // = 1
    449                 // Upper bound needs one extra byte
    450                 resultkey[offset ++] = 2;
    451                 break;
    452             case BoundMode.UPPER_LONG: // = 2
    453                 // Upper long bound needs two extra bytes
    454                 resultkey[offset ++] = (byte)0xFF;
    455                 resultkey[offset ++] = (byte)0xFF;
    456                 break;
    457             default:
    458                 throw new IllegalArgumentException(
    459                                                 "Illegal boundType argument");
    460         }
    461         resultkey[offset] = 0;
    462         return new CollationKey(null, resultkey, offset);
    463     }
    464 
    465 
    466     /**
    467      * Merges this CollationKey with another.
    468      * The levels are merged with their corresponding counterparts
    469      * (primaries with primaries, secondaries with secondaries etc.).
    470      * Between the values from the same level a separator is inserted.
    471      *
    472      * <p>This is useful, for example, for combining sort keys from first and last names
    473      * to sort such pairs.
    474      * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
    475      *
    476      * <p>The recommended way to achieve "merged" sorting is by
    477      * concatenating strings with U+FFFE between them.
    478      * The concatenation has the same sort order as the merged sort keys,
    479      * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2).
    480      * Using strings with U+FFFE may yield shorter sort keys.
    481      *
    482      * <p>For details about Sort Key Features see
    483      * http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features
    484      *
    485      * <p>It is possible to merge multiple sort keys by consecutively merging
    486      * another one with the intermediate result.
    487      *
    488      * <p>Only the sort key bytes of the CollationKeys are merged.
    489      * This API does not attempt to merge the
    490      * String representations of the CollationKeys, hence null will be returned
    491      * as the result's String representation.
    492      *
    493      * <p>Example (uncompressed):
    494      * <pre>191B1D 01 050505 01 910505 00
    495      * 1F2123 01 050505 01 910505 00</pre>
    496      * will be merged as
    497      * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
    498      *
    499      * @param source CollationKey to merge with
    500      * @return a CollationKey that contains the valid merged sort keys
    501      *         with a null String representation,
    502      *         i.e. <tt>new CollationKey(null, merged_sort_keys)</tt>
    503      * @exception IllegalArgumentException thrown if source CollationKey
    504      *            argument is null or of 0 length.
    505      */
    506     public CollationKey merge(CollationKey source)
    507     {
    508         // check arguments
    509         if (source == null || source.getLength() == 0) {
    510             throw new IllegalArgumentException(
    511                       "CollationKey argument can not be null or of 0 length");
    512         }
    513 
    514         // 1 byte extra for the 02 separator at the end of the copy of this sort key,
    515         // and 1 more for the terminating 00.
    516         byte result[] = new byte[getLength() + source.getLength() + 2];
    517 
    518         // merge the sort keys with the same number of levels
    519         int rindex = 0;
    520         int index = 0;
    521         int sourceindex = 0;
    522         while (true) {
    523             // copy level from src1 not including 00 or 01
    524             // unsigned issues
    525             while (m_key_[index] < 0 || m_key_[index] >= MERGE_SEPERATOR_) {
    526                 result[rindex++] = m_key_[index++];
    527             }
    528 
    529             // add a 02 merge separator
    530             result[rindex++] = MERGE_SEPERATOR_;
    531 
    532             // copy level from src2 not including 00 or 01
    533             while (source.m_key_[sourceindex] < 0
    534                    || source.m_key_[sourceindex] >= MERGE_SEPERATOR_) {
    535                 result[rindex++] = source.m_key_[sourceindex++];
    536             }
    537 
    538             // if both sort keys have another level, then add a 01 level
    539             // separator and continue
    540             if (m_key_[index] == Collation.LEVEL_SEPARATOR_BYTE
    541                 && source.m_key_[sourceindex]
    542                         == Collation.LEVEL_SEPARATOR_BYTE) {
    543                 ++index;
    544                 ++sourceindex;
    545                 result[rindex++] = Collation.LEVEL_SEPARATOR_BYTE;
    546             }
    547             else {
    548                 break;
    549             }
    550         }
    551 
    552         // here, at least one sort key is finished now, but the other one
    553         // might have some contents left from containing more levels;
    554         // that contents is just appended to the result
    555         int remainingLength;
    556         if ((remainingLength = m_length_ - index) > 0) {
    557             System.arraycopy(m_key_, index, result, rindex, remainingLength);
    558             rindex += remainingLength;
    559         }
    560         else if ((remainingLength = source.m_length_ - sourceindex) > 0) {
    561             System.arraycopy(source.m_key_, sourceindex, result, rindex, remainingLength);
    562             rindex += remainingLength;
    563         }
    564         result[rindex] = 0;
    565 
    566         assert rindex == result.length - 1;
    567         return new CollationKey(null, result, rindex);
    568     }
    569 
    570     // private data members -------------------------------------------------
    571 
    572     /**
    573      * Sequence of bytes that represents the sort key
    574      */
    575     private byte m_key_[];
    576 
    577     /**
    578      * Source string this CollationKey represents
    579      */
    580     private String m_source_;
    581 
    582     /**
    583      * Hash code for the key
    584      */
    585     private int m_hashCode_;
    586     /**
    587      * Gets the length of this CollationKey
    588      */
    589     private int m_length_;
    590     /**
    591      * Collation key merge seperator
    592      */
    593     private static final int MERGE_SEPERATOR_ = 2;
    594 
    595     // private methods ------------------------------------------------------
    596 
    597     /**
    598      * Gets the length of the CollationKey
    599      * @return length of the CollationKey
    600      */
    601     private int getLength()
    602     {
    603         if (m_length_ >= 0) {
    604             return m_length_;
    605         }
    606         int length = m_key_.length;
    607         for (int index = 0; index < length; index ++) {
    608             if (m_key_[index] == 0) {
    609                 length = index;
    610                 break;
    611             }
    612         }
    613         m_length_ = length;
    614         return m_length_;
    615     }
    616 }
    617