Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /**
      5 *******************************************************************************
      6 * Copyright (C) 1996-2016, International Business Machines Corporation and
      7 * others. All Rights Reserved.
      8 *******************************************************************************
      9 */
     10 package android.icu.text;
     11 
     12 import java.util.Comparator;
     13 import java.util.LinkedList;
     14 import java.util.Locale;
     15 import java.util.MissingResourceException;
     16 import java.util.Set;
     17 
     18 import android.icu.impl.ICUData;
     19 import android.icu.impl.ICUDebug;
     20 import android.icu.impl.ICUResourceBundle;
     21 import android.icu.impl.UResource;
     22 import android.icu.impl.coll.CollationData;
     23 import android.icu.impl.coll.CollationRoot;
     24 import android.icu.lang.UCharacter;
     25 import android.icu.lang.UProperty;
     26 import android.icu.lang.UScript;
     27 import android.icu.util.Freezable;
     28 import android.icu.util.ICUException;
     29 import android.icu.util.ULocale;
     30 import android.icu.util.ULocale.Category;
     31 import android.icu.util.UResourceBundle;
     32 import android.icu.util.VersionInfo;
     33 
     34 /**
     35 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.text.Collator}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
     36 *
     37 * <p>Collator performs locale-sensitive string comparison. A concrete
     38 * subclass, RuleBasedCollator, allows customization of the collation
     39 * ordering by the use of rule sets.
     40 *
     41 * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link Freezable}.
     42 *
* <p>Following the <a href="http://www.unicode.org">Unicode
     44 * Consortium</a>'s specifications for the
     45 * <a href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation
     46 * Algorithm (UCA)</a>, there are 5 different levels of strength used
     47 * in comparisons:
     48 *
     49 * <ul>
     50 * <li>PRIMARY strength: Typically, this is used to denote differences between
     51 *     base characters (for example, "a" &lt; "b").
     52 *     It is the strongest difference. For example, dictionaries are divided
     53 *     into different sections by base character.
     54 * <li>SECONDARY strength: Accents in the characters are considered secondary
     55 *     differences (for example, "as" &lt; "&agrave;s" &lt; "at"). Other
     56 *     differences
     57 *     between letters can also be considered secondary differences, depending
     58 *     on the language. A secondary difference is ignored when there is a
     59 *     primary difference anywhere in the strings.
     60 * <li>TERTIARY strength: Upper and lower case differences in characters are
     61 *     distinguished at tertiary strength (for example, "ao" &lt; "Ao" &lt;
     62 *     "a&ograve;"). In addition, a variant of a letter differs from the base
*     form on the tertiary strength (such as "A" and "&#9398;"). Another
     64 *     example is the
     65 *     difference between large and small Kana. A tertiary difference is ignored
     66 *     when there is a primary or secondary difference anywhere in the strings.
     67 * <li>QUATERNARY strength: When punctuation is ignored
     68 *     (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
*     Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
     70 *     strength, an additional strength level can
     71 *     be used to distinguish words with and without punctuation (for example,
     72 *     "ab" &lt; "a-b" &lt; "aB").
     73 *     This difference is ignored when there is a PRIMARY, SECONDARY or TERTIARY
     74 *     difference. The QUATERNARY strength should only be used if ignoring
     75 *     punctuation is required.
     76 * <li>IDENTICAL strength:
     77 *     When all other strengths are equal, the IDENTICAL strength is used as a
     78 *     tiebreaker. The Unicode code point values of the NFD form of each string
     79 *     are compared, just in case there is no difference.
*     For example, Hebrew cantillation marks are only distinguished at this
*     strength. This strength should be used sparingly, as code point value
*     differences alone between two strings are an extremely rare occurrence.
     83 *     Using this strength substantially decreases the performance for both
     84 *     comparison and collation key generation APIs. This strength also
     85 *     increases the size of the collation key.
     86 * </ul>
     87 *
     88 * Unlike the JDK, ICU4J's Collator deals only with 2 decomposition modes,
     89 * the canonical decomposition mode and one that does not use any decomposition.
     90 * The compatibility decomposition mode, java.text.Collator.FULL_DECOMPOSITION
     91 * is not supported here. If the canonical
     92 * decomposition mode is set, the Collator handles un-normalized text properly,
     93 * producing the same results as if the text were normalized in NFD. If
     94 * canonical decomposition is turned off, it is the user's responsibility to
     95 * ensure that all text is already in the appropriate form before performing
     96 * a comparison or before getting a CollationKey.
     97 *
     98 * <p>For more information about the collation service see the
     99 * <a href="http://userguide.icu-project.org/collation">User Guide</a>.
    100 *
    101 * <p>Examples of use
    102 * <pre>
    103 * // Get the Collator for US English and set its strength to PRIMARY
    104 * Collator usCollator = Collator.getInstance(Locale.US);
    105 * usCollator.setStrength(Collator.PRIMARY);
    106 * if (usCollator.compare("abc", "ABC") == 0) {
    107 *     System.out.println("Strings are equivalent");
    108 * }
    109 *
    110 * The following example shows how to compare two strings using the
    111 * Collator for the default locale.
    112 *
    113 * // Compare two strings in the default locale
    114 * Collator myCollator = Collator.getInstance();
    115 * myCollator.setDecomposition(NO_DECOMPOSITION);
    116 * if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
    117 *     System.out.println("&agrave;&#92;u0325 is not equals to a&#92;u0325&#768; without decomposition");
    118 *     myCollator.setDecomposition(CANONICAL_DECOMPOSITION);
    119 *     if (myCollator.compare("&agrave;&#92;u0325", "a&#92;u0325&#768;") != 0) {
    120 *         System.out.println("Error: &agrave;&#92;u0325 should be equals to a&#92;u0325&#768; with decomposition");
    121 *     }
    122 *     else {
    123 *         System.out.println("&agrave;&#92;u0325 is equals to a&#92;u0325&#768; with decomposition");
    124 *     }
    125 * }
    126 * else {
    127 *     System.out.println("Error: &agrave;&#92;u0325 should be not equals to a&#92;u0325&#768; without decomposition");
    128 * }
    129 * </pre>
    130 *
    131 * @see RuleBasedCollator
    132 * @see CollationKey
    133 * @author Syn Wee Quek
    134 */
    135 public abstract class Collator implements Comparator<Object>, Freezable<Collator>, Cloneable
    136 {
    137     // public data members ---------------------------------------------------
    138 
    139     /**
    140      * Strongest collator strength value. Typically used to denote differences
    141      * between base characters. See class documentation for more explanation.
    142      * @see #setStrength
    143      * @see #getStrength
    144      */
    145     public final static int PRIMARY = 0;
    146 
    147     /**
    148      * Second level collator strength value.
    149      * Accents in the characters are considered secondary differences.
    150      * Other differences between letters can also be considered secondary
    151      * differences, depending on the language.
    152      * See class documentation for more explanation.
    153      * @see #setStrength
    154      * @see #getStrength
    155      */
    156     public final static int SECONDARY = 1;
    157 
    158     /**
    159      * Third level collator strength value.
    160      * Upper and lower case differences in characters are distinguished at this
    161      * strength level. In addition, a variant of a letter differs from the base
    162      * form on the tertiary level.
    163      * See class documentation for more explanation.
    164      * @see #setStrength
    165      * @see #getStrength
    166      */
    167     public final static int TERTIARY = 2;
    168 
    169     /**
    170      * <strong>[icu]</strong> Fourth level collator strength value.
    171      * When punctuation is ignored
    172      * (see <a href="http://userguide.icu-project.org/collation/concepts#TOC-Ignoring-Punctuation">
    173      * Ignoring Punctuation in the User Guide</a>) at PRIMARY to TERTIARY
    174      * strength, an additional strength level can
    175      * be used to distinguish words with and without punctuation.
    176      * See class documentation for more explanation.
    177      * @see #setStrength
    178      * @see #getStrength
    179      */
    180     public final static int QUATERNARY = 3;
    181 
    182     /**
    183      * Smallest Collator strength value. When all other strengths are equal,
    184      * the IDENTICAL strength is used as a tiebreaker. The Unicode code point
    185      * values of the NFD form of each string are compared, just in case there
    186      * is no difference.
    187      * See class documentation for more explanation.
    188      * <p>
    189      * Note this value is different from JDK's
    190      */
    191     public final static int IDENTICAL = 15;
    192 
    193     /**
    194      * <strong>[icu] Note:</strong> This is for backwards compatibility with Java APIs only.  It
    195      * should not be used, IDENTICAL should be used instead.  ICU's
    196      * collation does not support Java's FULL_DECOMPOSITION mode.
    197      */
    198     public final static int FULL_DECOMPOSITION = IDENTICAL;
    199 
    200     /**
    201      * Decomposition mode value. With NO_DECOMPOSITION set, Strings
    202      * will not be decomposed for collation. This is the default
    203      * decomposition setting unless otherwise specified by the locale
    204      * used to create the Collator.
    205      *
    206      * <p><strong>Note</strong> this value is different from the JDK's.
    207      * @see #CANONICAL_DECOMPOSITION
    208      * @see #getDecomposition
    209      * @see #setDecomposition
    210      */
    211     public final static int NO_DECOMPOSITION = 16;
    212 
    213     /**
    214      * Decomposition mode value. With CANONICAL_DECOMPOSITION set,
    215      * characters that are canonical variants according to the Unicode standard
    216      * will be decomposed for collation.
    217      *
    218      * <p>CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
    219      * described in <a href="http://www.unicode.org/unicode/reports/tr15/">
    220      * Unicode Technical Report #15</a>.
    221      *
    222      * @see #NO_DECOMPOSITION
    223      * @see #getDecomposition
    224      * @see #setDecomposition
    225      */
    226     public final static int CANONICAL_DECOMPOSITION = 17;
    227 
    228     /**
    229      * Reordering codes for non-script groups that can be reordered under collation.
    230      *
    231      * @see #getReorderCodes
    232      * @see #setReorderCodes
    233      * @see #getEquivalentReorderCodes
    234      */
    235     public static interface ReorderCodes {
    236         /**
    237          * A special reordering code that is used to specify the default reordering codes for a locale.
    238          */
    239         public final static int DEFAULT          = -1;  // == UScript.INVALID_CODE
    240         /**
    241          * A special reordering code that is used to specify no reordering codes.
    242          */
    243         public final static int NONE          = UScript.UNKNOWN;
    244         /**
    245          * A special reordering code that is used to specify all other codes used for reordering except
    246          * for the codes listed as ReorderingCodes and those listed explicitly in a reordering.
    247          */
    248         public final static int OTHERS          = UScript.UNKNOWN;
    249         /**
    250          * Characters with the space property.
    251          * This is equivalent to the rule value "space".
    252          */
    253         public final static int SPACE          = 0x1000;
    254         /**
    255          * The first entry in the enumeration of reordering groups. This is intended for use in
    256          * range checking and enumeration of the reorder codes.
    257          */
    258         public final static int FIRST          = SPACE;
    259         /**
    260          * Characters with the punctuation property.
    261          * This is equivalent to the rule value "punct".
    262          */
    263         public final static int PUNCTUATION    = 0x1001;
    264         /**
    265          * Characters with the symbol property.
    266          * This is equivalent to the rule value "symbol".
    267          */
    268         public final static int SYMBOL         = 0x1002;
    269         /**
    270          * Characters with the currency property.
    271          * This is equivalent to the rule value "currency".
    272          */
    273         public final static int CURRENCY       = 0x1003;
    274         /**
    275          * Characters with the digit property.
    276          * This is equivalent to the rule value "digit".
    277          */
    278         public final static int DIGIT          = 0x1004;
    279         /**
    280          * One more than the highest normal ReorderCodes value.
    281          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
    282          * @hide unsupported on Android
    283          */
    284         @Deprecated
    285         public final static int LIMIT          = 0x1005;
    286     }
    287 
    288     // public methods --------------------------------------------------------
    289 
    290     /**
    291      * Compares the equality of two Collator objects. Collator objects are equal if they have the same
    292      * collation (sorting &amp; searching) behavior.
    293      *
    294      * <p>The base class checks for null and for equal types.
    295      * Subclasses should override.
    296      *
    297      * @param obj the Collator to compare to.
    298      * @return true if this Collator has exactly the same collation behavior as obj, false otherwise.
    299      */
    300     @Override
    301     public boolean equals(Object obj) {
    302         // Subclasses: Call this method and then add more specific checks.
    303         return this == obj || (obj != null && getClass() == obj.getClass());
    304     }
    305 
    306     /**
    307      * Generates a hash code for this Collator object.
    308      *
    309      * <p>The implementation exists just for consistency with {@link #equals(Object)}
    310      * implementation in this class and does not generate a useful hash code.
    311      * Subclasses should override this implementation.
    312      *
    313      * @return a hash code value.
    314      */
    315     @Override
    316     public int hashCode() {
    317         // Dummy return to prevent compile warnings.
    318         return 0;
    319     }
    320 
    321     // public setters --------------------------------------------------------
    322 
    323     private void checkNotFrozen() {
    324         if (isFrozen()) {
    325             throw new UnsupportedOperationException("Attempt to modify frozen Collator");
    326         }
    327     }
    328 
    329     /**
    330      * Sets this Collator's strength attribute. The strength attribute
    331      * determines the minimum level of difference considered significant
    332      * during comparison.
    333      *
    334      * <p>The base class method does nothing. Subclasses should override it if appropriate.
    335      *
    336      * <p>See the Collator class description for an example of use.
    337      * @param newStrength the new strength value.
    338      * @see #getStrength
    339      * @see #PRIMARY
    340      * @see #SECONDARY
    341      * @see #TERTIARY
    342      * @see #QUATERNARY
    343      * @see #IDENTICAL
    344      * @throws IllegalArgumentException if the new strength value is not valid.
    345      */
    346     public void setStrength(int newStrength)
    347     {
    348         checkNotFrozen();
    349     }
    350 
    351     /**
    352      * @return this, for chaining
    353      * @deprecated This API is ICU internal only.
    354      * @hide original deprecated declaration
    355      * @hide draft / provisional / internal are hidden on Android
    356      */
    357     @Deprecated
    358     public Collator setStrength2(int newStrength)
    359     {
    360         setStrength(newStrength);
    361         return this;
    362     }
    363 
    364     /**
    365      * Sets the decomposition mode of this Collator.  Setting this
    366      * decomposition attribute with CANONICAL_DECOMPOSITION allows the
    367      * Collator to handle un-normalized text properly, producing the
    368      * same results as if the text were normalized. If
    369      * NO_DECOMPOSITION is set, it is the user's responsibility to
    370      * insure that all text is already in the appropriate form before
    371      * a comparison or before getting a CollationKey. Adjusting
    372      * decomposition mode allows the user to select between faster and
    373      * more complete collation behavior.
    374      *
    375      * <p>Since a great many of the world's languages do not require
    376      * text normalization, most locales set NO_DECOMPOSITION as the
    377      * default decomposition mode.
    378      *
    379      * <p>The base class method does nothing. Subclasses should override it if appropriate.
    380      *
    381      * <p>See getDecomposition for a description of decomposition
    382      * mode.
    383      *
    384      * @param decomposition the new decomposition mode
    385      * @see #getDecomposition
    386      * @see #NO_DECOMPOSITION
    387      * @see #CANONICAL_DECOMPOSITION
    388      * @throws IllegalArgumentException If the given value is not a valid
    389      *            decomposition mode.
    390      */
    391     public void setDecomposition(int decomposition)
    392     {
    393         checkNotFrozen();
    394     }
    395 
    396     /**
    397      * Sets the reordering codes for this collator.
    398      * Collation reordering allows scripts and some other groups of characters
    399      * to be moved relative to each other. This reordering is done on top of
    400      * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
    401      * at the start and/or the end of the collation order. These groups are specified using
    402      * UScript codes and {@link Collator.ReorderCodes} entries.
    403      *
    404      * <p>By default, reordering codes specified for the start of the order are placed in the
    405      * order given after several special non-script blocks. These special groups of characters
    406      * are space, punctuation, symbol, currency, and digit. These special groups are represented with
    407      * {@link Collator.ReorderCodes} entries. Script groups can be intermingled with
    408      * these special non-script groups if those special groups are explicitly specified in the reordering.
    409      *
    410      * <p>The special code {@link Collator.ReorderCodes#OTHERS OTHERS}
    411      * stands for any script that is not explicitly
    412      * mentioned in the list of reordering codes given. Anything that is after OTHERS
    413      * will go at the very end of the reordering in the order given.
    414      *
    415      * <p>The special reorder code {@link Collator.ReorderCodes#DEFAULT DEFAULT}
    416      * will reset the reordering for this collator
    417      * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
    418      * was specified when this collator was created from resource data or from rules. The
    419      * DEFAULT code <b>must</b> be the sole code supplied when it is used.
    420      * If not, then an {@link IllegalArgumentException} will be thrown.
    421      *
    422      * <p>The special reorder code {@link Collator.ReorderCodes#NONE NONE}
    423      * will remove any reordering for this collator.
    424      * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
    425      * NONE code <b>must</b> be the sole code supplied when it is used.
    426      *
    427      * @param order the reordering codes to apply to this collator; if this is null or an empty array
    428      * then this clears any existing reordering
    429      * @see #getReorderCodes
    430      * @see #getEquivalentReorderCodes
    431      * @see Collator.ReorderCodes
    432      * @see UScript
    433      */
    434     public void setReorderCodes(int... order)
    435     {
    436         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
    437     }
    438 
    439     // public getters --------------------------------------------------------
    440 
    441     /**
    442      * Returns the Collator for the current default locale.
    443      * The default locale is determined by java.util.Locale.getDefault().
    444      * @return the Collator for the default locale (for example, en_US) if it
    445      *         is created successfully. Otherwise if there is no Collator
    446      *         associated with the current locale, the root collator
    447      *         will be returned.
    448      * @see java.util.Locale#getDefault()
    449      * @see #getInstance(Locale)
    450      */
    451     public static final Collator getInstance()
    452     {
    453         return getInstance(ULocale.getDefault());
    454     }
    455 
    456     /**
    457      * Clones the collator.
    458      * @return a clone of this collator.
    459      */
    460     @Override
    461     public Object clone() throws CloneNotSupportedException {
    462         return super.clone();
    463     }
    464 
    465     // begin registry stuff
    466 
    467     /**
    468      * A factory used with registerFactory to register multiple collators and provide
    469      * display names for them.  If standard locale display names are sufficient,
    470      * Collator instances may be registered instead.
    471      * <p><b>Note:</b> as of ICU4J 3.2, the default API for CollatorFactory uses
    472      * ULocale instead of Locale.  Instead of overriding createCollator(Locale),
    473      * new implementations should override createCollator(ULocale).  Note that
    474      * one of these two methods <b>MUST</b> be overridden or else an infinite
    475      * loop will occur.
    476      * @hide unsupported on Android
    477      */
    478     public static abstract class CollatorFactory {
    479         /**
    480          * Return true if this factory will be visible.  Default is true.
    481          * If not visible, the locales supported by this factory will not
    482          * be listed by getAvailableLocales.
    483          *
    484          * @return true if this factory is visible
    485          */
    486         public boolean visible() {
    487             return true;
    488         }
    489 
    490         /**
    491          * Return an instance of the appropriate collator.  If the locale
    492          * is not supported, return null.
    493          * <b>Note:</b> as of ICU4J 3.2, implementations should override
    494          * this method instead of createCollator(Locale).
    495          * @param loc the locale for which this collator is to be created.
    496          * @return the newly created collator.
    497          */
    498         public Collator createCollator(ULocale loc) {
    499             return createCollator(loc.toLocale());
    500         }
    501 
    502         /**
    503          * Return an instance of the appropriate collator.  If the locale
    504          * is not supported, return null.
    505          * <p><b>Note:</b> as of ICU4J 3.2, implementations should override
    506          * createCollator(ULocale) instead of this method, and inherit this
    507          * method's implementation.  This method is no longer abstract
    508          * and instead delegates to createCollator(ULocale).
    509          * @param loc the locale for which this collator is to be created.
    510          * @return the newly created collator.
    511          */
    512          public Collator createCollator(Locale loc) {
    513             return createCollator(ULocale.forLocale(loc));
    514         }
    515 
    516         /**
    517          * Return the name of the collator for the objectLocale, localized for the displayLocale.
    518          * If objectLocale is not visible or not defined by the factory, return null.
    519          * @param objectLocale the locale identifying the collator
    520          * @param displayLocale the locale for which the display name of the collator should be localized
    521          * @return the display name
    522          */
    523         public String getDisplayName(Locale objectLocale, Locale displayLocale) {
    524             return getDisplayName(ULocale.forLocale(objectLocale), ULocale.forLocale(displayLocale));
    525         }
    526 
    527         /**
    528          * Return the name of the collator for the objectLocale, localized for the displayLocale.
    529          * If objectLocale is not visible or not defined by the factory, return null.
    530          * @param objectLocale the locale identifying the collator
    531          * @param displayLocale the locale for which the display name of the collator should be localized
    532          * @return the display name
    533          */
    534         public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
    535             if (visible()) {
    536                 Set<String> supported = getSupportedLocaleIDs();
    537                 String name = objectLocale.getBaseName();
    538                 if (supported.contains(name)) {
    539                     return objectLocale.getDisplayName(displayLocale);
    540                 }
    541             }
    542             return null;
    543         }
    544 
    545         /**
    546          * Return an unmodifiable collection of the locale names directly
    547          * supported by this factory.
    548          *
    549          * @return the set of supported locale IDs.
    550          */
    551         public abstract Set<String> getSupportedLocaleIDs();
    552 
    553         /**
    554          * Empty default constructor.
    555          */
    556         protected CollatorFactory() {
    557         }
    558     }
    559 
    560     static abstract class ServiceShim {
    561         abstract Collator getInstance(ULocale l);
    562         abstract Object registerInstance(Collator c, ULocale l);
    563         abstract Object registerFactory(CollatorFactory f);
    564         abstract boolean unregister(Object k);
    565         abstract Locale[] getAvailableLocales(); // TODO remove
    566         abstract ULocale[] getAvailableULocales();
    567         abstract String getDisplayName(ULocale ol, ULocale dl);
    568     }
    569 
    570     private static ServiceShim shim;
    571     private static ServiceShim getShim() {
    572         // Note: this instantiation is safe on loose-memory-model configurations
    573         // despite lack of synchronization, since the shim instance has no state--
    574         // it's all in the class init.  The worst problem is we might instantiate
    575         // two shim instances, but they'll share the same state so that's ok.
    576         if (shim == null) {
    577             try {
    578                 Class<?> cls = Class.forName("android.icu.text.CollatorServiceShim");
    579                 shim = (ServiceShim)cls.newInstance();
    580             }
    581             catch (MissingResourceException e)
    582             {
    583                 ///CLOVER:OFF
    584                 throw e;
    585                 ///CLOVER:ON
    586             }
    587             catch (Exception e) {
    588                 ///CLOVER:OFF
    589                 if(DEBUG){
    590                     e.printStackTrace();
    591                 }
    592                 throw new ICUException(e);
    593                 ///CLOVER:ON
    594             }
    595         }
    596         return shim;
    597     }
    598 
    599     /**
    600      * Simpler/faster methods for ASCII than ones based on Unicode data.
    601      * TODO: There should be code like this somewhere already??
    602      */
    603     private static final class ASCII {
    604         static boolean equalIgnoreCase(CharSequence left, CharSequence right) {
    605             int length = left.length();
    606             if (length != right.length()) { return false; }
    607             for (int i = 0; i < length; ++i) {
    608                 char lc = left.charAt(i);
    609                 char rc = right.charAt(i);
    610                 if (lc == rc) { continue; }
    611                 if ('A' <= lc && lc <= 'Z') {
    612                     if ((lc + 0x20) == rc) { continue; }
    613                 } else if ('A' <= rc && rc <= 'Z') {
    614                     if ((rc + 0x20) == lc) { continue; }
    615                 }
    616                 return false;
    617             }
    618             return true;
    619         }
    620     }
    621 
    622     private static final boolean getYesOrNo(String keyword, String s) {
    623         if (ASCII.equalIgnoreCase(s, "yes")) {
    624             return true;
    625         }
    626         if (ASCII.equalIgnoreCase(s, "no")) {
    627             return false;
    628         }
    629         throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
    630     }
    631 
    632     private static final int getIntValue(String keyword, String s, String... values) {
    633         for (int i = 0; i < values.length; ++i) {
    634             if (ASCII.equalIgnoreCase(s, values[i])) {
    635                 return i;
    636             }
    637         }
    638         throw new IllegalArgumentException("illegal locale keyword=value: " + keyword + "=" + s);
    639     }
    640 
    641     private static final int getReorderCode(String keyword, String s) {
    642         return Collator.ReorderCodes.FIRST +
    643                 getIntValue(keyword, s, "space", "punct", "symbol", "currency", "digit");
    644         // Not supporting "others" = UCOL_REORDER_CODE_OTHERS
    645         // as a synonym for Zzzz = USCRIPT_UNKNOWN for now:
    646         // Avoid introducing synonyms/aliases.
    647     }
    648 
    649     /**
    650      * Sets collation attributes according to locale keywords. See
    651      * http://www.unicode.org/reports/tr35/tr35-collation.html#Collation_Settings
    652      *
    653      * Using "alias" keywords and values where defined:
    654      * http://www.unicode.org/reports/tr35/tr35.html#Old_Locale_Extension_Syntax
    655      * http://unicode.org/repos/cldr/trunk/common/bcp47/collation.xml
    656      */
    657     private static void setAttributesFromKeywords(ULocale loc, Collator coll, RuleBasedCollator rbc) {
    658         // Check for collation keywords that were already deprecated
    659         // before any were supported in createInstance() (except for "collation").
    660         String value = loc.getKeywordValue("colHiraganaQuaternary");
    661         if (value != null) {
    662             throw new UnsupportedOperationException("locale keyword kh/colHiraganaQuaternary");
    663         }
    664         value = loc.getKeywordValue("variableTop");
    665         if (value != null) {
    666             throw new UnsupportedOperationException("locale keyword vt/variableTop");
    667         }
    668         // Parse known collation keywords, ignore others.
    669         value = loc.getKeywordValue("colStrength");
    670         if (value != null) {
    671             // Note: Not supporting typo "quarternary" because it was never supported in locale IDs.
    672             int strength = getIntValue("colStrength", value,
    673                     "primary", "secondary", "tertiary", "quaternary", "identical");
    674             coll.setStrength(strength <= Collator.QUATERNARY ? strength : Collator.IDENTICAL);
    675         }
    676         value = loc.getKeywordValue("colBackwards");
    677         if (value != null) {
    678             if (rbc != null) {
    679                 rbc.setFrenchCollation(getYesOrNo("colBackwards", value));
    680             } else {
    681                 throw new UnsupportedOperationException(
    682                         "locale keyword kb/colBackwards only settable for RuleBasedCollator");
    683             }
    684         }
    685         value = loc.getKeywordValue("colCaseLevel");
    686         if (value != null) {
    687             if (rbc != null) {
    688                 rbc.setCaseLevel(getYesOrNo("colCaseLevel", value));
    689             } else {
    690                 throw new UnsupportedOperationException(
    691                         "locale keyword kb/colBackwards only settable for RuleBasedCollator");
    692             }
    693         }
    694         value = loc.getKeywordValue("colCaseFirst");
    695         if (value != null) {
    696             if (rbc != null) {
    697                 int cf = getIntValue("colCaseFirst", value, "no", "lower", "upper");
    698                 if (cf == 0) {
    699                     rbc.setLowerCaseFirst(false);
    700                     rbc.setUpperCaseFirst(false);
    701                 } else if (cf == 1) {
    702                     rbc.setLowerCaseFirst(true);
    703                 } else /* cf == 2 */ {
    704                     rbc.setUpperCaseFirst(true);
    705                 }
    706             } else {
    707                 throw new UnsupportedOperationException(
    708                         "locale keyword kf/colCaseFirst only settable for RuleBasedCollator");
    709             }
    710         }
    711         value = loc.getKeywordValue("colAlternate");
    712         if (value != null) {
    713             if (rbc != null) {
    714                 rbc.setAlternateHandlingShifted(
    715                         getIntValue("colAlternate", value, "non-ignorable", "shifted") != 0);
    716             } else {
    717                 throw new UnsupportedOperationException(
    718                         "locale keyword ka/colAlternate only settable for RuleBasedCollator");
    719             }
    720         }
    721         value = loc.getKeywordValue("colNormalization");
    722         if (value != null) {
    723             coll.setDecomposition(getYesOrNo("colNormalization", value) ?
    724                     Collator.CANONICAL_DECOMPOSITION : Collator.NO_DECOMPOSITION);
    725         }
    726         value = loc.getKeywordValue("colNumeric");
    727         if (value != null) {
    728             if (rbc != null) {
    729                 rbc.setNumericCollation(getYesOrNo("colNumeric", value));
    730             } else {
    731                 throw new UnsupportedOperationException(
    732                         "locale keyword kn/colNumeric only settable for RuleBasedCollator");
    733             }
    734         }
    735         value = loc.getKeywordValue("colReorder");
    736         if (value != null) {
    737             int[] codes = new int[UScript.CODE_LIMIT + Collator.ReorderCodes.LIMIT - Collator.ReorderCodes.FIRST];
    738             int codesLength = 0;
    739             int scriptNameStart = 0;
    740             for (;;) {
    741                 if (codesLength == codes.length) {
    742                     throw new IllegalArgumentException(
    743                             "too many script codes for colReorder locale keyword: " + value);
    744                 }
    745                 int limit = scriptNameStart;
    746                 while (limit < value.length() && value.charAt(limit) != '-') { ++limit; }
    747                 String scriptName = value.substring(scriptNameStart, limit);
    748                 int code;
    749                 if (scriptName.length() == 4) {
    750                     // Strict parsing, accept only 4-letter script codes, not long names.
    751                     code = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptName);
    752                 } else {
    753                     code = getReorderCode("colReorder", scriptName);
    754                 }
    755                 codes[codesLength++] = code;
    756                 if (limit == value.length()) { break; }
    757                 scriptNameStart = limit + 1;
    758             }
    759             if (codesLength == 0) {
    760                 throw new IllegalArgumentException("no script codes for colReorder locale keyword");
    761             }
    762             int[] args = new int[codesLength];
    763             System.arraycopy(codes, 0, args, 0, codesLength);
    764             coll.setReorderCodes(args);
    765         }
    766         value = loc.getKeywordValue("kv");
    767         if (value != null) {
    768             coll.setMaxVariable(getReorderCode("kv", value));
    769         }
    770     }
    771 
    772     /**
    773      * <strong>[icu]</strong> Returns the Collator for the desired locale.
    774      *
    775      * <p>For some languages, multiple collation types are available;
    776      * for example, "de@collation=phonebook".
    777      * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
    778      * in the old locale extension syntax ("el@colCaseFirst=upper")
    779      * or in language tag syntax ("el-u-kf-upper").
    780      * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
    781      *
    782      * @param locale the desired locale.
    783      * @return Collator for the desired locale if it is created successfully.
    784      *         Otherwise if there is no Collator
    785      *         associated with the current locale, the root collator will
    786      *         be returned.
    787      * @see java.util.Locale
    788      * @see java.util.ResourceBundle
    789      * @see #getInstance(Locale)
    790      * @see #getInstance()
    791      */
    792     public static final Collator getInstance(ULocale locale) {
    793         // fetching from service cache is faster than instantiation
    794         if (locale == null) {
    795             locale = ULocale.getDefault();
    796         }
    797         Collator coll = getShim().getInstance(locale);
    798         if (!locale.getName().equals(locale.getBaseName())) {  // any keywords?
    799             setAttributesFromKeywords(locale, coll,
    800                     (coll instanceof RuleBasedCollator) ? (RuleBasedCollator)coll : null);
    801         }
    802         return coll;
    803     }
    804 
    805     /**
    806      * Returns the Collator for the desired locale.
    807      *
    808      * <p>For some languages, multiple collation types are available;
    809      * for example, "de-u-co-phonebk".
    810      * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
    811      * in the old locale extension syntax ("el@colCaseFirst=upper", only with {@link ULocale})
    812      * or in language tag syntax ("el-u-kf-upper").
    813      * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
    814      *
    815      * @param locale the desired locale.
    816      * @return Collator for the desired locale if it is created successfully.
    817      *         Otherwise if there is no Collator
    818      *         associated with the current locale, the root collator will
    819      *         be returned.
    820      * @see java.util.Locale
    821      * @see java.util.ResourceBundle
    822      * @see #getInstance(ULocale)
    823      * @see #getInstance()
    824      */
    825     public static final Collator getInstance(Locale locale) {
    826         return getInstance(ULocale.forLocale(locale));
    827     }
    828 
    829     /**
    830      * <strong>[icu]</strong> Registers a collator as the default collator for the provided locale.  The
    831      * collator should not be modified after it is registered.
    832      *
    833      * <p>Because ICU may choose to cache Collator objects internally, this must
    834      * be called at application startup, prior to any calls to
    835      * Collator.getInstance to avoid undefined behavior.
    836      *
    837      * @param collator the collator to register
    838      * @param locale the locale for which this is the default collator
    839      * @return an object that can be used to unregister the registered collator.
    840      *
    841      * @hide unsupported on Android
    842      */
    843     public static final Object registerInstance(Collator collator, ULocale locale) {
    844         return getShim().registerInstance(collator, locale);
    845     }
    846 
    847     /**
    848      * <strong>[icu]</strong> Registers a collator factory.
    849      *
    850      * <p>Because ICU may choose to cache Collator objects internally, this must
    851      * be called at application startup, prior to any calls to
    852      * Collator.getInstance to avoid undefined behavior.
    853      *
    854      * @param factory the factory to register
    855      * @return an object that can be used to unregister the registered factory.
    856      *
    857      * @hide unsupported on Android
    858      */
    859     public static final Object registerFactory(CollatorFactory factory) {
    860         return getShim().registerFactory(factory);
    861     }
    862 
    863     /**
    864      * <strong>[icu]</strong> Unregisters a collator previously registered using registerInstance.
    865      * @param registryKey the object previously returned by registerInstance.
    866      * @return true if the collator was successfully unregistered.
    867      * @hide unsupported on Android
    868      */
    869     public static final boolean unregister(Object registryKey) {
    870         if (shim == null) {
    871             return false;
    872         }
    873         return shim.unregister(registryKey);
    874     }
    875 
    876     /**
    877      * Returns the set of locales, as Locale objects, for which collators
    878      * are installed.  Note that Locale objects do not support RFC 3066.
    879      * @return the list of locales in which collators are installed.
    880      * This list includes any that have been registered, in addition to
    881      * those that are installed with ICU4J.
    882      */
    883     public static Locale[] getAvailableLocales() {
    884         // TODO make this wrap getAvailableULocales later
    885         if (shim == null) {
    886             return ICUResourceBundle.getAvailableLocales(
    887                 ICUData.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
    888         }
    889         return shim.getAvailableLocales();
    890     }
    891 
    892     /**
    893      * <strong>[icu]</strong> Returns the set of locales, as ULocale objects, for which collators
    894      * are installed.  ULocale objects support RFC 3066.
    895      * @return the list of locales in which collators are installed.
    896      * This list includes any that have been registered, in addition to
    897      * those that are installed with ICU4J.
    898      */
    899     public static final ULocale[] getAvailableULocales() {
    900         if (shim == null) {
    901             return ICUResourceBundle.getAvailableULocales(
    902                 ICUData.ICU_COLLATION_BASE_NAME, ICUResourceBundle.ICU_DATA_CLASS_LOADER);
    903         }
    904         return shim.getAvailableULocales();
    905     }
    906 
    /**
     * The list of keywords for this service.  This must be kept in sync with
     * the resource data.
     * The single entry, "collation", is the keyword that
     * {@link #getKeywordValues} checks its argument against.
     */
    private static final String[] KEYWORDS = { "collation" };

    /**
     * The resource name for this service.  Note that this is not the same as
     * the keyword for this service ("collations" versus "collation").
     */
    private static final String RESOURCE = "collations";

    /**
     * The resource bundle base name for this service.
     * Since ICU 3.0.
     */

    private static final String BASE = ICUData.ICU_COLLATION_BASE_NAME;
    925 
    926     /**
    927      * <strong>[icu]</strong> Returns an array of all possible keywords that are relevant to
    928      * collation. At this point, the only recognized keyword for this
    929      * service is "collation".
    930      * @return an array of valid collation keywords.
    931      * @see #getKeywordValues
    932      */
    933     public static final String[] getKeywords() {
    934         return KEYWORDS;
    935     }
    936 
    937     /**
    938      * <strong>[icu]</strong> Given a keyword, returns an array of all values for
    939      * that keyword that are currently in use.
    940      * @param keyword one of the keywords returned by getKeywords.
    941      * @see #getKeywords
    942      */
    943     public static final String[] getKeywordValues(String keyword) {
    944         if (!keyword.equals(KEYWORDS[0])) {
    945             throw new IllegalArgumentException("Invalid keyword: " + keyword);
    946         }
    947         return ICUResourceBundle.getKeywordValues(BASE, RESOURCE);
    948     }
    949 
    950     /**
    951      * <strong>[icu]</strong> Given a key and a locale, returns an array of string values in a preferred
    952      * order that would make a difference. These are all and only those values where
    953      * the open (creation) of the service with the locale formed from the input locale
    954      * plus input keyword and that value has different behavior than creation with the
    955      * input locale alone.
    956      * @param key           one of the keys supported by this service.  For now, only
    957      *                      "collation" is supported.
    958      * @param locale        the locale
    959      * @param commonlyUsed  if set to true it will return only commonly used values
    960      *                      with the given locale in preferred order.  Otherwise,
    961      *                      it will return all the available values for the locale.
    962      * @return an array of string values for the given key and the locale.
    963      */
    964     public static final String[] getKeywordValuesForLocale(String key, ULocale locale,
    965                                                            boolean commonlyUsed) {
    966         // Note: The parameter commonlyUsed is not used.
    967         // The switch is in the method signature for consistency
    968         // with other locale services.
    969 
    970         // Read available collation values from collation bundles.
    971         ICUResourceBundle bundle = (ICUResourceBundle)
    972                 UResourceBundle.getBundleInstance(
    973                         ICUData.ICU_COLLATION_BASE_NAME, locale);
    974         KeywordsSink sink = new KeywordsSink();
    975         bundle.getAllItemsWithFallback("collations", sink);
    976         return sink.values.toArray(new String[sink.values.size()]);
    977     }
    978 
    979     private static final class KeywordsSink extends UResource.Sink {
    980         LinkedList<String> values = new LinkedList<String>();
    981         boolean hasDefault = false;
    982 
    983         @Override
    984         public void put(UResource.Key key, UResource.Value value, boolean noFallback) {
    985             UResource.Table collations = value.getTable();
    986             for (int i = 0; collations.getKeyAndValue(i, key, value); ++i) {
    987                 int type = value.getType();
    988                 if (type == UResourceBundle.STRING) {
    989                     if (!hasDefault && key.contentEquals("default")) {
    990                         String defcoll = value.getString();
    991                         if (!defcoll.isEmpty()) {
    992                             values.remove(defcoll);
    993                             values.addFirst(defcoll);
    994                             hasDefault = true;
    995                         }
    996                     }
    997                 } else if (type == UResourceBundle.TABLE && !key.startsWith("private-")) {
    998                     String collkey = key.toString();
    999                     if (!values.contains(collkey)) {
   1000                         values.add(collkey);
   1001                     }
   1002                 }
   1003             }
   1004         }
   1005     }
   1006 
   1007     /**
   1008      * <strong>[icu]</strong> Returns the functionally equivalent locale for the given
   1009      * requested locale, with respect to given keyword, for the
   1010      * collation service.  If two locales return the same result, then
   1011      * collators instantiated for these locales will behave
   1012      * equivalently.  The converse is not always true; two collators
   1013      * may in fact be equivalent, but return different results, due to
   1014      * internal details.  The return result has no other meaning than
   1015      * that stated above, and implies nothing as to the relationship
   1016      * between the two locales.  This is intended for use by
   1017      * applications who wish to cache collators, or otherwise reuse
   1018      * collators when possible.  The functional equivalent may change
   1019      * over time.  For more information, please see the <a
   1020      * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services">
   1021      * Locales and Services</a> section of the ICU User Guide.
   1022      * @param keyword a particular keyword as enumerated by
   1023      * getKeywords.
   1024      * @param locID The requested locale
   1025      * @param isAvailable If non-null, isAvailable[0] will receive and
   1026      * output boolean that indicates whether the requested locale was
   1027      * 'available' to the collation service. If non-null, isAvailable
   1028      * must have length &gt;= 1.
   1029      * @return the locale
   1030      */
   1031     public static final ULocale getFunctionalEquivalent(String keyword,
   1032                                                         ULocale locID,
   1033                                                         boolean isAvailable[]) {
   1034         return ICUResourceBundle.getFunctionalEquivalent(BASE, ICUResourceBundle.ICU_DATA_CLASS_LOADER, RESOURCE,
   1035                                                          keyword, locID, isAvailable, true);
   1036     }
   1037 
   1038     /**
   1039      * <strong>[icu]</strong> Returns the functionally equivalent locale for the given
   1040      * requested locale, with respect to given keyword, for the
   1041      * collation service.
   1042      * @param keyword a particular keyword as enumerated by
   1043      * getKeywords.
   1044      * @param locID The requested locale
   1045      * @return the locale
   1046      * @see #getFunctionalEquivalent(String,ULocale,boolean[])
   1047      */
   1048     public static final ULocale getFunctionalEquivalent(String keyword,
   1049                                                         ULocale locID) {
   1050         return getFunctionalEquivalent(keyword, locID, null);
   1051     }
   1052 
   1053     /**
   1054      * <strong>[icu]</strong> Returns the name of the collator for the objectLocale, localized for the
   1055      * displayLocale.
   1056      * @param objectLocale the locale of the collator
   1057      * @param displayLocale the locale for the collator's display name
   1058      * @return the display name
   1059      */
   1060     static public String getDisplayName(Locale objectLocale, Locale displayLocale) {
   1061         return getShim().getDisplayName(ULocale.forLocale(objectLocale),
   1062                                         ULocale.forLocale(displayLocale));
   1063     }
   1064 
   1065     /**
   1066      * <strong>[icu]</strong> Returns the name of the collator for the objectLocale, localized for the
   1067      * displayLocale.
   1068      * @param objectLocale the locale of the collator
   1069      * @param displayLocale the locale for the collator's display name
   1070      * @return the display name
   1071      */
   1072     static public String getDisplayName(ULocale objectLocale, ULocale displayLocale) {
   1073         return getShim().getDisplayName(objectLocale, displayLocale);
   1074     }
   1075 
   1076     /**
   1077      * <strong>[icu]</strong> Returns the name of the collator for the objectLocale, localized for the
   1078      * default <code>DISPLAY</code> locale.
   1079      * @param objectLocale the locale of the collator
   1080      * @return the display name
   1081      * @see android.icu.util.ULocale.Category#DISPLAY
   1082      */
   1083     static public String getDisplayName(Locale objectLocale) {
   1084         return getShim().getDisplayName(ULocale.forLocale(objectLocale), ULocale.getDefault(Category.DISPLAY));
   1085     }
   1086 
   1087     /**
   1088      * <strong>[icu]</strong> Returns the name of the collator for the objectLocale, localized for the
   1089      * default <code>DISPLAY</code> locale.
   1090      * @param objectLocale the locale of the collator
   1091      * @return the display name
   1092      * @see android.icu.util.ULocale.Category#DISPLAY
   1093      */
   1094     static public String getDisplayName(ULocale objectLocale) {
   1095         return getShim().getDisplayName(objectLocale, ULocale.getDefault(Category.DISPLAY));
   1096     }
   1097 
   1098     /**
   1099      * Returns this Collator's strength attribute. The strength attribute
   1100      * determines the minimum level of difference considered significant.
   1101      * <strong>[icu] Note:</strong> This can return QUATERNARY strength, which is not supported by the
   1102      * JDK version.
   1103      * <p>
   1104      * See the Collator class description for more details.
   1105      * <p>The base class method always returns {@link #TERTIARY}.
   1106      * Subclasses should override it if appropriate.
   1107      *
   1108      * @return this Collator's current strength attribute.
   1109      * @see #setStrength
   1110      * @see #PRIMARY
   1111      * @see #SECONDARY
   1112      * @see #TERTIARY
   1113      * @see #QUATERNARY
   1114      * @see #IDENTICAL
   1115      */
   1116     public int getStrength()
   1117     {
   1118         return TERTIARY;
   1119     }
   1120 
   1121     /**
   1122      * Returns the decomposition mode of this Collator. The decomposition mode
   1123      * determines how Unicode composed characters are handled.
   1124      * <p>
   1125      * See the Collator class description for more details.
   1126      * <p>The base class method always returns {@link #NO_DECOMPOSITION}.
   1127      * Subclasses should override it if appropriate.
   1128      *
   1129      * @return the decomposition mode
   1130      * @see #setDecomposition
   1131      * @see #NO_DECOMPOSITION
   1132      * @see #CANONICAL_DECOMPOSITION
   1133      */
   1134     public int getDecomposition()
   1135     {
   1136         return NO_DECOMPOSITION;
   1137     }
   1138 
   1139     // public other methods -------------------------------------------------
   1140 
   1141     /**
   1142      * Compares the equality of two text Strings using
   1143      * this Collator's rules, strength and decomposition mode.  Convenience method.
   1144      * @param source the source string to be compared.
   1145      * @param target the target string to be compared.
   1146      * @return true if the strings are equal according to the collation
   1147      *         rules, otherwise false.
   1148      * @see #compare
   1149      * @throws NullPointerException thrown if either arguments is null.
   1150      */
   1151     public boolean equals(String source, String target)
   1152     {
   1153         return (compare(source, target) == 0);
   1154     }
   1155 
   1156     /**
   1157      * <strong>[icu]</strong> Returns a UnicodeSet that contains all the characters and sequences tailored
   1158      * in this collator.
   1159      * @return a pointer to a UnicodeSet object containing all the
   1160      *         code points and sequences that may sort differently than
   1161      *         in the root collator.
   1162      */
   1163     public UnicodeSet getTailoredSet()
   1164     {
   1165         return new UnicodeSet(0, 0x10FFFF);
   1166     }
   1167 
    /**
     * Compares the source text String to the target text String according to
     * this Collator's rules, strength and decomposition mode.
     * Returns an integer less than,
     * equal to or greater than zero depending on whether the source String is
     * less than, equal to or greater than the target String. See the Collator
     * class description for an example of use.
     *
     * <p>This method is abstract; subclasses supply the comparison.
     *
     * @param source the source String.
     * @param target the target String.
     * @return Returns an integer value. Value is less than zero if source is
     *         less than target, value is zero if source and target are equal,
     *         value is greater than zero if source is greater than target.
     * @see CollationKey
     * @see #getCollationKey
     * @throws NullPointerException thrown if either argument is null.
     */
    public abstract int compare(String source, String target);
   1186 
   1187     /**
   1188      * Compares the source Object to the target Object.
   1189      *
   1190      * @param source the source Object.
   1191      * @param target the target Object.
   1192      * @return Returns an integer value. Value is less than zero if source is
   1193      *         less than target, value is zero if source and target are equal,
   1194      *         value is greater than zero if source is greater than target.
   1195      * @throws ClassCastException thrown if either arguments cannot be cast to CharSequence.
   1196      */
   1197     @Override
   1198     public int compare(Object source, Object target) {
   1199         return doCompare((CharSequence)source, (CharSequence)target);
   1200     }
   1201 
   1202     /**
   1203      * Compares two CharSequences.
   1204      * The base class just calls compare(left.toString(), right.toString()).
   1205      * Subclasses should instead implement this method and have the String API call this method.
   1206      * @deprecated This API is ICU internal only.
   1207      * @hide original deprecated declaration
   1208      * @hide draft / provisional / internal are hidden on Android
   1209      */
   1210     @Deprecated
   1211     protected int doCompare(CharSequence left, CharSequence right) {
   1212         return compare(left.toString(), right.toString());
   1213     }
   1214 
    /**
     * Transforms the String into a CollationKey suitable for efficient
     * repeated comparison.  The resulting key depends on the collator's
     * rules, strength and decomposition mode.
     *
     * <p>Note that collation keys are often less efficient than simply doing comparison.
     * For more details, see the ICU User Guide.
     *
     * <p>See the CollationKey class documentation for more information.
     *
     * <p>This method is abstract; subclasses supply the key generation.
     *
     * @param source the string to be transformed into a CollationKey.
     * @return the CollationKey for the given String based on this Collator's
     *         collation rules. If the source String is null, a null
     *         CollationKey is returned.
     * @see CollationKey
     * @see #compare(String, String)
     */
    public abstract CollationKey getCollationKey(String source);
   1233 
    /**
     * <strong>[icu]</strong> Returns the simpler form of a CollationKey for the String source following
     * the rules of this Collator and stores the result into the user provided argument
     * key.  If key has an internal byte array of length that's too small for the result,
     * the internal byte array will be grown to the exact required size.
     *
     * <p>Note that collation keys are often less efficient than simply doing comparison.
     * For more details, see the ICU User Guide.
     *
     * <p>This method is abstract; subclasses supply the key generation.
     *
     * @param source the text String to be transformed into a RawCollationKey
     * @param key the RawCollationKey that receives the result, or null to have
     *            a new instance created
     * @return If key is null, a new instance of RawCollationKey will be
     *         created and returned, otherwise the user provided key will be
     *         returned.
     * @see #compare(String, String)
     * @see #getCollationKey
     * @see RawCollationKey
     * @hide unsupported on Android
     */
    public abstract RawCollationKey getRawCollationKey(String source,
                                                       RawCollationKey key);
   1254 
   1255     /**
   1256      * <strong>[icu]</strong> Sets the variable top to the top of the specified reordering group.
   1257      * The variable top determines the highest-sorting character which is affected by the alternate
   1258      * handling behavior; it has no effect if that attribute is set to UCOL_NON_IGNORABLE.
   1259      *
   1260      * <p>The base class implementation throws an UnsupportedOperationException.
   1261      * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
   1262      *              Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
   1263      *              or Collator.ReorderCodes.DEFAULT to restore the default max variable group
   1264      * @return this
   1265      * @throws UnsupportedOperationException always, unless a subclass overrides this method
   1266      * @see #getMaxVariable
   1267      */
   1268     public Collator setMaxVariable(int group) {
   1269         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
   1270     }
   1271 
   1272     /**
   1273      * <strong>[icu]</strong> Returns the maximum reordering group whose characters are affected by
   1274      * the alternate handling behavior.
   1275      *
   1276      * <p>The base class implementation always returns {@code Collator.ReorderCodes.PUNCTUATION}.
   1277      * @return the maximum variable reordering group
   1278      * @see #setMaxVariable
   1279      */
   1280     public int getMaxVariable() {
   1281         return Collator.ReorderCodes.PUNCTUATION;
   1282     }
   1283 
   1284     /**
   1285      * <strong>[icu]</strong> Sets the variable top to the primary weight of the specified string.
   1286      *
   1287      * <p>Beginning with ICU 53, the variable top is pinned to
   1288      * the top of one of the supported reordering groups,
   1289      * and it must not be beyond the last of those groups.
   1290      * See {@link #setMaxVariable(int)}.
   1291      *
   1292      * @param varTop one or more (if a contraction) characters to which the
   1293      *               variable top should be set
   1294      * @return the variable top primary weight
   1295      * @exception IllegalArgumentException
   1296      *                is thrown if the varTop argument is not a valid variable top element. A variable top
   1297      *                element is invalid when
   1298      *                <ul>
   1299      *                <li>it is a contraction that does not exist in the Collation order
   1300      *                <li>the variable top is beyond
   1301      *                    the last reordering group supported by setMaxVariable()
   1302      *                <li>the varTop argument is null or zero in length
   1303      *                </ul>
   1304      * @see #getVariableTop
   1305      * @see RuleBasedCollator#setAlternateHandlingShifted
   1306      * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
   1307      * @hide original deprecated declaration
   1308      */
   1309     @Deprecated
   1310     public abstract int setVariableTop(String varTop);
   1311 
   1312     /**
   1313      * <strong>[icu]</strong> Returns the variable top value of this Collator.
   1314      *
   1315      * @return the variable top primary weight
   1316      * @see #getMaxVariable
   1317      */
   1318     public abstract int getVariableTop();
   1319 
   1320     /**
   1321      * <strong>[icu]</strong> Sets the variable top to the specified primary weight.
   1322      *
   1323      * <p>Beginning with ICU 53, the variable top is pinned to
   1324      * the top of one of the supported reordering groups,
   1325      * and it must not be beyond the last of those groups.
   1326      * See {@link #setMaxVariable(int)}.
   1327      *
   1328      * @param varTop primary weight, as returned by setVariableTop(String) or getVariableTop()
   1329      * @see #getVariableTop
   1330      * @see #setVariableTop(String)
   1331      * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
   1332      * @hide original deprecated declaration
   1333      */
   1334     @Deprecated
   1335     public abstract void setVariableTop(int varTop);
   1336 
   1337     /**
   1338      * <strong>[icu]</strong> Returns the version of this collator object.
   1339      * @return the {@link VersionInfo} object holding the version of this collator
   1340      */
   1341     public abstract VersionInfo getVersion();
   1342 
   1343     /**
   1344      * <strong>[icu]</strong> Returns the UCA version of this collator object.
   1345      * @return the {@link VersionInfo} object holding the UCA version of this collator
   1346      */
   1347     public abstract VersionInfo getUCAVersion();
   1348 
   1349     /**
   1350      * Returns the reordering codes currently in effect for this collator.
   1351      * The codes are a mix of UScript codes and Collator.ReorderCodes values.
   1352      *
   1353      * <p>The base class implementation throws an UnsupportedOperationException.
   1354      * @return a copy of this collator's reordering codes; an empty array if none are set
   1355      * @see #setReorderCodes
   1356      * @see #getEquivalentReorderCodes
   1357      * @see Collator.ReorderCodes
   1358      * @see UScript
   1359      */
   1360     public int[] getReorderCodes() {
   1361         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
   1362     }
   1363 
   1364     /**
   1365      * Returns every reorder code that belongs to the same group as the given reorder code.
   1366      * Codes in one group cannot be reordered independently of each other.
   1367      * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
   1368      * for example Hiragana and Katakana.
   1369      *
   1370      * @param reorderCode the reorder code whose group is requested
   1371      * @return all reorder codes in the same group as the given reorder code
   1372      * @see #setReorderCodes
   1373      * @see #getReorderCodes
   1374      * @see Collator.ReorderCodes
   1375      * @see UScript
   1376      */
   1377     public static int[] getEquivalentReorderCodes(int reorderCode) {
   1378         // The groups are read from the data supplied by CollationRoot, not from a Collator instance.
   1379         return CollationRoot.getData().getEquivalentScripts(reorderCode);
   1380     }
   1381 
   1382 
   1383     // Freezable interface implementation -------------------------------------------------
   1384 
   1385     /**
   1386      * Determines whether the object has been frozen or not.
   1387      * An unfrozen Collator is mutable and not thread-safe; a frozen Collator is immutable and thread-safe.
   1388      * <p>The base class implementation always returns false.
   1389      * @return true if this Collator has been frozen, false otherwise
   1390      */
   1391     @Override
   1392     public boolean isFrozen() {
   1393         return false;
   1394     }
   1395 
   1396     /**
   1397      * Freezes the collator. The base class implementation throws an UnsupportedOperationException.
   1398      * @return the collator itself.
   1399      */
   1400     @Override
   1401     public Collator freeze() {
   1402         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
   1403     }
   1404 
   1405     /**
   1406      * Provides for the clone operation; any clone is initially unfrozen. The base class implementation throws.
   1407      */
   1408     @Override
   1409     public Collator cloneAsThawed() {
   1410         throw new UnsupportedOperationException("Needs to be implemented by the subclass.");
   1411     }
   1412 
   1413     /**
   1414      * Default constructor, declared explicitly to make javadocs happy.
   1415      * It is intentionally empty and performs no initialization.
   1416      */
   1417     protected Collator() {
   1418     }
   1419 
   1420     private static final boolean DEBUG = ICUDebug.enabled("collator"); // result of the ICUDebug lookup for "collator"
   1421 
   1422     // -------- BEGIN ULocale boilerplate --------
   1423 
   1424     /**
   1425      * <strong>[icu]</strong> Returns the locale that was used to create this object, or null.
   1426      * This may differ from the locale requested at the time of
   1427      * this object's creation.  For example, if an object is created
   1428      * for locale <tt>en_US_CALIFORNIA</tt>, the actual data may be
   1429      * drawn from <tt>en</tt> (the <i>actual</i> locale), and
   1430      * <tt>en_US</tt> may be the most specific locale that exists (the
   1431      * <i>valid</i> locale).
   1432      *
   1433      * <p>Note: This method will be implemented in ICU 3.0; ICU 2.8
   1434      * contains a partial preview implementation.  The <i>actual</i>
   1435      * locale is returned correctly, but the <i>valid</i> locale is
   1436      * not, in most cases.
   1437      *
   1438      * <p>The base class method always returns {@link ULocale#ROOT}, whatever
   1439      * the requested type.  Subclasses should override it if appropriate.
   1440      *
   1441      * @param type type of information requested, either {@link
   1442      * android.icu.util.ULocale#VALID_LOCALE} or {@link
   1443      * android.icu.util.ULocale#ACTUAL_LOCALE}.
   1444      * @return the information specified by <i>type</i>, or null if
   1445      * this object was not constructed from locale data.
   1446      * @see android.icu.util.ULocale
   1447      * @see android.icu.util.ULocale#VALID_LOCALE
   1448      * @see android.icu.util.ULocale#ACTUAL_LOCALE
   1449      * @hide draft / provisional / internal are hidden on Android
   1450      */
   1451     public ULocale getLocale(ULocale.Type type) {
   1452         return ULocale.ROOT;
   1453     }
   1454 
   1455     /**
   1456      * Sets information about the locales that were used to create this
   1457      * object.  If the object was not constructed from locale data,
   1458      * both arguments should be set to null.  Otherwise, neither
   1459      * should be null.  The actual locale must be at the same level or
   1460      * less specific than the valid locale.  This method is intended
   1461      * for use by factories or other entities that create objects of
   1462      * this class.
   1463      *
   1464      * <p>The base class method has an empty body and ignores both arguments. Subclasses should override it if appropriate.
   1465      *
   1466      * @param valid the most specific locale containing any resource
   1467      * data, or null
   1468      * @param actual the locale containing data used to construct this
   1469      * object, or null
   1470      * @see android.icu.util.ULocale
   1471      * @see android.icu.util.ULocale#VALID_LOCALE
   1472      * @see android.icu.util.ULocale#ACTUAL_LOCALE
   1473      */
   1474     void setLocale(ULocale valid, ULocale actual) {}
   1475 
   1476     // -------- END ULocale boilerplate --------
   1477 }
   1478