Home | History | Annotate | Download | only in text
      1 /**
      2  *******************************************************************************
      3  * Copyright (C) 1996-2015, International Business Machines Corporation and
      4  * others. All Rights Reserved.
      5  *******************************************************************************
      6  */
      7 package com.ibm.icu.text;
      8 
      9 import java.lang.reflect.InvocationTargetException;
     10 import java.lang.reflect.Method;
     11 import java.text.CharacterIterator;
     12 import java.text.ParseException;
     13 import java.util.Arrays;
     14 import java.util.concurrent.locks.Lock;
     15 import java.util.concurrent.locks.ReentrantLock;
     16 
     17 import com.ibm.icu.impl.ClassLoaderUtil;
     18 import com.ibm.icu.impl.Normalizer2Impl;
     19 import com.ibm.icu.impl.Normalizer2Impl.ReorderingBuffer;
     20 import com.ibm.icu.impl.Utility;
     21 import com.ibm.icu.impl.coll.BOCSU;
     22 import com.ibm.icu.impl.coll.Collation;
     23 import com.ibm.icu.impl.coll.CollationCompare;
     24 import com.ibm.icu.impl.coll.CollationData;
     25 import com.ibm.icu.impl.coll.CollationFastLatin;
     26 import com.ibm.icu.impl.coll.CollationIterator;
     27 import com.ibm.icu.impl.coll.CollationKeys;
     28 import com.ibm.icu.impl.coll.CollationKeys.SortKeyByteSink;
     29 import com.ibm.icu.impl.coll.CollationLoader;
     30 import com.ibm.icu.impl.coll.CollationRoot;
     31 import com.ibm.icu.impl.coll.CollationSettings;
     32 import com.ibm.icu.impl.coll.CollationTailoring;
     33 import com.ibm.icu.impl.coll.ContractionsAndExpansions;
     34 import com.ibm.icu.impl.coll.FCDUTF16CollationIterator;
     35 import com.ibm.icu.impl.coll.SharedObject;
     36 import com.ibm.icu.impl.coll.TailoredSet;
     37 import com.ibm.icu.impl.coll.UTF16CollationIterator;
     38 import com.ibm.icu.lang.UScript;
     39 import com.ibm.icu.util.ULocale;
     40 import com.ibm.icu.util.VersionInfo;
     41 
     42 /**
     43  * <p>
     44  * RuleBasedCollator is a concrete subclass of Collator. It allows customization of the Collator via user-specified rule
     45  * sets. RuleBasedCollator is designed to be fully compliant to the <a
     46  * href="http://www.unicode.org/unicode/reports/tr10/">Unicode Collation Algorithm (UCA)</a> and conforms to ISO 14651.
     47  * </p>
     48  *
     49  * <p>A Collator is thread-safe only when frozen. See {@link #isFrozen()} and {@link com.ibm.icu.util.Freezable}.
     50  *
     51  * <p>
     52  * Users are strongly encouraged to read the <a href="http://userguide.icu-project.org/collation">User
     53  * Guide</a> for more information about the collation service before using this class.
     54  * </p>
     55  *
     56  * <p>
     57  * Create a RuleBasedCollator from a locale by calling the getInstance(Locale) factory method in the base class
     58  * Collator. Collator.getInstance(Locale) creates a RuleBasedCollator object based on the collation rules defined by the
     59  * argument locale. If a customized collation ordering or attributes is required, use the RuleBasedCollator(String)
     60  * constructor with the appropriate rules. The customized RuleBasedCollator will base its ordering on the CLDR root collation, while
     61  * re-adjusting the attributes and orders of the characters in the specified rule accordingly.
     62  * </p>
     63  *
     64  * <p>
     65  * RuleBasedCollator provides correct collation orders for most locales supported in ICU. If specific data for a locale
     66  * is not available, the order eventually falls back to the
     67  * <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
     68  * </p>
     69  *
     70  * <p>
     71  * For information about the collation rule syntax and details about customization, please refer to the <a
     72  * href="http://userguide.icu-project.org/collation/customization">Collation customization</a> section of the
     73  * User Guide.
     74  * </p>
     75  *
     76  * <p>
     77  * <strong>Note</strong> that there are some differences between the Collation rule syntax used in Java and ICU4J:
     78  *
     79  * <ul>
     80  * <li>According to the JDK documentation: <i>
     81  * <p>
     82  * Modifier '!' : Turns on Thai/Lao vowel-consonant swapping. If this rule is in force when a Thai vowel of the range
     83  * &#92;U0E40-&#92;U0E44 precedes a Thai consonant of the range &#92;U0E01-&#92;U0E2E OR a Lao vowel of the range
     84  * &#92;U0EC0-&#92;U0EC4 precedes a Lao consonant of the range &#92;U0E81-&#92;U0EAE then the vowel is placed after the
     85  * consonant for collation purposes.
     86  * </p>
     87  * <p>
     88  * If a rule is without the modifier '!', the Thai/Lao vowel-consonant swapping is not turned on.
     89  * </p>
     90  * </i>
     91  * <p>
     92  * ICU4J's RuleBasedCollator does not support turning off the Thai/Lao vowel-consonant swapping, since the UCA clearly
     93  * states that it has to be supported to ensure a correct sorting order. If a '!' is encountered, it is ignored.
     94  * </p>
     95  * <li>As mentioned in the documentation of the base class Collator, compatibility decomposition mode is not supported.
     96  * </ul>
     97  * <p>
     98  * <strong>Examples</strong>
     99  * </p>
    100  * <p>
    101  * Creating Customized RuleBasedCollators: <blockquote>
    102  *
    103  * <pre>
    104  * String simple = "&amp; a &lt; b &lt; c &lt; d";
    105  * RuleBasedCollator simpleCollator = new RuleBasedCollator(simple);
    106  *
    107  * String norwegian = "&amp; a , A &lt; b , B &lt; c , C &lt; d , D &lt; e , E "
    108  *                    + "&lt; f , F &lt; g , G &lt; h , H &lt; i , I &lt; j , "
    109  *                    + "J &lt; k , K &lt; l , L &lt; m , M &lt; n , N &lt; "
    110  *                    + "o , O &lt; p , P &lt; q , Q &lt; r , R &lt; s , S &lt; "
    111  *                    + "t , T &lt; u , U &lt; v , V &lt; w , W &lt; x , X "
    112  *                    + "&lt; y , Y &lt; z , Z &lt; &#92;u00E5 = a&#92;u030A "
    113  *                    + ", &#92;u00C5 = A&#92;u030A ; aa , AA &lt; &#92;u00E6 "
    114  *                    + ", &#92;u00C6 &lt; &#92;u00F8 , &#92;u00D8";
    115  * RuleBasedCollator norwegianCollator = new RuleBasedCollator(norwegian);
    116  * </pre>
    117  *
    118  * </blockquote>
    119  *
    120  * Concatenating rules to combine <code>Collator</code>s: <blockquote>
    121  *
    122  * <pre>
    123  * // Create an en_US Collator object
    124  * RuleBasedCollator en_USCollator = (RuleBasedCollator)
    125  *     Collator.getInstance(new Locale("en", "US", ""));
    126  * // Create a da_DK Collator object
    127  * RuleBasedCollator da_DKCollator = (RuleBasedCollator)
    128  *     Collator.getInstance(new Locale("da", "DK", ""));
    129  * // Combine the two
    130  * // First, get the collation rules from en_USCollator
    131  * String en_USRules = en_USCollator.getRules();
    132  * // Second, get the collation rules from da_DKCollator
    133  * String da_DKRules = da_DKCollator.getRules();
    134  * RuleBasedCollator newCollator =
    135  *                             new RuleBasedCollator(en_USRules + da_DKRules);
    136  * // newCollator has the combined rules
    137  * </pre>
    138  *
    139  * </blockquote>
    140  *
    141  * Making changes to an existing RuleBasedCollator to create a new <code>Collator</code> object, by appending changes to
    142  * the existing rule: <blockquote>
    143  *
    144  * <pre>
    145  * // Create a new Collator object with additional rules
    146  * String addRules = "&amp; C &lt; ch, cH, Ch, CH";
    147  * RuleBasedCollator myCollator =
    148  *     new RuleBasedCollator(en_USCollator.getRules() + addRules);
    149  * // myCollator contains the new rules
    150  * </pre>
    151  *
    152  * </blockquote>
    153  *
    154  * How to change the order of non-spacing accents: <blockquote>
    155  *
    156  * <pre>
    157  * // old rule with main accents
    158  * String oldRules = "= &#92;u0301 ; &#92;u0300 ; &#92;u0302 ; &#92;u0308 "
    159  *                 + "; &#92;u0327 ; &#92;u0303 ; &#92;u0304 ; &#92;u0305 "
    160  *                 + "; &#92;u0306 ; &#92;u0307 ; &#92;u0309 ; &#92;u030A "
    161  *                 + "; &#92;u030B ; &#92;u030C ; &#92;u030D ; &#92;u030E "
    162  *                 + "; &#92;u030F ; &#92;u0310 ; &#92;u0311 ; &#92;u0312 "
    163  *                 + "&lt; a , A ; ae, AE ; &#92;u00e6 , &#92;u00c6 "
    164  *                 + "&lt; b , B &lt; c, C &lt; e, E &amp; C &lt; d , D";
    165  * // change the order of accent characters
    166  * String addOn = "&amp; &#92;u0300 ; &#92;u0308 ; &#92;u0302";
    167  * RuleBasedCollator myCollator = new RuleBasedCollator(oldRules + addOn);
    168  * </pre>
    169  *
    170  * </blockquote>
    171  *
    172  * Putting in a new primary ordering before the default setting, e.g. sort English characters before or after Japanese
    173  * characters in the Japanese <code>Collator</code>: <blockquote>
    174  *
    175  * <pre>
    176  * // get en_US Collator rules
    177  * RuleBasedCollator en_USCollator
    178  *                        = (RuleBasedCollator)Collator.getInstance(Locale.US);
    179  * // add a few Japanese characters to sort before English characters
    180  * // suppose the last character before the first base letter 'a' in
    181  * // the English collation rule is &#92;u2212
    182  * String jaString = "&amp; &#92;u2212 &lt; &#92;u3041, &#92;u3042 &lt; &#92;u3043, "
    183  *                   + "&#92;u3044";
    184  * RuleBasedCollator myJapaneseCollator
    185  *              = new RuleBasedCollator(en_USCollator.getRules() + jaString);
    186  * </pre>
    187  *
    188  * </blockquote>
    189  * </p>
    190  * <p>
    191  * This class is not subclassable
    192  * </p>
    193  *
    194  * @author Syn Wee Quek
    195  * @stable ICU 2.8
    196  */
    197 public final class RuleBasedCollator extends Collator {
    198     // public constructors ---------------------------------------------------
    199 
    /**
     * <p>
     * Constructor that takes the argument rules for customization.
     * The collator will be based on the CLDR root collation, with the
     * attributes and re-ordering of the characters specified in the argument rules.
     * </p>
     * <p>
     * See the User Guide's section on <a href="http://userguide.icu-project.org/collation/customization">
     * Collation Customization</a> for details on the rule syntax.
     * </p>
     *
     * @param rules
     *            the collation rules to build the collation table from; must not be null.
     * @exception ParseException
     *                thrown when argument rules have an invalid syntax.
     * @exception java.io.IOException
     *                thrown when an error occurred while reading internal data.
     * @exception IllegalArgumentException
     *                thrown when the argument rules are null.
     * @stable ICU 2.8
     */
    public RuleBasedCollator(String rules) throws Exception {
        // Reject a missing rule string up front, before any collation data is touched.
        if (null == rules) {
            throw new IllegalArgumentException("Collation rules can not be null");
        }
        // A rule-built collator is not tied to a locale, so it reports the root locale.
        validLocale = ULocale.ROOT;
        internalBuildTailoring(rules);
    }
    225 
    226     /**
    227      * Implements from-rule constructors.
    228      * @param rules rule string
    229      * @throws Exception
    230      */
    231     private final void internalBuildTailoring(String rules) throws Exception {
    232         CollationTailoring base = CollationRoot.getRoot();
    233         // Most code using Collator does not need to build a Collator from rules.
    234         // By using reflection, most code will not have a static dependency on the builder code.
    235         // CollationBuilder builder = new CollationBuilder(base);
    236         ClassLoader classLoader = ClassLoaderUtil.getClassLoader(getClass());
    237         CollationTailoring t;
    238         try {
    239             Class<?> builderClass = classLoader.loadClass("com.ibm.icu.impl.coll.CollationBuilder");
    240             Object builder = builderClass.getConstructor(CollationTailoring.class).newInstance(base);
    241             // builder.parseAndBuild(rules);
    242             Method parseAndBuild = builderClass.getMethod("parseAndBuild", String.class);
    243             t = (CollationTailoring)parseAndBuild.invoke(builder, rules);
    244         } catch(InvocationTargetException e) {
    245             throw (Exception)e.getTargetException();
    246         }
    247         t.actualLocale = null;
    248         adoptTailoring(t);
    249     }
    250 
    251     // public methods --------------------------------------------------------
    252 
    253     /**
    254      * Clones the RuleBasedCollator
    255      *
    256      * @return a new instance of this RuleBasedCollator object
    257      * @stable ICU 2.8
    258      */
    259     @Override
    260     public Object clone() throws CloneNotSupportedException {
    261         if (isFrozen()) {
    262             return this;
    263         }
    264         return cloneAsThawed();
    265     }
    266 
    267     private final void initMaxExpansions() {
    268         synchronized(tailoring) {
    269             if (tailoring.maxExpansions == null) {
    270                 tailoring.maxExpansions = CollationElementIterator.computeMaxExpansions(tailoring.data);
    271             }
    272         }
    273     }
    274 
    275     /**
    276      * Return a CollationElementIterator for the given String.
    277      *
    278      * @see CollationElementIterator
    279      * @stable ICU 2.8
    280      */
    281     public CollationElementIterator getCollationElementIterator(String source) {
    282         initMaxExpansions();
    283         return new CollationElementIterator(source, this);
    284     }
    285 
    286     /**
    287      * Return a CollationElementIterator for the given CharacterIterator. The source iterator's integrity will be
    288      * preserved since a new copy will be created for use.
    289      *
    290      * @see CollationElementIterator
    291      * @stable ICU 2.8
    292      */
    293     public CollationElementIterator getCollationElementIterator(CharacterIterator source) {
    294         initMaxExpansions();
    295         CharacterIterator newsource = (CharacterIterator) source.clone();
    296         return new CollationElementIterator(newsource, this);
    297     }
    298 
    299     /**
    300      * Return a CollationElementIterator for the given UCharacterIterator. The source iterator's integrity will be
    301      * preserved since a new copy will be created for use.
    302      *
    303      * @see CollationElementIterator
    304      * @stable ICU 2.8
    305      */
    306     public CollationElementIterator getCollationElementIterator(UCharacterIterator source) {
    307         initMaxExpansions();
    308         return new CollationElementIterator(source, this);
    309     }
    310 
    311     // Freezable interface implementation -------------------------------------------------
    312 
    313     /**
    314      * Determines whether the object has been frozen or not.
    315      *
    316      * <p>An unfrozen Collator is mutable and not thread-safe.
    317      * A frozen Collator is immutable and thread-safe.
    318      *
    319      * @stable ICU 4.8
    320      */
    321     @Override
    322     public boolean isFrozen() {
    323         return frozenLock != null;
    324     }
    325 
    326     /**
    327      * Freezes the collator.
    328      * @return the collator itself.
    329      * @stable ICU 4.8
    330      */
    331     @Override
    332     public Collator freeze() {
    333         if (!isFrozen()) {
    334             frozenLock = new ReentrantLock();
    335             if (collationBuffer == null) {
    336                 collationBuffer = new CollationBuffer(data);
    337             }
    338         }
    339         return this;
    340     }
    341 
    342     /**
    343      * Provides for the clone operation. Any clone is initially unfrozen.
    344      * @stable ICU 4.8
    345      */
    346     @Override
    347     public RuleBasedCollator cloneAsThawed() {
    348         try {
    349             RuleBasedCollator result = (RuleBasedCollator) super.clone();
    350             // since all collation data in the RuleBasedCollator do not change
    351             // we can safely assign the result.fields to this collator
    352             // except in cases where we can't
    353             result.settings = settings.clone();
    354             result.collationBuffer = null;
    355             result.frozenLock = null;
    356             return result;
    357         } catch (CloneNotSupportedException e) {
    358             // Clone is implemented
    359             return null;
    360         }
    361     }
    362 
    363     // public setters --------------------------------------------------------
    364 
    365     private void checkNotFrozen() {
    366         if (isFrozen()) {
    367             throw new UnsupportedOperationException("Attempt to modify frozen RuleBasedCollator");
    368         }
    369     }
    370 
    371     private final CollationSettings getOwnedSettings() {
    372         return settings.copyOnWrite();
    373     }
    374 
    375     private final CollationSettings getDefaultSettings() {
    376         return tailoring.settings.readOnly();
    377     }
    378 
    379     /**
    380      * Sets the Hiragana Quaternary mode to be on or off. When the Hiragana Quaternary mode is turned on, the collator
    381      * positions Hiragana characters before all non-ignorable characters in QUATERNARY strength. This is to produce a
    382      * correct JIS collation order, distinguishing between Katakana and Hiragana characters.
    383      *
    384      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
    385      * Since ICU 50, this attribute is not settable any more via API functions.
    386      * Since CLDR 25/ICU 53, explicit quaternary relations are used
    387      * to achieve the same Japanese sort order.
    388      *
    389      * @param flag
    390      *            true if Hiragana Quaternary mode is to be on, false otherwise
    391      * @see #setHiraganaQuaternaryDefault
    392      * @see #isHiraganaQuaternary
    393      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
    394      */
    395     @Deprecated
    396     public void setHiraganaQuaternary(boolean flag) {
    397         checkNotFrozen();
    398     }
    399 
    400     /**
    401      * Sets the Hiragana Quaternary mode to the initial mode set during construction of the RuleBasedCollator. See
    402      * setHiraganaQuaternary(boolean) for more details.
    403      *
    404      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
    405      * Since ICU 50, this attribute is not settable any more via API functions.
    406      * Since CLDR 25/ICU 53, explicit quaternary relations are used
    407      * to achieve the same Japanese sort order.
    408      *
    409      * @see #setHiraganaQuaternary(boolean)
    410      * @see #isHiraganaQuaternary
    411      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
    412      */
    413     @Deprecated
    414     public void setHiraganaQuaternaryDefault() {
    415         checkNotFrozen();
    416     }
    417 
    418     /**
    419      * Sets whether uppercase characters sort before lowercase characters or vice versa, in strength TERTIARY. The
    420      * default mode is false, and so lowercase characters sort before uppercase characters. If true, sort upper case
    421      * characters first.
    422      *
    423      * @param upperfirst
    424      *            true to sort uppercase characters before lowercase characters, false to sort lowercase characters
    425      *            before uppercase characters
    426      * @see #isLowerCaseFirst
    427      * @see #isUpperCaseFirst
    428      * @see #setLowerCaseFirst
    429      * @see #setCaseFirstDefault
    430      * @stable ICU 2.8
    431      */
    432     public void setUpperCaseFirst(boolean upperfirst) {
    433         checkNotFrozen();
    434         if (upperfirst == isUpperCaseFirst()) { return; }
    435         CollationSettings ownedSettings = getOwnedSettings();
    436         ownedSettings.setCaseFirst(upperfirst ? CollationSettings.CASE_FIRST_AND_UPPER_MASK : 0);
    437         setFastLatinOptions(ownedSettings);
    438     }
    439 
    440     /**
    441      * Sets the orders of lower cased characters to sort before upper cased characters, in strength TERTIARY. The
    442      * default mode is false. If true is set, the RuleBasedCollator will sort lower cased characters before the upper
    443      * cased ones. Otherwise, if false is set, the RuleBasedCollator will ignore case preferences.
    444      *
    445      * @param lowerfirst
    446      *            true for sorting lower cased characters before upper cased characters, false to ignore case
    447      *            preferences.
    448      * @see #isLowerCaseFirst
    449      * @see #isUpperCaseFirst
    450      * @see #setUpperCaseFirst
    451      * @see #setCaseFirstDefault
    452      * @stable ICU 2.8
    453      */
    454     public void setLowerCaseFirst(boolean lowerfirst) {
    455         checkNotFrozen();
    456         if (lowerfirst == isLowerCaseFirst()) { return; }
    457         CollationSettings ownedSettings = getOwnedSettings();
    458         ownedSettings.setCaseFirst(lowerfirst ? CollationSettings.CASE_FIRST : 0);
    459         setFastLatinOptions(ownedSettings);
    460     }
    461 
    462     /**
    463      * Sets the case first mode to the initial mode set during construction of the RuleBasedCollator. See
    464      * setUpperCaseFirst(boolean) and setLowerCaseFirst(boolean) for more details.
    465      *
    466      * @see #isLowerCaseFirst
    467      * @see #isUpperCaseFirst
    468      * @see #setLowerCaseFirst(boolean)
    469      * @see #setUpperCaseFirst(boolean)
    470      * @stable ICU 2.8
    471      */
    472     public final void setCaseFirstDefault() {
    473         checkNotFrozen();
    474         CollationSettings defaultSettings = getDefaultSettings();
    475         if(settings.readOnly() == defaultSettings) { return; }
    476         CollationSettings ownedSettings = getOwnedSettings();
    477         ownedSettings.setCaseFirstDefault(defaultSettings.options);
    478         setFastLatinOptions(ownedSettings);
    479     }
    480 
    481     /**
    482      * Sets the alternate handling mode to the initial mode set during construction of the RuleBasedCollator. See
    483      * setAlternateHandling(boolean) for more details.
    484      *
    485      * @see #setAlternateHandlingShifted(boolean)
    486      * @see #isAlternateHandlingShifted()
    487      * @stable ICU 2.8
    488      */
    489     public void setAlternateHandlingDefault() {
    490         checkNotFrozen();
    491         CollationSettings defaultSettings = getDefaultSettings();
    492         if(settings.readOnly() == defaultSettings) { return; }
    493         CollationSettings ownedSettings = getOwnedSettings();
    494         ownedSettings.setAlternateHandlingDefault(defaultSettings.options);
    495         setFastLatinOptions(ownedSettings);
    496     }
    497 
    498     /**
    499      * Sets the case level mode to the initial mode set during construction of the RuleBasedCollator. See
    500      * setCaseLevel(boolean) for more details.
    501      *
    502      * @see #setCaseLevel(boolean)
    503      * @see #isCaseLevel
    504      * @stable ICU 2.8
    505      */
    506     public void setCaseLevelDefault() {
    507         checkNotFrozen();
    508         CollationSettings defaultSettings = getDefaultSettings();
    509         if(settings.readOnly() == defaultSettings) { return; }
    510         CollationSettings ownedSettings = getOwnedSettings();
    511         ownedSettings.setFlagDefault(CollationSettings.CASE_LEVEL, defaultSettings.options);
    512         setFastLatinOptions(ownedSettings);
    513     }
    514 
    515     /**
    516      * Sets the decomposition mode to the initial mode set during construction of the RuleBasedCollator. See
    517      * setDecomposition(int) for more details.
    518      *
    519      * @see #getDecomposition
    520      * @see #setDecomposition(int)
    521      * @stable ICU 2.8
    522      */
    523     public void setDecompositionDefault() {
    524         checkNotFrozen();
    525         CollationSettings defaultSettings = getDefaultSettings();
    526         if(settings.readOnly() == defaultSettings) { return; }
    527         CollationSettings ownedSettings = getOwnedSettings();
    528         ownedSettings.setFlagDefault(CollationSettings.CHECK_FCD, defaultSettings.options);
    529         setFastLatinOptions(ownedSettings);
    530     }
    531 
    532     /**
    533      * Sets the French collation mode to the initial mode set during construction of the RuleBasedCollator. See
    534      * setFrenchCollation(boolean) for more details.
    535      *
    536      * @see #isFrenchCollation
    537      * @see #setFrenchCollation(boolean)
    538      * @stable ICU 2.8
    539      */
    540     public void setFrenchCollationDefault() {
    541         checkNotFrozen();
    542         CollationSettings defaultSettings = getDefaultSettings();
    543         if(settings.readOnly() == defaultSettings) { return; }
    544         CollationSettings ownedSettings = getOwnedSettings();
    545         ownedSettings.setFlagDefault(CollationSettings.BACKWARD_SECONDARY, defaultSettings.options);
    546         setFastLatinOptions(ownedSettings);
    547     }
    548 
    549     /**
    550      * Sets the collation strength to the initial mode set during the construction of the RuleBasedCollator. See
    551      * setStrength(int) for more details.
    552      *
    553      * @see #setStrength(int)
    554      * @see #getStrength
    555      * @stable ICU 2.8
    556      */
    557     public void setStrengthDefault() {
    558         checkNotFrozen();
    559         CollationSettings defaultSettings = getDefaultSettings();
    560         if(settings.readOnly() == defaultSettings) { return; }
    561         CollationSettings ownedSettings = getOwnedSettings();
    562         ownedSettings.setStrengthDefault(defaultSettings.options);
    563         setFastLatinOptions(ownedSettings);
    564     }
    565 
    566     /**
    567      * Method to set numeric collation to its default value.
    568      *
    569      * @see #getNumericCollation
    570      * @see #setNumericCollation
    571      * @stable ICU 2.8
    572      */
    573     public void setNumericCollationDefault() {
    574         checkNotFrozen();
    575         CollationSettings defaultSettings = getDefaultSettings();
    576         if(settings.readOnly() == defaultSettings) { return; }
    577         CollationSettings ownedSettings = getOwnedSettings();
    578         ownedSettings.setFlagDefault(CollationSettings.NUMERIC, defaultSettings.options);
    579         setFastLatinOptions(ownedSettings);
    580     }
    581 
    582     /**
    583      * Sets the mode for the direction of SECONDARY weights to be used in French collation. The default value is false,
    584      * which treats SECONDARY weights in the order they appear. If set to true, the SECONDARY weights will be sorted
    585      * backwards. See the section on <a href="http://userguide.icu-project.org/collation/architecture">
    586      * French collation</a> for more information.
    587      *
    588      * @param flag
    589      *            true to set the French collation on, false to set it off
    590      * @stable ICU 2.8
    591      * @see #isFrenchCollation
    592      * @see #setFrenchCollationDefault
    593      */
    594     public void setFrenchCollation(boolean flag) {
    595         checkNotFrozen();
    596         if(flag == isFrenchCollation()) { return; }
    597         CollationSettings ownedSettings = getOwnedSettings();
    598         ownedSettings.setFlag(CollationSettings.BACKWARD_SECONDARY, flag);
    599         setFastLatinOptions(ownedSettings);
    600     }
    601 
    602     /**
    603      * Sets the alternate handling for QUATERNARY strength to be either shifted or non-ignorable. See the UCA definition
    604      * on <a href="http://www.unicode.org/unicode/reports/tr10/#Variable_Weighting">Variable Weighting</a>. This
    605      * attribute will only be effective when QUATERNARY strength is set. The default value for this mode is false,
    606      * corresponding to the NON_IGNORABLE mode in UCA. In the NON_IGNORABLE mode, the RuleBasedCollator treats all
    607      * the code points with non-ignorable primary weights in the same way. If the mode is set to true, the behavior
    608      * corresponds to SHIFTED defined in UCA, this causes code points with PRIMARY orders that are equal or below the
    609      * variable top value to be ignored in PRIMARY order and moved to the QUATERNARY order.
    610      *
    611      * @param shifted
    612      *            true if SHIFTED behavior for alternate handling is desired, false for the NON_IGNORABLE behavior.
    613      * @see #isAlternateHandlingShifted
    614      * @see #setAlternateHandlingDefault
    615      * @stable ICU 2.8
    616      */
    617     public void setAlternateHandlingShifted(boolean shifted) {
    618         checkNotFrozen();
    619         if(shifted == isAlternateHandlingShifted()) { return; }
    620         CollationSettings ownedSettings = getOwnedSettings();
    621         ownedSettings.setAlternateHandlingShifted(shifted);
    622         setFastLatinOptions(ownedSettings);
    623     }
    624 
    625     /**
    626      * <p>
    627      * When case level is set to true, an additional weight is formed between the SECONDARY and TERTIARY weight, known
    628      * as the case level. The case level is used to distinguish large and small Japanese Kana characters. Case level
    629      * could also be used in other situations. For example to distinguish certain Pinyin characters. The default value
    630      * is false, which means the case level is not generated. The contents of the case level are affected by the case
    631      * first mode. A simple way to ignore accent differences in a string is to set the strength to PRIMARY and enable
    632      * case level.
    633      * </p>
    634      * <p>
    635      * See the section on <a href="http://userguide.icu-project.org/collation/architecture">case
    636      * level</a> for more information.
    637      * </p>
    638      *
    639      * @param flag
    640      *            true if case level sorting is required, false otherwise
    641      * @stable ICU 2.8
    642      * @see #setCaseLevelDefault
    643      * @see #isCaseLevel
    644      */
    645     public void setCaseLevel(boolean flag) {
    646         checkNotFrozen();
    647         if(flag == isCaseLevel()) { return; }
    648         CollationSettings ownedSettings = getOwnedSettings();
    649         ownedSettings.setFlag(CollationSettings.CASE_LEVEL, flag);
    650         setFastLatinOptions(ownedSettings);
    651     }
    652 
    653     /**
    654      * Sets the decomposition mode of this Collator.  Setting this
    655      * decomposition attribute with CANONICAL_DECOMPOSITION allows the
    656      * Collator to handle un-normalized text properly, producing the
    657      * same results as if the text were normalized. If
    658      * NO_DECOMPOSITION is set, it is the user's responsibility to
    659      * ensure that all text is already in the appropriate form before
    660      * a comparison or before getting a CollationKey. Adjusting
    661      * decomposition mode allows the user to select between faster and
    662      * more complete collation behavior.</p>
    663      *
    664      * <p>Since a great many of the world's languages do not require
    665      * text normalization, most locales set NO_DECOMPOSITION as the
    666      * default decomposition mode.</p>
    667      *
    668      * <p>The default decomposition mode for the Collator is
    669      * NO_DECOMPOSITION, unless specified otherwise by the locale used
    670      * to create the Collator.</p>
    671      *
    672      * <p>See getDecomposition for a description of decomposition
    673      * mode.</p>
    674      *
    675      * @param decomposition the new decomposition mode
    676      * @see #getDecomposition
    677      * @see #NO_DECOMPOSITION
    678      * @see #CANONICAL_DECOMPOSITION
    679      * @throws IllegalArgumentException If the given value is not a valid
    680      *            decomposition mode.
    681      * @stable ICU 2.8
    682      */
    683     @Override
    684     public void setDecomposition(int decomposition)
    685     {
    686         checkNotFrozen();
    687         boolean flag;
    688         switch(decomposition) {
    689         case NO_DECOMPOSITION:
    690             flag = false;
    691             break;
    692         case CANONICAL_DECOMPOSITION:
    693             flag = true;
    694             break;
    695         default:
    696             throw new IllegalArgumentException("Wrong decomposition mode.");
    697         }
    698         if(flag == settings.readOnly().getFlag(CollationSettings.CHECK_FCD)) { return; }
    699         CollationSettings ownedSettings = getOwnedSettings();
    700         ownedSettings.setFlag(CollationSettings.CHECK_FCD, flag);
    701         setFastLatinOptions(ownedSettings);
    702     }
    703 
    704     /**
    705      * Sets this Collator's strength attribute. The strength attribute determines the minimum level of difference
    706      * considered significant during comparison.
    707      *
    708      * <p>See the Collator class description for an example of use.
    709      *
    710      * @param newStrength
    711      *            the new strength value.
    712      * @see #getStrength
    713      * @see #setStrengthDefault
    714      * @see #PRIMARY
    715      * @see #SECONDARY
    716      * @see #TERTIARY
    717      * @see #QUATERNARY
    718      * @see #IDENTICAL
    719      * @exception IllegalArgumentException
    720      *                If the new strength value is not one of PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL.
    721      * @stable ICU 2.8
    722      */
    723     @Override
    724     public void setStrength(int newStrength) {
    725         checkNotFrozen();
    726         if(newStrength == getStrength()) { return; }
    727         CollationSettings ownedSettings = getOwnedSettings();
    728         ownedSettings.setStrength(newStrength);
    729         setFastLatinOptions(ownedSettings);
    730     }
    731 
    732     /**
    733      * {@icu} Sets the variable top to the top of the specified reordering group.
    734      * The variable top determines the highest-sorting character
    735      * which is affected by the alternate handling behavior.
    736      * If that attribute is set to NON_IGNORABLE, then the variable top has no effect.
    737      * @param group one of Collator.ReorderCodes.SPACE, Collator.ReorderCodes.PUNCTUATION,
    738      *              Collator.ReorderCodes.SYMBOL, Collator.ReorderCodes.CURRENCY;
    739      *              or Collator.ReorderCodes.DEFAULT to restore the default max variable group
    740      * @return this
    741      * @see #getMaxVariable
    742      * @stable ICU 53
    743      */
    744     @Override
    745     public RuleBasedCollator setMaxVariable(int group) {
    746         // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
    747         int value;
    748         if(group == Collator.ReorderCodes.DEFAULT) {
    749             value = -1;  // UCOL_DEFAULT
    750         } else if(Collator.ReorderCodes.FIRST <= group && group <= Collator.ReorderCodes.CURRENCY) {
    751             value = group - Collator.ReorderCodes.FIRST;
    752         } else {
    753             throw new IllegalArgumentException("illegal max variable group " + group);
    754         }
    755         int oldValue = settings.readOnly().getMaxVariable();
    756         if(value == oldValue) {
    757             return this;
    758         }
    759         CollationSettings defaultSettings = getDefaultSettings();
    760         if(settings.readOnly() == defaultSettings) {
    761             if(value < 0) {  // UCOL_DEFAULT
    762                 return this;
    763             }
    764         }
    765         CollationSettings ownedSettings = getOwnedSettings();
    766 
    767         if(group == Collator.ReorderCodes.DEFAULT) {
    768             group = Collator.ReorderCodes.FIRST + defaultSettings.getMaxVariable();
    769         }
    770         long varTop = data.getLastPrimaryForGroup(group);
    771         assert(varTop != 0);
    772         ownedSettings.setMaxVariable(value, defaultSettings.options);
    773         ownedSettings.variableTop = varTop;
    774         setFastLatinOptions(ownedSettings);
    775         return this;
    776     }
    777 
    778     /**
    779      * {@icu} Returns the maximum reordering group whose characters are affected by
    780      * the alternate handling behavior.
    781      * @return the maximum variable reordering group.
    782      * @see #setMaxVariable
    783      * @stable ICU 53
    784      */
    785     @Override
    786     public int getMaxVariable() {
    787         return Collator.ReorderCodes.FIRST + settings.readOnly().getMaxVariable();
    788     }
    789 
    790     /**
    791      * {@icu} Sets the variable top to the primary weight of the specified string.
    792      *
    793      * <p>Beginning with ICU 53, the variable top is pinned to
    794      * the top of one of the supported reordering groups,
    795      * and it must not be beyond the last of those groups.
    796      * See {@link #setMaxVariable(int)}.
    797      *
    798      * @param varTop
    799      *            one or more (if contraction) characters to which the variable top should be set
    800      * @return variable top primary weight
    801      * @exception IllegalArgumentException
    802      *                is thrown if varTop argument is not a valid variable top element. A variable top element is
    803      *                invalid when
    804      *                <ul>
    805      *                <li>it is a contraction that does not exist in the Collation order
    806      *                <li>the variable top is beyond
    807      *                    the last reordering group supported by setMaxVariable()
    808      *                <li>when the varTop argument is null or zero in length.
    809      *                </ul>
    810      * @see #getVariableTop
    811      * @see RuleBasedCollator#setAlternateHandlingShifted
    812      * @deprecated ICU 53 Call {@link #setMaxVariable(int)} instead.
    813      */
    814     @Override
    815     @Deprecated
    816     public int setVariableTop(String varTop) {
    817         checkNotFrozen();
    818         if (varTop == null || varTop.length() == 0) {
    819             throw new IllegalArgumentException("Variable top argument string can not be null or zero in length.");
    820         }
    821         boolean numeric = settings.readOnly().isNumeric();
    822         long ce1, ce2;
    823         if(settings.readOnly().dontCheckFCD()) {
    824             UTF16CollationIterator ci = new UTF16CollationIterator(data, numeric, varTop, 0);
    825             ce1 = ci.nextCE();
    826             ce2 = ci.nextCE();
    827         } else {
    828             FCDUTF16CollationIterator ci = new FCDUTF16CollationIterator(data, numeric, varTop, 0);
    829             ce1 = ci.nextCE();
    830             ce2 = ci.nextCE();
    831         }
    832         if(ce1 == Collation.NO_CE || ce2 != Collation.NO_CE) {
    833             throw new IllegalArgumentException("Variable top argument string must map to exactly one collation element");
    834         }
    835         internalSetVariableTop(ce1 >>> 32);
    836         return (int)settings.readOnly().variableTop;
    837     }
    838 
    839     /**
    840      * {@icu} Sets the variable top to the specified primary weight.
    841      *
    842      * <p>Beginning with ICU 53, the variable top is pinned to
    843      * the top of one of the supported reordering groups,
    844      * and it must not be beyond the last of those groups.
    845      * See {@link #setMaxVariable(int)}.
    846      *
    847      * @param varTop primary weight, as returned by setVariableTop or getVariableTop
    848      * @see #getVariableTop
    849      * @see #setVariableTop(String)
    850      * @deprecated ICU 53 Call setMaxVariable() instead.
    851      */
    852     @Override
    853     @Deprecated
    854     public void setVariableTop(int varTop) {
    855         checkNotFrozen();
    856         internalSetVariableTop(varTop & 0xffffffffL);
    857     }
    858 
    859     private void internalSetVariableTop(long varTop) {
    860         if(varTop != settings.readOnly().variableTop) {
    861             // Pin the variable top to the end of the reordering group which contains it.
    862             // Only a few special groups are supported.
    863             int group = data.getGroupForPrimary(varTop);
    864             if(group < Collator.ReorderCodes.FIRST || Collator.ReorderCodes.CURRENCY < group) {
    865                 throw new IllegalArgumentException("The variable top must be a primary weight in " +
    866                         "the space/punctuation/symbols/currency symbols range");
    867             }
    868             long v = data.getLastPrimaryForGroup(group);
    869             assert(v != 0 && v >= varTop);
    870             varTop = v;
    871             if(varTop != settings.readOnly().variableTop) {
    872                 CollationSettings ownedSettings = getOwnedSettings();
    873                 ownedSettings.setMaxVariable(group - Collator.ReorderCodes.FIRST,
    874                         getDefaultSettings().options);
    875                 ownedSettings.variableTop = varTop;
    876                 setFastLatinOptions(ownedSettings);
    877             }
    878         }
    879     }
    880 
    881     /**
    882      * {@icu} When numeric collation is turned on, this Collator makes
    883      * substrings of digits sort according to their numeric values.
    884      *
    885      * <p>This is a way to get '100' to sort AFTER '2'. Note that the longest
    886      * digit substring that can be treated as a single unit is
    887      * 254 digits (not counting leading zeros). If a digit substring is
    888      * longer than that, the digits beyond the limit will be treated as a
    889      * separate digit substring.
    890      *
    891      * <p>A "digit" in this sense is a code point with General_Category=Nd,
    892      * which does not include circled numbers, roman numerals, etc.
    893      * Only a contiguous digit substring is considered, that is,
    894      * non-negative integers without separators.
    895      * There is no support for plus/minus signs, decimals, exponents, etc.
    896      *
    897      * @param flag
    898      *            true to turn numeric collation on and false to turn it off
    899      * @see #getNumericCollation
    900      * @see #setNumericCollationDefault
    901      * @stable ICU 2.8
    902      */
    903     public void setNumericCollation(boolean flag) {
    904         checkNotFrozen();
    905         // sort substrings of digits as numbers
    906         if(flag == getNumericCollation()) { return; }
    907         CollationSettings ownedSettings = getOwnedSettings();
    908         ownedSettings.setFlag(CollationSettings.NUMERIC, flag);
    909         setFastLatinOptions(ownedSettings);
    910     }
    911 
    912     /**
    913      * {@inheritDoc}
    914      *
    915      * @param order the reordering codes to apply to this collator; if this is null or an empty array
    916      * then this clears any existing reordering
    917      * @throws IllegalArgumentException if the reordering codes are malformed in any way (e.g. duplicates, multiple reset codes, overlapping equivalent scripts)
    918      * @see #getReorderCodes
    919      * @see Collator#getEquivalentReorderCodes
    920      * @see Collator.ReorderCodes
    921      * @see UScript
    922      * @stable ICU 4.8
    923      */
    924     @Override
    925     public void setReorderCodes(int... order) {
    926         checkNotFrozen();
    927         int length = (order != null) ? order.length : 0;
    928         if(length == 1 && order[0] == ReorderCodes.NONE) {
    929             length = 0;
    930         }
    931         if(length == 0 ?
    932                 settings.readOnly().reorderCodes.length == 0 :
    933                 Arrays.equals(order, settings.readOnly().reorderCodes)) {
    934             return;
    935         }
    936         CollationSettings defaultSettings = getDefaultSettings();
    937         if(length == 1 && order[0] == Collator.ReorderCodes.DEFAULT) {
    938             if(settings.readOnly() != defaultSettings) {
    939                 CollationSettings ownedSettings = getOwnedSettings();
    940                 ownedSettings.copyReorderingFrom(defaultSettings);
    941                 setFastLatinOptions(ownedSettings);
    942             }
    943             return;
    944         }
    945         CollationSettings ownedSettings = getOwnedSettings();
    946         if(length == 0) {
    947             ownedSettings.resetReordering();
    948         } else {
    949             ownedSettings.setReordering(data, order.clone());
    950         }
    951         setFastLatinOptions(ownedSettings);
    952     }
    953 
    954     private void setFastLatinOptions(CollationSettings ownedSettings) {
    955         ownedSettings.fastLatinOptions = CollationFastLatin.getOptions(
    956                 data, ownedSettings, ownedSettings.fastLatinPrimaries);
    957     }
    958 
    959     // public getters --------------------------------------------------------
    960 
    961     /**
    962      * Gets the collation tailoring rules for this RuleBasedCollator.
    963      * Equivalent to String getRules(false).
    964      *
    965      * @return the collation tailoring rules
    966      * @see #getRules(boolean)
    967      * @stable ICU 2.8
    968      */
    969     public String getRules() {
    970         return tailoring.rules;
    971     }
    972 
    973     /**
    974      * Returns current rules.
    975      * The argument defines whether full rules (root collation + tailored) rules are returned
    976      * or just the tailoring.
    977      *
    978      * <p>The root collation rules are an <i>approximation</i> of the root collator's sort order.
    979      * They are almost never used or useful at runtime and can be removed from the data.
    980      * See <a href="http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales">User Guide:
    981      * Collation Customization, Building on Existing Locales</a>
    982      *
    983      * <p>{@link #getRules()} should normally be used instead.
    984      * @param fullrules
    985      *            true if the rules that defines the full set of collation order is required, otherwise false for
    986      *            returning only the tailored rules
    987      * @return the current rules that defines this Collator.
    988      * @see #getRules()
    989      * @stable ICU 2.6
    990      */
    991     public String getRules(boolean fullrules) {
    992         if (!fullrules) {
    993             return tailoring.rules;
    994         }
    995         return CollationLoader.getRootRules() + tailoring.rules;
    996     }
    997 
    998     /**
    999      * Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
   1000      *
   1001      * @return a pointer to a UnicodeSet object containing all the code points and sequences that may sort differently
   1002      *         than in the root collator.
   1003      * @stable ICU 2.4
   1004      */
   1005     @Override
   1006     public UnicodeSet getTailoredSet() {
   1007         UnicodeSet tailored = new UnicodeSet();
   1008         if(data.base != null) {
   1009             new TailoredSet(tailored).forData(data);
   1010         }
   1011         return tailored;
   1012     }
   1013 
   1014     /**
   1015      * Gets unicode sets containing contractions and/or expansions of a collator
   1016      *
   1017      * @param contractions
   1018      *            if not null, set to contain contractions
   1019      * @param expansions
   1020      *            if not null, set to contain expansions
   1021      * @param addPrefixes
   1022      *            add the prefix contextual elements to contractions
   1023      * @throws Exception
   1024      *             Throws an exception if any error occurs.
   1025      * @stable ICU 3.4
   1026      */
   1027     public void getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, boolean addPrefixes)
   1028             throws Exception {
   1029         if (contractions != null) {
   1030             contractions.clear();
   1031         }
   1032         if (expansions != null) {
   1033             expansions.clear();
   1034         }
   1035         new ContractionsAndExpansions(contractions, expansions, null, addPrefixes).forData(data);
   1036     }
   1037 
   1038     /**
   1039      * Adds the contractions that start with character c to the set.
   1040      * Ignores prefixes. Used by AlphabeticIndex.
   1041      * @internal
   1042      * @deprecated This API is ICU internal only.
   1043      */
   1044     void internalAddContractions(int c, UnicodeSet set) {
   1045         new ContractionsAndExpansions(set, null, null, false).forCodePoint(data, c);
   1046     }
   1047 
   1048     /**
   1049      * <p>
   1050      * Get a Collation key for the argument String source from this RuleBasedCollator.
   1051      * </p>
   1052      * <p>
   1053      * General recommendation: <br>
   1054      * If comparisons are to be done on the same String multiple times, it is more efficient to generate
   1055      * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If each
   1056      * String is compared only once, using the method RuleBasedCollator.compare(String, String) will perform
   1057      * better.
   1058      * </p>
   1059      * <p>
   1060      * See the class documentation for an explanation about CollationKeys.
   1061      * </p>
   1062      *
   1063      * @param source
   1064      *            the text String to be transformed into a collation key.
   1065      * @return the CollationKey for the given String based on this RuleBasedCollator's collation rules. If the source
   1066      *         String is null, a null CollationKey is returned.
   1067      * @see CollationKey
   1068      * @see #compare(String, String)
   1069      * @see #getRawCollationKey
   1070      * @stable ICU 2.8
   1071      */
   1072     @Override
   1073     public CollationKey getCollationKey(String source) {
   1074         if (source == null) {
   1075             return null;
   1076         }
   1077         CollationBuffer buffer = null;
   1078         try {
   1079             buffer = getCollationBuffer();
   1080             return getCollationKey(source, buffer);
   1081         } finally {
   1082             releaseCollationBuffer(buffer);
   1083         }
   1084     }
   1085 
   1086     private CollationKey getCollationKey(String source, CollationBuffer buffer) {
   1087         buffer.rawCollationKey = getRawCollationKey(source, buffer.rawCollationKey, buffer);
   1088         return new CollationKey(source, buffer.rawCollationKey);
   1089     }
   1090 
   1091     /**
   1092      * Gets the simpler form of a CollationKey for the String source following the rules of this Collator and stores the
   1093      * result into the user provided argument key. If key has a internal byte array of length that's too small for the
   1094      * result, the internal byte array will be grown to the exact required size.
   1095      *
   1096      * @param source the text String to be transformed into a RawCollationKey
   1097      * @param key output RawCollationKey to store results
   1098      * @return If key is null, a new instance of RawCollationKey will be created and returned, otherwise the user
   1099      *         provided key will be returned.
   1100      * @see #getCollationKey
   1101      * @see #compare(String, String)
   1102      * @see RawCollationKey
   1103      * @stable ICU 2.8
   1104      */
   1105     @Override
   1106     public RawCollationKey getRawCollationKey(String source, RawCollationKey key) {
   1107         if (source == null) {
   1108             return null;
   1109         }
   1110         CollationBuffer buffer = null;
   1111         try {
   1112             buffer = getCollationBuffer();
   1113             return getRawCollationKey(source, key, buffer);
   1114         } finally {
   1115             releaseCollationBuffer(buffer);
   1116         }
   1117     }
   1118 
   1119     private static final class CollationKeyByteSink extends SortKeyByteSink {
   1120         CollationKeyByteSink(RawCollationKey key) {
   1121             super(key.bytes);
   1122             key_ = key;
   1123         }
   1124 
   1125         @Override
   1126         protected void AppendBeyondCapacity(byte[] bytes, int start, int n, int length) {
   1127             // n > 0 && appended_ > capacity_
   1128             if (Resize(n, length)) {
   1129                 System.arraycopy(bytes, start, buffer_, length, n);
   1130             }
   1131         }
   1132 
   1133         @Override
   1134         protected boolean Resize(int appendCapacity, int length) {
   1135             int newCapacity = 2 * buffer_.length;
   1136             int altCapacity = length + 2 * appendCapacity;
   1137             if (newCapacity < altCapacity) {
   1138                 newCapacity = altCapacity;
   1139             }
   1140             if (newCapacity < 200) {
   1141                 newCapacity = 200;
   1142             }
   1143             // Do not call key_.ensureCapacity(newCapacity) because we do not
   1144             // keep key_.size in sync with appended_.
   1145             // We only set it when we are done.
   1146             byte[] newBytes = new byte[newCapacity];
   1147             System.arraycopy(buffer_, 0, newBytes, 0, length);
   1148             buffer_ = key_.bytes = newBytes;
   1149             return true;
   1150         }
   1151 
   1152         private RawCollationKey key_;
   1153     }
   1154 
   1155     private RawCollationKey getRawCollationKey(CharSequence source, RawCollationKey key, CollationBuffer buffer) {
   1156         if (key == null) {
   1157             key = new RawCollationKey(simpleKeyLengthEstimate(source));
   1158         } else if (key.bytes == null) {
   1159             key.bytes = new byte[simpleKeyLengthEstimate(source)];
   1160         }
   1161         CollationKeyByteSink sink = new CollationKeyByteSink(key);
   1162         writeSortKey(source, sink, buffer);
   1163         key.size = sink.NumberOfBytesAppended();
   1164         return key;
   1165     }
   1166 
   1167     private int simpleKeyLengthEstimate(CharSequence source) {
   1168         return 2 * source.length() + 10;
   1169     }
   1170 
   1171     private void writeSortKey(CharSequence s, CollationKeyByteSink sink, CollationBuffer buffer) {
   1172         boolean numeric = settings.readOnly().isNumeric();
   1173         if(settings.readOnly().dontCheckFCD()) {
   1174             buffer.leftUTF16CollIter.setText(numeric, s, 0);
   1175             CollationKeys.writeSortKeyUpToQuaternary(
   1176                     buffer.leftUTF16CollIter, data.compressibleBytes, settings.readOnly(),
   1177                     sink, Collation.PRIMARY_LEVEL,
   1178                     CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
   1179         } else {
   1180             buffer.leftFCDUTF16Iter.setText(numeric, s, 0);
   1181             CollationKeys.writeSortKeyUpToQuaternary(
   1182                     buffer.leftFCDUTF16Iter, data.compressibleBytes, settings.readOnly(),
   1183                     sink, Collation.PRIMARY_LEVEL,
   1184                     CollationKeys.SIMPLE_LEVEL_FALLBACK, true);
   1185         }
   1186         if(settings.readOnly().getStrength() == IDENTICAL) {
   1187             writeIdenticalLevel(s, sink);
   1188         }
   1189         sink.Append(Collation.TERMINATOR_BYTE);
   1190     }
   1191 
   1192     private void writeIdenticalLevel(CharSequence s, CollationKeyByteSink sink) {
   1193         // NFD quick check
   1194         int nfdQCYesLimit = data.nfcImpl.decompose(s, 0, s.length(), null);
   1195         sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
   1196         // Sync the ByteArrayWrapper size with the key length.
   1197         sink.key_.size = sink.NumberOfBytesAppended();
   1198         int prev = 0;
   1199         if(nfdQCYesLimit != 0) {
   1200             prev = BOCSU.writeIdenticalLevelRun(prev, s, 0, nfdQCYesLimit, sink.key_);
   1201         }
   1202         // Is there non-NFD text?
   1203         if(nfdQCYesLimit < s.length()) {
   1204             int destLengthEstimate = s.length() - nfdQCYesLimit;
   1205             StringBuilder nfd = new StringBuilder();
   1206             data.nfcImpl.decompose(s, nfdQCYesLimit, s.length(), nfd, destLengthEstimate);
   1207             BOCSU.writeIdenticalLevelRun(prev, nfd, 0, nfd.length(), sink.key_);
   1208         }
   1209         // Sync the key with the buffer again which got bytes appended and may have been reallocated.
   1210         sink.setBufferAndAppended(sink.key_.bytes, sink.key_.size);
   1211     }
   1212 
   1213     /**
   1214      * Returns the CEs for the string.
   1215      * @param str the string
   1216      * @internal for tests & tools
   1217      * @deprecated This API is ICU internal only.
   1218      */
   1219     @Deprecated
   1220     public long[] internalGetCEs(CharSequence str) {
   1221         CollationBuffer buffer = null;
   1222         try {
   1223             buffer = getCollationBuffer();
   1224             boolean numeric = settings.readOnly().isNumeric();
   1225             CollationIterator iter;
   1226             if(settings.readOnly().dontCheckFCD()) {
   1227                 buffer.leftUTF16CollIter.setText(numeric, str, 0);
   1228                 iter = buffer.leftUTF16CollIter;
   1229             } else {
   1230                 buffer.leftFCDUTF16Iter.setText(numeric, str, 0);
   1231                 iter = buffer.leftFCDUTF16Iter;
   1232             }
   1233             int length = iter.fetchCEs() - 1;
   1234             assert length >= 0 && iter.getCE(length) == Collation.NO_CE;
   1235             long[] ces = new long[length];
   1236             System.arraycopy(iter.getCEs(), 0, ces, 0, length);
   1237             return ces;
   1238         } finally {
   1239             releaseCollationBuffer(buffer);
   1240         }
   1241     }
   1242 
   1243     /**
   1244      * Returns this Collator's strength attribute. The strength attribute
   1245      * determines the minimum level of difference considered significant.
   1246      *
   1247      * <p>{@icunote} This can return QUATERNARY strength, which is not supported by the
   1248      * JDK version.
   1249      *
   1250      * <p>See the Collator class description for more details.
   1251      *
   1252      * @return this Collator's current strength attribute.
   1253      * @see #setStrength
   1254      * @see #PRIMARY
   1255      * @see #SECONDARY
   1256      * @see #TERTIARY
   1257      * @see #QUATERNARY
   1258      * @see #IDENTICAL
   1259      * @stable ICU 2.8
   1260      */
   1261     @Override
   1262     public int getStrength() {
   1263         return settings.readOnly().getStrength();
   1264     }
   1265 
   1266     /**
   1267      * Returns the decomposition mode of this Collator. The decomposition mode
   1268      * determines how Unicode composed characters are handled.
   1269      *
   1270      * <p>See the Collator class description for more details.
   1271      *
   1272      * @return the decomposition mode
   1273      * @see #setDecomposition
   1274      * @see #NO_DECOMPOSITION
   1275      * @see #CANONICAL_DECOMPOSITION
   1276      * @stable ICU 2.8
   1277      */
   1278     @Override
   1279     public int getDecomposition() {
   1280         return (settings.readOnly().options & CollationSettings.CHECK_FCD) != 0 ?
   1281                 CANONICAL_DECOMPOSITION : NO_DECOMPOSITION;
   1282     }
   1283 
   1284     /**
   1285      * Return true if an uppercase character is sorted before the corresponding lowercase character. See
   1286      * setUpperCaseFirst(boolean) for details.
   1287      *
   1288      * @see #setUpperCaseFirst
   1289      * @see #setLowerCaseFirst
   1290      * @see #isLowerCaseFirst
   1291      * @see #setCaseFirstDefault
   1292      * @return true if upper cased characters are sorted before lower cased characters, false otherwise
   1293      * @stable ICU 2.8
   1294      */
   1295     public boolean isUpperCaseFirst() {
   1296         return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST_AND_UPPER_MASK);
   1297     }
   1298 
   1299     /**
   1300      * Return true if a lowercase character is sorted before the corresponding uppercase character. See
   1301      * setLowerCaseFirst(boolean) for details.
   1302      *
   1303      * @see #setUpperCaseFirst
   1304      * @see #setLowerCaseFirst
   1305      * @see #isUpperCaseFirst
   1306      * @see #setCaseFirstDefault
   1307      * @return true if lower cased characters are sorted before upper cased characters, false otherwise
   1308      * @stable ICU 2.8
   1309      */
   1310     public boolean isLowerCaseFirst() {
   1311         return (settings.readOnly().getCaseFirst() == CollationSettings.CASE_FIRST);
   1312     }
   1313 
   1314     /**
   1315      * Checks if the alternate handling behavior is the UCA defined SHIFTED or NON_IGNORABLE. If return value is true,
   1316      * then the alternate handling attribute for the Collator is SHIFTED. Otherwise if return value is false, then the
   1317      * alternate handling attribute for the Collator is NON_IGNORABLE See setAlternateHandlingShifted(boolean) for more
   1318      * details.
   1319      *
   1320      * @return true or false
   1321      * @see #setAlternateHandlingShifted(boolean)
   1322      * @see #setAlternateHandlingDefault
   1323      * @stable ICU 2.8
   1324      */
   1325     public boolean isAlternateHandlingShifted() {
   1326         return settings.readOnly().getAlternateHandling();
   1327     }
   1328 
   1329     /**
   1330      * Checks if case level is set to true. See setCaseLevel(boolean) for details.
   1331      *
   1332      * @return the case level mode
   1333      * @see #setCaseLevelDefault
   1334      * @see #isCaseLevel
   1335      * @see #setCaseLevel(boolean)
   1336      * @stable ICU 2.8
   1337      */
   1338     public boolean isCaseLevel() {
   1339         return (settings.readOnly().options & CollationSettings.CASE_LEVEL) != 0;
   1340     }
   1341 
   1342     /**
   1343      * Checks if French Collation is set to true. See setFrenchCollation(boolean) for details.
   1344      *
   1345      * @return true if French Collation is set to true, false otherwise
   1346      * @see #setFrenchCollation(boolean)
   1347      * @see #setFrenchCollationDefault
   1348      * @stable ICU 2.8
   1349      */
   1350     public boolean isFrenchCollation() {
   1351         return (settings.readOnly().options & CollationSettings.BACKWARD_SECONDARY) != 0;
   1352     }
   1353 
   1354     /**
   1355      * Checks if the Hiragana Quaternary mode is set on. See setHiraganaQuaternary(boolean) for more details.
   1356      *
   1357      * <p>This attribute was an implementation detail of the CLDR Japanese tailoring.
   1358      * Since ICU 50, this attribute is not settable any more via API functions.
   1359      * Since CLDR 25/ICU 53, explicit quaternary relations are used
   1360      * to achieve the same Japanese sort order.
   1361      *
   1362      * @return false
   1363      * @see #setHiraganaQuaternaryDefault
   1364      * @see #setHiraganaQuaternary(boolean)
   1365      * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
   1366      */
   1367     @Deprecated
   1368     public boolean isHiraganaQuaternary() {
   1369         return false;
   1370     }
   1371 
   1372     /**
   1373      * {@icu} Gets the variable top value of a Collator.
   1374      *
   1375      * @return the variable top primary weight
   1376      * @see #getMaxVariable
   1377      * @stable ICU 2.6
   1378      */
   1379     @Override
   1380     public int getVariableTop() {
   1381         return (int)settings.readOnly().variableTop;
   1382     }
   1383 
   1384     /**
   1385      * Method to retrieve the numeric collation value. When numeric collation is turned on, this Collator generates a
   1386      * collation key for the numeric value of substrings of digits. This is a way to get '100' to sort AFTER '2'
   1387      *
   1388      * @see #setNumericCollation
   1389      * @see #setNumericCollationDefault
   1390      * @return true if numeric collation is turned on, false otherwise
   1391      * @stable ICU 2.8
   1392      */
   1393     public boolean getNumericCollation() {
   1394         return (settings.readOnly().options & CollationSettings.NUMERIC) != 0;
   1395     }
   1396 
   1397     /**
   1398      * Retrieves the reordering codes for this collator.
   1399      * These reordering codes are a combination of UScript codes and ReorderCodes.
   1400      * @return a copy of the reordering codes for this collator;
   1401      * if none are set then returns an empty array
   1402      * @see #setReorderCodes
   1403      * @see Collator#getEquivalentReorderCodes
   1404      * @stable ICU 4.8
   1405      */
   1406     @Override
   1407     public int[] getReorderCodes() {
   1408         return settings.readOnly().reorderCodes.clone();
   1409     }
   1410 
   1411     // public other methods -------------------------------------------------
   1412 
   1413     /**
   1414      * {@inheritDoc}
   1415      * @stable ICU 2.8
   1416      */
   1417     @Override
   1418     public boolean equals(Object obj) {
   1419         if (this == obj) {
   1420             return true;
   1421         }
   1422         if (!super.equals(obj)) {
   1423             return false;
   1424         }
   1425         RuleBasedCollator o = (RuleBasedCollator) obj;
   1426         if(!settings.readOnly().equals(o.settings.readOnly())) { return false; }
   1427         if(data == o.data) { return true; }
   1428         boolean thisIsRoot = data.base == null;
   1429         boolean otherIsRoot = o.data.base == null;
   1430         assert(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
   1431         if(thisIsRoot != otherIsRoot) { return false; }
   1432         if((thisIsRoot || tailoring.rules.length() != 0) &&
   1433                 (otherIsRoot || o.tailoring.rules.length() != 0)) {
   1434             // Shortcut: If both collators have valid rule strings, then compare those.
   1435             if(tailoring.rules.equals(o.tailoring.rules)) { return true; }
   1436         }
   1437         // Different rule strings can result in the same or equivalent tailoring.
   1438         // The rule strings are optional in ICU resource bundles, although included by default.
   1439         // cloneBinary() drops the rule string.
   1440         UnicodeSet thisTailored = getTailoredSet();
   1441         UnicodeSet otherTailored = o.getTailoredSet();
   1442         if(!thisTailored.equals(otherTailored)) { return false; }
   1443         // For completeness, we should compare all of the mappings;
   1444         // or we should create a list of strings, sort it with one collator,
   1445         // and check if both collators compare adjacent strings the same
   1446         // (order & strength, down to quaternary); or similar.
   1447         // Testing equality of collators seems unusual.
   1448         return true;
   1449     }
   1450 
   1451     /**
   1452      * Generates a unique hash code for this RuleBasedCollator.
   1453      *
   1454      * @return the unique hash code for this Collator
   1455      * @stable ICU 2.8
   1456      */
   1457     @Override
   1458     public int hashCode() {
   1459         int h = settings.readOnly().hashCode();
   1460         if(data.base == null) { return h; }  // root collator
   1461         // Do not rely on the rule string, see comments in operator==().
   1462         UnicodeSet set = getTailoredSet();
   1463         UnicodeSetIterator iter = new UnicodeSetIterator(set);
   1464         while(iter.next() && iter.codepoint != UnicodeSetIterator.IS_STRING) {
   1465             h ^= data.getCE32(iter.codepoint);
   1466         }
   1467         return h;
   1468     }
   1469 
   1470     /**
   1471      * Compares the source text String to the target text String according to the collation rules, strength and
   1472      * decomposition mode for this RuleBasedCollator. Returns an integer less than, equal to or greater than zero
   1473      * depending on whether the source String is less than, equal to or greater than the target String. See the Collator
   1474      * class description for an example of use. </p>
   1475      * <p>
   1476      * General recommendation: <br>
   1477      * If comparison are to be done to the same String multiple times, it would be more efficient to generate
   1478      * CollationKeys for the Strings and use CollationKey.compareTo(CollationKey) for the comparisons. If speed
   1479      * performance is critical and object instantiation is to be reduced, further optimization may be achieved by
   1480      * generating a simpler key of the form RawCollationKey and reusing this RawCollationKey object with the method
   1481      * RuleBasedCollator.getRawCollationKey. Internal byte representation can be directly accessed via RawCollationKey
   1482      * and stored for future use. Like CollationKey, RawCollationKey provides a method RawCollationKey.compareTo for key
   1483      * comparisons. If the each Strings are compared to only once, using the method RuleBasedCollator.compare(String,
   1484      * String) will have a better performance.
   1485      * </p>
   1486      *
   1487      * @param source
   1488      *            the source text String.
   1489      * @param target
   1490      *            the target text String.
   1491      * @return Returns an integer value. Value is less than zero if source is less than target, value is zero if source
   1492      *         and target are equal, value is greater than zero if source is greater than target.
   1493      * @see CollationKey
   1494      * @see #getCollationKey
   1495      * @stable ICU 2.8
   1496      */
   1497     @Override
   1498     public int compare(String source, String target) {
   1499         return doCompare(source, target);
   1500     }
   1501 
   1502     /**
   1503     * Abstract iterator for identical-level string comparisons.
   1504     * Returns FCD code points and handles temporary switching to NFD.
   1505     *
   1506     * <p>As with CollationIterator,
   1507     * Java NFDIterator instances are partially constructed and cached,
   1508     * and completed when reset for use.
   1509     * C++ NFDIterator instances are stack-allocated.
   1510     */
   1511     private static abstract class NFDIterator {
   1512         /**
   1513          * Partial constructor, must call reset().
   1514          */
   1515         NFDIterator() {}
   1516         final void reset() {
   1517             index = -1;
   1518         }
   1519 
   1520         /**
   1521          * Returns the next code point from the internal normalization buffer,
   1522          * or else the next text code point.
   1523          * Returns -1 at the end of the text.
   1524          */
   1525         final int nextCodePoint() {
   1526             if(index >= 0) {
   1527                 if(index == decomp.length()) {
   1528                     index = -1;
   1529                 } else {
   1530                     int c = Character.codePointAt(decomp, index);
   1531                     index += Character.charCount(c);
   1532                     return c;
   1533                 }
   1534             }
   1535             return nextRawCodePoint();
   1536         }
   1537         /**
   1538          * @param nfcImpl
   1539          * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
   1540          * @return the first code point in c's decomposition,
   1541          *         or c itself if it was decomposed already or if it does not decompose
   1542          */
   1543         final int nextDecomposedCodePoint(Normalizer2Impl nfcImpl, int c) {
   1544             if(index >= 0) { return c; }
   1545             decomp = nfcImpl.getDecomposition(c);
   1546             if(decomp == null) { return c; }
   1547             c = Character.codePointAt(decomp, 0);
   1548             index = Character.charCount(c);
   1549             return c;
   1550         }
   1551 
   1552         /**
   1553          * Returns the next text code point in FCD order.
   1554          * Returns -1 at the end of the text.
   1555          */
   1556         protected abstract int nextRawCodePoint();
   1557 
   1558         private String decomp;
   1559         private int index;
   1560     }
   1561 
   1562     private static class UTF16NFDIterator extends NFDIterator {
   1563         UTF16NFDIterator() {}
   1564         void setText(CharSequence seq, int start) {
   1565             reset();
   1566             s = seq;
   1567             pos = start;
   1568         }
   1569 
   1570         @Override
   1571         protected int nextRawCodePoint() {
   1572             if(pos == s.length()) { return Collation.SENTINEL_CP; }
   1573             int c = Character.codePointAt(s, pos);
   1574             pos += Character.charCount(c);
   1575             return c;
   1576         }
   1577 
   1578         protected CharSequence s;
   1579         protected int pos;
   1580     }
   1581 
   1582     private static final class FCDUTF16NFDIterator extends UTF16NFDIterator {
   1583         FCDUTF16NFDIterator() {}
   1584         void setText(Normalizer2Impl nfcImpl, CharSequence seq, int start) {
   1585             reset();
   1586             int spanLimit = nfcImpl.makeFCD(seq, start, seq.length(), null);
   1587             if(spanLimit == seq.length()) {
   1588                 s = seq;
   1589                 pos = start;
   1590             } else {
   1591                 if(str == null) {
   1592                     str = new StringBuilder();
   1593                 } else {
   1594                     str.setLength(0);
   1595                 }
   1596                 str.append(seq, start, spanLimit);
   1597                 ReorderingBuffer buffer = new ReorderingBuffer(nfcImpl, str, seq.length() - start);
   1598                 nfcImpl.makeFCD(seq, spanLimit, seq.length(), buffer);
   1599                 s = str;
   1600                 pos = 0;
   1601             }
   1602         }
   1603 
   1604         private StringBuilder str;
   1605     }
   1606 
   1607     private static final int compareNFDIter(Normalizer2Impl nfcImpl, NFDIterator left, NFDIterator right) {
   1608         for(;;) {
   1609             // Fetch the next FCD code point from each string.
   1610             int leftCp = left.nextCodePoint();
   1611             int rightCp = right.nextCodePoint();
   1612             if(leftCp == rightCp) {
   1613                 if(leftCp < 0) { break; }
   1614                 continue;
   1615             }
   1616             // If they are different, then decompose each and compare again.
   1617             if(leftCp < 0) {
   1618                 leftCp = -2;  // end of string
   1619             } else if(leftCp == 0xfffe) {
   1620                 leftCp = -1;  // U+FFFE: merge separator
   1621             } else {
   1622                 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
   1623             }
   1624             if(rightCp < 0) {
   1625                 rightCp = -2;  // end of string
   1626             } else if(rightCp == 0xfffe) {
   1627                 rightCp = -1;  // U+FFFE: merge separator
   1628             } else {
   1629                 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
   1630             }
   1631             if(leftCp < rightCp) { return Collation.LESS; }
   1632             if(leftCp > rightCp) { return Collation.GREATER; }
   1633         }
   1634         return Collation.EQUAL;
   1635     }
   1636 
   1637     /**
   1638      * Compares two CharSequences.
   1639      * @internal
   1640      * @deprecated This API is ICU internal only.
   1641      */
   1642     @Override
   1643     @Deprecated
   1644     protected int doCompare(CharSequence left, CharSequence right) {
   1645         if(left == right) {
   1646             return Collation.EQUAL;
   1647         }
   1648 
   1649         // Identical-prefix test.
   1650         int equalPrefixLength = 0;
   1651         for(;;) {
   1652             if(equalPrefixLength == left.length()) {
   1653                 if(equalPrefixLength == right.length()) { return Collation.EQUAL; }
   1654                 break;
   1655             } else if(equalPrefixLength == right.length() ||
   1656                       left.charAt(equalPrefixLength) != right.charAt(equalPrefixLength)) {
   1657                 break;
   1658             }
   1659             ++equalPrefixLength;
   1660         }
   1661 
   1662         CollationSettings roSettings = settings.readOnly();
   1663         boolean numeric = roSettings.isNumeric();
   1664         if(equalPrefixLength > 0) {
   1665             if((equalPrefixLength != left.length() &&
   1666                         data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) ||
   1667                     (equalPrefixLength != right.length() &&
   1668                         data.isUnsafeBackward(right.charAt(equalPrefixLength), numeric))) {
   1669                 // Identical prefix: Back up to the start of a contraction or reordering sequence.
   1670                 while(--equalPrefixLength > 0 &&
   1671                         data.isUnsafeBackward(left.charAt(equalPrefixLength), numeric)) {}
   1672             }
   1673             // Notes:
   1674             // - A longer string can compare equal to a prefix of it if only ignorables follow.
   1675             // - With a backward level, a longer string can compare less-than a prefix of it.
   1676 
   1677             // Pass the actual start of each string into the CollationIterators,
   1678             // plus the equalPrefixLength position,
   1679             // so that prefix matches back into the equal prefix work.
   1680         }
   1681 
   1682         int result;
   1683         int fastLatinOptions = roSettings.fastLatinOptions;
   1684         if(fastLatinOptions >= 0 &&
   1685                 (equalPrefixLength == left.length() ||
   1686                     left.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX) &&
   1687                 (equalPrefixLength == right.length() ||
   1688                     right.charAt(equalPrefixLength) <= CollationFastLatin.LATIN_MAX)) {
   1689             result = CollationFastLatin.compareUTF16(data.fastLatinTable,
   1690                                                       roSettings.fastLatinPrimaries,
   1691                                                       fastLatinOptions,
   1692                                                       left, right, equalPrefixLength);
   1693         } else {
   1694             result = CollationFastLatin.BAIL_OUT_RESULT;
   1695         }
   1696 
   1697         if(result == CollationFastLatin.BAIL_OUT_RESULT) {
   1698             CollationBuffer buffer = null;
   1699             try {
   1700                 buffer = getCollationBuffer();
   1701                 if(roSettings.dontCheckFCD()) {
   1702                     buffer.leftUTF16CollIter.setText(numeric, left, equalPrefixLength);
   1703                     buffer.rightUTF16CollIter.setText(numeric, right, equalPrefixLength);
   1704                     result = CollationCompare.compareUpToQuaternary(
   1705                             buffer.leftUTF16CollIter, buffer.rightUTF16CollIter, roSettings);
   1706                 } else {
   1707                     buffer.leftFCDUTF16Iter.setText(numeric, left, equalPrefixLength);
   1708                     buffer.rightFCDUTF16Iter.setText(numeric, right, equalPrefixLength);
   1709                     result = CollationCompare.compareUpToQuaternary(
   1710                             buffer.leftFCDUTF16Iter, buffer.rightFCDUTF16Iter, roSettings);
   1711                 }
   1712             } finally {
   1713                 releaseCollationBuffer(buffer);
   1714             }
   1715         }
   1716         if(result != Collation.EQUAL || roSettings.getStrength() < Collator.IDENTICAL) {
   1717             return result;
   1718         }
   1719 
   1720         CollationBuffer buffer = null;
   1721         try {
   1722             buffer = getCollationBuffer();
   1723             // Compare identical level.
   1724             Normalizer2Impl nfcImpl = data.nfcImpl;
   1725             if(roSettings.dontCheckFCD()) {
   1726                 buffer.leftUTF16NFDIter.setText(left, equalPrefixLength);
   1727                 buffer.rightUTF16NFDIter.setText(right, equalPrefixLength);
   1728                 return compareNFDIter(nfcImpl, buffer.leftUTF16NFDIter, buffer.rightUTF16NFDIter);
   1729             } else {
   1730                 buffer.leftFCDUTF16NFDIter.setText(nfcImpl, left, equalPrefixLength);
   1731                 buffer.rightFCDUTF16NFDIter.setText(nfcImpl, right, equalPrefixLength);
   1732                 return compareNFDIter(nfcImpl, buffer.leftFCDUTF16NFDIter, buffer.rightFCDUTF16NFDIter);
   1733             }
   1734         } finally {
   1735             releaseCollationBuffer(buffer);
   1736         }
   1737     }
   1738 
   1739     // package private constructors ------------------------------------------
   1740 
   1741     RuleBasedCollator(CollationTailoring t, ULocale vl) {
   1742         data = t.data;
   1743         settings = t.settings.clone();
   1744         tailoring = t;
   1745         validLocale = vl;
   1746         actualLocaleIsSameAsValid = false;
   1747     }
   1748 
   1749     private void adoptTailoring(CollationTailoring t) {
   1750         assert(settings == null && data == null && tailoring == null);
   1751         data = t.data;
   1752         settings = t.settings.clone();
   1753         tailoring = t;
   1754         validLocale = t.actualLocale;
   1755         actualLocaleIsSameAsValid = false;
   1756     }
   1757 
   1758     // package private methods -----------------------------------------------
   1759 
   1760     /**
   1761      * Tests whether a character is "unsafe" for use as a collation starting point.
   1762      *
   1763      * @param c code point or code unit
   1764      * @return true if c is unsafe
   1765      * @see CollationElementIterator#setOffset(int)
   1766      */
   1767     final boolean isUnsafe(int c) {
   1768         return data.isUnsafeBackward(c, settings.readOnly().isNumeric());
   1769     }
   1770 
    /**
     * Lock used while the collator is frozen: getCollationBuffer() locks it when isFrozen()
     * is true, and releaseCollationBuffer() unlocks it again.
     * It is assigned outside of this part of the file.
     */
    private Lock frozenLock;
   1775 
   1776     private static final class CollationBuffer {
   1777         private CollationBuffer(CollationData data) {
   1778             leftUTF16CollIter = new UTF16CollationIterator(data);
   1779             rightUTF16CollIter = new UTF16CollationIterator(data);
   1780             leftFCDUTF16Iter = new FCDUTF16CollationIterator(data);
   1781             rightFCDUTF16Iter = new FCDUTF16CollationIterator(data);
   1782             leftUTF16NFDIter = new UTF16NFDIterator();
   1783             rightUTF16NFDIter = new UTF16NFDIterator();
   1784             leftFCDUTF16NFDIter = new FCDUTF16NFDIterator();
   1785             rightFCDUTF16NFDIter = new FCDUTF16NFDIterator();
   1786         }
   1787 
   1788         UTF16CollationIterator leftUTF16CollIter;
   1789         UTF16CollationIterator rightUTF16CollIter;
   1790         FCDUTF16CollationIterator leftFCDUTF16Iter;
   1791         FCDUTF16CollationIterator rightFCDUTF16Iter;
   1792 
   1793         UTF16NFDIterator leftUTF16NFDIter;
   1794         UTF16NFDIterator rightUTF16NFDIter;
   1795         FCDUTF16NFDIterator leftFCDUTF16NFDIter;
   1796         FCDUTF16NFDIterator rightFCDUTF16NFDIter;
   1797 
   1798         RawCollationKey rawCollationKey;
   1799     }
   1800 
   1801     /**
   1802      * Get the version of this collator object.
   1803      *
   1804      * @return the version object associated with this collator
   1805      * @stable ICU 2.8
   1806      */
   1807     @Override
   1808     public VersionInfo getVersion() {
   1809         int version = tailoring.version;
   1810         int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor();
   1811         return VersionInfo.getInstance(
   1812                 (version >>> 24) + (rtVersion << 4) + (rtVersion >> 4),
   1813                 ((version >> 16) & 0xff), ((version >> 8) & 0xff), (version & 0xff));
   1814     }
   1815 
   1816     /**
   1817      * Get the UCA version of this collator object.
   1818      *
   1819      * @return the version object associated with this collator
   1820      * @stable ICU 2.8
   1821      */
   1822     @Override
   1823     public VersionInfo getUCAVersion() {
   1824         VersionInfo v = getVersion();
   1825         // Note: This is tied to how the current implementation encodes the UCA version
   1826         // in the overall getVersion().
   1827         // Alternatively, we could load the root collator and get at lower-level data from there.
   1828         // Either way, it will reflect the input collator's UCA version only
   1829         // if it is a known implementation.
   1830         // (C++ comment) It would be cleaner to make this a virtual Collator method.
   1831         // (In Java, it is virtual.)
   1832         return VersionInfo.getInstance(v.getMinor() >> 3, v.getMinor() & 7, v.getMilli() >> 6, 0);
   1833     }
   1834 
    // Reusable buffer handed out by getCollationBuffer(); created there on first use
    // when the collator is not frozen.
    private CollationBuffer collationBuffer;
   1836 
   1837     private final CollationBuffer getCollationBuffer() {
   1838         if (isFrozen()) {
   1839             frozenLock.lock();
   1840         } else if (collationBuffer == null) {
   1841             collationBuffer = new CollationBuffer(data);
   1842         }
   1843         return collationBuffer;
   1844     }
   1845 
   1846     private final void releaseCollationBuffer(CollationBuffer buffer) {
   1847         if (isFrozen()) {
   1848             frozenLock.unlock();
   1849         }
   1850     }
   1851 
   1852     /**
   1853      * {@inheritDoc}
   1854      * @draft ICU 53 (retain)
   1855      * @provisional This API might change or be removed in a future release.
   1856      */
   1857     @Override
   1858     public ULocale getLocale(ULocale.Type type) {
   1859         if (type == ULocale.ACTUAL_LOCALE) {
   1860             return actualLocaleIsSameAsValid ? validLocale : tailoring.actualLocale;
   1861         } else if(type == ULocale.VALID_LOCALE) {
   1862             return validLocale;
   1863         } else {
   1864             throw new IllegalArgumentException("unknown ULocale.Type " + type);
   1865         }
   1866     }
   1867 
   1868     /**
   1869      * {@inheritDoc}
   1870      */
   1871     @Override
   1872     void setLocale(ULocale valid, ULocale actual) {
   1873         // This method is called
   1874         // by other protected functions that checks and makes sure that
   1875         // valid and actual are not null before passing
   1876         assert (valid == null) == (actual == null);
   1877         // Another check we could do is that the actual locale is at
   1878         // the same level or less specific than the valid locale.
   1879         // TODO: Starting with Java 7, use Objects.equals(a, b).
   1880         if(Utility.objectEquals(actual, tailoring.actualLocale)) {
   1881             actualLocaleIsSameAsValid = false;
   1882         } else {
   1883             assert(Utility.objectEquals(actual, valid));
   1884             actualLocaleIsSameAsValid = true;
   1885         }
   1886         // Do not modify tailoring.actualLocale:
   1887         // We cannot be sure that that would be thread-safe.
   1888         validLocale = valid;
   1889     }
   1890 
    // Collation data of this collator; taken from the tailoring (t.data) when one is set.
    CollationData data;
    // This collator's settings: a clone of the tailoring's settings reference.
    SharedObject.Reference<CollationSettings> settings;  // reference-counted
    // The tailoring this collator was built from; also supplies the version, rules and actual locale.
    CollationTailoring tailoring;  // C++: reference-counted
    // Locale returned by getLocale(ULocale.VALID_LOCALE).
    private ULocale validLocale;
    // Note: No need in Java to track which attributes have been set explicitly.
    // int or EnumSet  explicitlySetAttributes;

    // When true, getLocale(ULocale.ACTUAL_LOCALE) returns validLocale
    // instead of tailoring.actualLocale; maintained by setLocale().
    private boolean actualLocaleIsSameAsValid;
   1899 }
   1900