Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 2000-2016, International Business Machines Corporation and
      7  * others. All Rights Reserved.
      8  *******************************************************************************
      9  */
     10 package android.icu.text;
     11 import java.nio.CharBuffer;
     12 import java.text.CharacterIterator;
     13 
     14 import android.icu.impl.Norm2AllModes;
     15 import android.icu.impl.Normalizer2Impl;
     16 import android.icu.impl.UCaseProps;
     17 import android.icu.lang.UCharacter;
     18 import android.icu.util.ICUCloneNotSupportedException;
     19 
     20 /**
     21  * Old Unicode normalization API.
     22  *
     23  * <p>This API has been replaced by the {@link Normalizer2} class and is only available
     24  * for backward compatibility. This class simply delegates to the Normalizer2 class.
     25  * There are two exceptions: The new API does not provide a replacement for
     26  * <code>QuickCheckResult</code> and <code>compare()</code>.
     27  *
     28  * <p><code>normalize</code> transforms Unicode text into an equivalent composed or
     29  * decomposed form, allowing for easier sorting and searching of text.
     30  * <code>normalize</code> supports the standard normalization forms described in
     31  * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
     32  * Unicode Standard Annex #15 &mdash; Unicode Normalization Forms</a>.
     33  *
     34  * <p>Characters with accents or other adornments can be encoded in
     35  * several different ways in Unicode.  For example, take the character A-acute.
     36  * In Unicode, this can be encoded as a single character (the
     37  * "composed" form):
     38  *
     39  * <pre>
     40  *      00C1    LATIN CAPITAL LETTER A WITH ACUTE
     41  * </pre>
     42  *
     43  * or as two separate characters (the "decomposed" form):
     44  *
     45  * <pre>
     46  *      0041    LATIN CAPITAL LETTER A
     47  *      0301    COMBINING ACUTE ACCENT
     48  * </pre>
     49  *
     50  * <p>To a user of your program, however, both of these sequences should be
     51  * treated as the same "user-level" character "A with acute accent".  When you
     52  * are searching or comparing text, you must ensure that these two sequences are
     53  * treated equivalently.  In addition, you must handle characters with more than
     54  * one accent.  Sometimes the order of a character's combining accents is
     55  * significant, while in other cases accent sequences in different orders are
     56  * really equivalent.
     57  *
     58  * <p>Similarly, the string "ffi" can be encoded as three separate letters:
     59  *
     60  * <pre>
     61  *      0066    LATIN SMALL LETTER F
     62  *      0066    LATIN SMALL LETTER F
     63  *      0069    LATIN SMALL LETTER I
     64  * </pre>
     65  *
     66  * or as the single character
     67  *
     68  * <pre>
     69  *      FB03    LATIN SMALL LIGATURE FFI
     70  * </pre>
     71  *
     72  * <p>The ffi ligature is not a distinct semantic character, and strictly speaking
     73  * it shouldn't be in Unicode at all, but it was included for compatibility
     74  * with existing character sets that already provided it.  The Unicode standard
     75  * identifies such characters by giving them "compatibility" decompositions
     76  * into the corresponding semantic characters.  When sorting and searching, you
     77  * will often want to use these mappings.
     78  *
     79  * <p><code>normalize</code> helps solve these problems by transforming text into
     80  * the canonical composed and decomposed forms as shown in the first example
     81  * above. In addition, you can have it perform compatibility decompositions so
     82  * that you can treat compatibility characters the same as their equivalents.
     83  * Finally, <code>normalize</code> rearranges accents into the proper canonical
     84  * order, so that you do not have to worry about accent rearrangement on your
     85  * own.
     86  *
     87  * <p>Form FCD, "Fast C or D", is also designed for collation.
     88  * It allows working with strings that are not necessarily normalized
     89  * with an algorithm (like in collation) that works under "canonical closure",
     90  * i.e., it treats precomposed characters and their decomposed equivalents the
     91  * same.
     92  *
     93  * <p>It is not a normalization form because it does not provide for uniqueness of
     94  * representation. Multiple strings may be canonically equivalent (their NFDs
     95  * are identical) and may all conform to FCD without being identical themselves.
     96  *
     97  * <p>The form is defined such that the "raw decomposition", the recursive
     98  * canonical decomposition of each character, results in a string that is
     99  * canonically ordered. This means that precomposed characters are allowed for
    100  * as long as their decompositions do not need canonical reordering.
    101  *
    102  * <p>Its advantage for a process like collation is that all NFD and most NFC texts
    103  * - and many unnormalized texts - already conform to FCD and do not need to be
    104  * normalized (NFD) for such a process. The FCD quick check will return YES for
    105  * most strings in practice.
    106  *
    107  * <p>normalize(FCD) may be implemented with NFD.
    108  *
    109  * <p>For more details on FCD see Unicode Technical Note #5 (Canonical Equivalence in Applications):
    110  * http://www.unicode.org/notes/tn5/#FCD
    111  *
    112  * <p>ICU collation performs either NFD or FCD normalization automatically if
    113  * normalization is turned on for the collator object. Beyond collation and
    114  * string search, normalized strings may be useful for string equivalence
    115  * comparisons, transliteration/transcription, unique representations, etc.
    116  *
    117  * <p>The W3C generally recommends exchanging text in NFC.
    118  * Note also that most legacy character encodings use only precomposed forms and
    119  * often do not encode any combining marks by themselves. For conversion to such
    120  * character encodings the Unicode text needs to be normalized to NFC.
    121  * For more usage examples, see the Unicode Standard Annex.
    122  *
    123  * <p>Note: The Normalizer class also provides API for iterative normalization.
    124  * While the setIndex() and getIndex() refer to indices in the
    125  * underlying Unicode input text, the next() and previous() methods
    126  * iterate through characters in the normalized output.
    127  * This means that there is not necessarily a one-to-one correspondence
    128  * between characters returned by next() and previous() and the indices
    129  * passed to and returned from setIndex() and getIndex().
    130  * It is for this reason that Normalizer does not implement the CharacterIterator interface.
    131  */
    132 public final class Normalizer implements Cloneable {
    133     // The input text and our position in it
    134     private UCharacterIterator  text;
    135     private Normalizer2         norm2;
    136     private Mode                mode;
    137     private int                 options;
    138 
    139     // The normalization buffer is the result of normalization
    140     // of the source in [currentIndex..nextIndex[ .
    141     private int                 currentIndex;
    142     private int                 nextIndex;
    143 
    144     // A buffer for holding intermediate results
    145     private StringBuilder       buffer;
    146     private int                 bufferPos;
    147 
    148     // Helper classes to defer loading of normalization data.
    149     private static final class ModeImpl {
    150         private ModeImpl(Normalizer2 n2) {
    151             normalizer2 = n2;
    152         }
    153         private final Normalizer2 normalizer2;
    154     }
    155     private static final class NFDModeImpl {
    156         private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFDInstance());
    157     }
    158     private static final class NFKDModeImpl {
    159         private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKDInstance());
    160     }
    161     private static final class NFCModeImpl {
    162         private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFCInstance());
    163     }
    164     private static final class NFKCModeImpl {
    165         private static final ModeImpl INSTANCE = new ModeImpl(Normalizer2.getNFKCInstance());
    166     }
    167     private static final class FCDModeImpl {
    168         private static final ModeImpl INSTANCE = new ModeImpl(Norm2AllModes.getFCDNormalizer2());
    169     }
    170 
    171     private static final class Unicode32 {
    172         private static final UnicodeSet INSTANCE = new UnicodeSet("[:age=3.2:]").freeze();
    173     }
    174     private static final class NFD32ModeImpl {
    175         private static final ModeImpl INSTANCE =
    176             new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFDInstance(),
    177                                                  Unicode32.INSTANCE));
    178     }
    179     private static final class NFKD32ModeImpl {
    180         private static final ModeImpl INSTANCE =
    181             new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFKDInstance(),
    182                                                  Unicode32.INSTANCE));
    183     }
    184     private static final class NFC32ModeImpl {
    185         private static final ModeImpl INSTANCE =
    186             new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFCInstance(),
    187                                                  Unicode32.INSTANCE));
    188     }
    189     private static final class NFKC32ModeImpl {
    190         private static final ModeImpl INSTANCE =
    191             new ModeImpl(new FilteredNormalizer2(Normalizer2.getNFKCInstance(),
    192                                                  Unicode32.INSTANCE));
    193     }
    194     private static final class FCD32ModeImpl {
    195         private static final ModeImpl INSTANCE =
    196             new ModeImpl(new FilteredNormalizer2(Norm2AllModes.getFCDNormalizer2(),
    197                                                  Unicode32.INSTANCE));
    198     }
    199 
    200     /**
    201      * Options bit set value to select Unicode 3.2 normalization
    202      * (except NormalizationCorrections).
    203      * At most one Unicode version can be selected at a time.
    204      *
    205      * @deprecated ICU 56 Use {@link FilteredNormalizer2} instead.
    206      * @hide original deprecated declaration
    207      */
    208     @Deprecated
    209     public static final int UNICODE_3_2=0x20;
    210 
    211     /**
    212      * Constant indicating that the end of the iteration has been reached.
    213      * This is guaranteed to have the same value as {@link UCharacterIterator#DONE}.
    214      *
    215      * @deprecated ICU 56
    216      * @hide original deprecated declaration
    217      */
    218     @Deprecated
    219     public static final int DONE = UCharacterIterator.DONE;
    220 
    221     /**
    222      * Constants for normalization modes.
    223      * <p>
    224      * The Mode class is not intended for public subclassing.
    225      * Only the Mode constants provided by the Normalizer class should be used,
    226      * and any fields or methods should not be called or overridden by users.
    227      *
    228      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    229      * @hide original deprecated declaration
    230      */
    231     @Deprecated
    232     public static abstract class Mode {
    233         /**
    234          * Sole constructor
    235          * @deprecated This API is ICU internal only.
    236          * @hide original deprecated declaration
    237          * @hide draft / provisional / internal are hidden on Android
    238          */
    239         @Deprecated
    240         protected Mode() {
    241         }
    242 
    243         /**
    244          * @deprecated This API is ICU internal only.
    245          * @hide original deprecated declaration
    246          * @hide draft / provisional / internal are hidden on Android
    247          */
    248         @Deprecated
    249         protected abstract Normalizer2 getNormalizer2(int options);
    250     }
    251 
    252     private static final class NONEMode extends Mode {
    253         @Override
    254         protected Normalizer2 getNormalizer2(int options) { return Norm2AllModes.NOOP_NORMALIZER2; }
    255     }
    256     private static final class NFDMode extends Mode {
    257         @Override
    258         protected Normalizer2 getNormalizer2(int options) {
    259             return (options&UNICODE_3_2) != 0 ?
    260                     NFD32ModeImpl.INSTANCE.normalizer2 : NFDModeImpl.INSTANCE.normalizer2;
    261         }
    262     }
    263     private static final class NFKDMode extends Mode {
    264         @Override
    265         protected Normalizer2 getNormalizer2(int options) {
    266             return (options&UNICODE_3_2) != 0 ?
    267                     NFKD32ModeImpl.INSTANCE.normalizer2 : NFKDModeImpl.INSTANCE.normalizer2;
    268         }
    269     }
    270     private static final class NFCMode extends Mode {
    271         @Override
    272         protected Normalizer2 getNormalizer2(int options) {
    273             return (options&UNICODE_3_2) != 0 ?
    274                     NFC32ModeImpl.INSTANCE.normalizer2 : NFCModeImpl.INSTANCE.normalizer2;
    275         }
    276     }
    277     private static final class NFKCMode extends Mode {
    278         @Override
    279         protected Normalizer2 getNormalizer2(int options) {
    280             return (options&UNICODE_3_2) != 0 ?
    281                     NFKC32ModeImpl.INSTANCE.normalizer2 : NFKCModeImpl.INSTANCE.normalizer2;
    282         }
    283     }
    284     private static final class FCDMode extends Mode {
    285         @Override
    286         protected Normalizer2 getNormalizer2(int options) {
    287             return (options&UNICODE_3_2) != 0 ?
    288                     FCD32ModeImpl.INSTANCE.normalizer2 : FCDModeImpl.INSTANCE.normalizer2;
    289         }
    290     }
    291 
    292     /**
    293      * No decomposition/composition.
    294      *
    295      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    296      * @hide original deprecated declaration
    297      */
    298     @Deprecated
    299     public static final Mode NONE = new NONEMode();
    300 
    301     /**
    302      * Canonical decomposition.
    303      *
    304      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    305      * @hide original deprecated declaration
    306      */
    307     @Deprecated
    308     public static final Mode NFD = new NFDMode();
    309 
    310     /**
    311      * Compatibility decomposition.
    312      *
    313      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    314      * @hide original deprecated declaration
    315      */
    316     @Deprecated
    317     public static final Mode NFKD = new NFKDMode();
    318 
    319     /**
    320      * Canonical decomposition followed by canonical composition.
    321      *
    322      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    323      * @hide original deprecated declaration
    324      */
    325     @Deprecated
    326     public static final Mode NFC = new NFCMode();
    327 
    328     /**
    329      * Default normalization.
    330      *
    331      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    332      * @hide original deprecated declaration
    333      */
    334     @Deprecated
    335     public static final Mode DEFAULT = NFC;
    336 
    337     /**
    338      * Compatibility decomposition followed by canonical composition.
    339      *
    340      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    341      * @hide original deprecated declaration
    342      */
    343     @Deprecated
    344     public static final Mode NFKC =new NFKCMode();
    345 
    346     /**
    347      * "Fast C or D" form.
    348      *
    349      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    350      * @hide original deprecated declaration
    351      */
    352     @Deprecated
    353     public static final Mode FCD = new FCDMode();
    354 
    355     /**
    356      * Null operation for use with the {@link android.icu.text.Normalizer constructors}
    357      * and the static {@link #normalize normalize} method.  This value tells
    358      * the <tt>Normalizer</tt> to do nothing but return unprocessed characters
    359      * from the underlying String or CharacterIterator.  If you have code which
    360      * requires raw text at some times and normalized text at others, you can
    361      * use <tt>NO_OP</tt> for the cases where you want raw text, rather
    362      * than having a separate code path that bypasses <tt>Normalizer</tt>
    363      * altogether.
    364      * <p>
    365      * @see #setMode
    366      * @deprecated ICU 2.8. Use Nomalizer.NONE
    367      * @see #NONE
    368      * @hide original deprecated declaration
    369      */
    370     @Deprecated
    371     public static final Mode NO_OP = NONE;
    372 
    373     /**
    374      * Canonical decomposition followed by canonical composition.  Used with the
    375      * {@link android.icu.text.Normalizer constructors} and the static
    376      * {@link #normalize normalize} method to determine the operation to be
    377      * performed.
    378      * <p>
    379      * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
    380      * off, this operation produces output that is in
    381      * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
    382      * Form</a>
    383      * <b>C</b>.
    384      * <p>
    385      * @see #setMode
    386      * @deprecated ICU 2.8. Use Normalier.NFC
    387      * @see #NFC
    388      * @hide original deprecated declaration
    389      */
    390     @Deprecated
    391     public static final Mode COMPOSE = NFC;
    392 
    393     /**
    394      * Compatibility decomposition followed by canonical composition.
    395      * Used with the {@link android.icu.text.Normalizer constructors} and the static
    396      * {@link #normalize normalize} method to determine the operation to be
    397      * performed.
    398      * <p>
    399      * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
    400      * off, this operation produces output that is in
    401      * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
    402      * Form</a>
    403      * <b>KC</b>.
    404      * <p>
    405      * @see #setMode
    406      * @deprecated ICU 2.8. Use Normalizer.NFKC
    407      * @see #NFKC
    408      * @hide original deprecated declaration
    409      */
    410     @Deprecated
    411     public static final Mode COMPOSE_COMPAT = NFKC;
    412 
    413     /**
    414      * Canonical decomposition.  This value is passed to the
    415      * {@link android.icu.text.Normalizer constructors} and the static
    416      * {@link #normalize normalize}
    417      * method to determine the operation to be performed.
    418      * <p>
    419      * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
    420      * off, this operation produces output that is in
    421      * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
    422      * Form</a>
    423      * <b>D</b>.
    424      * <p>
    425      * @see #setMode
    426      * @deprecated ICU 2.8. Use Normalizer.NFD
    427      * @see #NFD
    428      * @hide original deprecated declaration
    429      */
    430     @Deprecated
    431     public static final Mode DECOMP = NFD;
    432 
    433     /**
    434      * Compatibility decomposition.  This value is passed to the
    435      * {@link android.icu.text.Normalizer constructors} and the static
    436      * {@link #normalize normalize}
    437      * method to determine the operation to be performed.
    438      * <p>
    439      * If all optional features (<i>e.g.</i> {@link #IGNORE_HANGUL}) are turned
    440      * off, this operation produces output that is in
    441      * <a href=http://www.unicode.org/unicode/reports/tr15/>Unicode Canonical
    442      * Form</a>
    443      * <b>KD</b>.
    444      * <p>
    445      * @see #setMode
    446      * @deprecated ICU 2.8. Use Normalizer.NFKD
    447      * @see #NFKD
    448      * @hide original deprecated declaration
    449      */
    450     @Deprecated
    451     public static final Mode DECOMP_COMPAT = NFKD;
    452 
    453     /**
    454      * Option to disable Hangul/Jamo composition and decomposition.
    455      * This option applies to Korean text,
    456      * which can be represented either in the Jamo alphabet or in Hangul
    457      * characters, which are really just two or three Jamo combined
    458      * into one visual glyph.  Since Jamo takes up more storage space than
    459      * Hangul, applications that process only Hangul text may wish to turn
    460      * this option on when decomposing text.
    461      * <p>
    462      * The Unicode standard treates Hangul to Jamo conversion as a
    463      * canonical decomposition, so this option must be turned <b>off</b> if you
    464      * wish to transform strings into one of the standard
    465      * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
    466      * Unicode Normalization Forms</a>.
    467      * <p>
    468      * @see #setOption
    469      * @deprecated ICU 2.8. This option is no longer supported.
    470      * @hide original deprecated declaration
    471      */
    472     @Deprecated
    473     public static final int IGNORE_HANGUL = 0x0001;
    474 
    475     /**
    476      * Result values for quickCheck().
    477      * For details see Unicode Technical Report 15.
    478      */
    479     public static final class QuickCheckResult{
    480         //private int resultValue;
    481         private QuickCheckResult(int value) {
    482             //resultValue=value;
    483         }
    484     }
    485     /**
    486      * Indicates that string is not in the normalized format
    487      */
    488     public static final QuickCheckResult NO = new QuickCheckResult(0);
    489 
    490     /**
    491      * Indicates that string is in the normalized format
    492      */
    493     public static final QuickCheckResult YES = new QuickCheckResult(1);
    494 
    495     /**
    496      * Indicates it cannot be determined if string is in the normalized
    497      * format without further thorough checks.
    498      */
    499     public static final QuickCheckResult MAYBE = new QuickCheckResult(2);
    500 
    501     /**
    502      * Option bit for compare:
    503      * Case sensitively compare the strings
    504      */
    505     public static final int FOLD_CASE_DEFAULT =  UCharacter.FOLD_CASE_DEFAULT;
    506 
    507     /**
    508      * Option bit for compare:
    509      * Both input strings are assumed to fulfill FCD conditions.
    510      */
    511     public static final int INPUT_IS_FCD    =      0x20000;
    512 
    513     /**
    514      * Option bit for compare:
    515      * Perform case-insensitive comparison.
    516      */
    517     public static final int COMPARE_IGNORE_CASE  =     0x10000;
    518 
    519     /**
    520      * Option bit for compare:
    521      * Compare strings in code point order instead of code unit order.
    522      */
    523     public static final int COMPARE_CODE_POINT_ORDER = 0x8000;
    524 
    525     /**
    526      * Option value for case folding:
    527      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
    528      * and dotless i appropriately for Turkic languages (tr, az).
    529      * @see UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I
    530      */
    531     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = UCharacter.FOLD_CASE_EXCLUDE_SPECIAL_I;
    532 
    533     /**
    534      * Lowest-order bit number of compare() options bits corresponding to
    535      * normalization options bits.
    536      *
    537      * The options parameter for compare() uses most bits for
    538      * itself and for various comparison and folding flags.
    539      * The most significant bits, however, are shifted down and passed on
    540      * to the normalization implementation.
    541      * (That is, from compare(..., options, ...),
    542      * options&gt;&gt;COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
    543      * internal normalization functions.)
    544      *
    545      * @see #compare
    546      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    547      * @hide original deprecated declaration
    548      */
    549     @Deprecated
    550     public static final int COMPARE_NORM_OPTIONS_SHIFT  = 20;
    551 
    552     //-------------------------------------------------------------------------
    553     // Iterator constructors
    554     //-------------------------------------------------------------------------
    555 
    /**
     * Constructs a {@code Normalizer} that iterates over the normalized form
     * of the given string.
     * <p>
     * The {@code opt} argument selects which optional {@code Normalizer}
     * features are enabled for this object.
     * <p>
     * @param str  The string to be normalized.  Normalization starts at the
     *              beginning of the string.
     *
     * @param mode The normalization mode.
     *
     * @param opt Any optional features to be enabled.
     *            Currently the only available option is {@link #UNICODE_3_2}.
     *            Pass 0 for the default behavior corresponding to one of the
     *            standard Unicode Normalization Forms.
     * @deprecated ICU 56 Use {@link Normalizer2} instead.
     * @hide original deprecated declaration
     */
    @Deprecated
    public Normalizer(String str, Mode mode, int opt) {
        this.mode = mode;
        options = opt;
        text = UCharacterIterator.getInstance(str);
        // The Normalizer2 is resolved once, from the mode and the options.
        norm2 = mode.getNormalizer2(opt);
        buffer = new StringBuilder();
    }
    583 
    584     /**
    585      * Creates a new <tt>Normalizer</tt> object for iterating over the
    586      * normalized form of the given text.
    587      * <p>
    588      * @param iter  The input text to be normalized.  The normalization
    589      *              will start at the beginning of the string.
    590      *
    591      * @param mode  The normalization mode.
    592      *
    593      * @param opt Any optional features to be enabled.
    594      *            Currently the only available option is {@link #UNICODE_3_2}.
    595      *            If you want the default behavior corresponding to one of the
    596      *            standard Unicode Normalization Forms, use 0 for this argument.
    597      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    598      * @hide original deprecated declaration
    599      */
    600     @Deprecated
    601     public Normalizer(CharacterIterator iter, Mode mode, int opt) {
    602         this.text = UCharacterIterator.getInstance((CharacterIterator)iter.clone());
    603         this.mode = mode;
    604         this.options = opt;
    605         norm2 = mode.getNormalizer2(opt);
    606         buffer = new StringBuilder();
    607     }
    608 
    609     /**
    610      * Creates a new <tt>Normalizer</tt> object for iterating over the
    611      * normalized form of the given text.
    612      * <p>
    613      * @param iter  The input text to be normalized.  The normalization
    614      *              will start at the beginning of the string.
    615      *
    616      * @param mode  The normalization mode.
    617      * @param options The normalization options, ORed together (0 for no options).
    618      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    619      * @hide original deprecated declaration
    620      */
    621     @Deprecated
    622     public Normalizer(UCharacterIterator iter, Mode mode, int options) {
    623         try {
    624             this.text     = (UCharacterIterator)iter.clone();
    625             this.mode     = mode;
    626             this.options  = options;
    627             norm2 = mode.getNormalizer2(options);
    628             buffer = new StringBuilder();
    629         } catch (CloneNotSupportedException e) {
    630             throw new ICUCloneNotSupportedException(e);
    631         }
    632     }
    633 
    634     /**
    635      * Clones this <tt>Normalizer</tt> object.  All properties of this
    636      * object are duplicated in the new object, including the cloning of any
    637      * {@link CharacterIterator} that was passed in to the constructor
    638      * or to {@link #setText(CharacterIterator) setText}.
    639      * However, the text storage underlying
    640      * the <tt>CharacterIterator</tt> is not duplicated unless the
    641      * iterator's <tt>clone</tt> method does so.
    642      *
    643      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    644      * @hide original deprecated declaration
    645      */
    646     @Deprecated
    647     @Override
    648     public Object clone() {
    649         try {
    650             Normalizer copy = (Normalizer) super.clone();
    651             copy.text = (UCharacterIterator) text.clone();
    652             copy.mode = mode;
    653             copy.options = options;
    654             copy.norm2 = norm2;
    655             copy.buffer = new StringBuilder(buffer);
    656             copy.bufferPos = bufferPos;
    657             copy.currentIndex = currentIndex;
    658             copy.nextIndex = nextIndex;
    659             return copy;
    660         }
    661         catch (CloneNotSupportedException e) {
    662             throw new ICUCloneNotSupportedException(e);
    663         }
    664     }
    665 
    666     //--------------------------------------------------------------------------
    667     // Static Utility methods
    668     //--------------------------------------------------------------------------
    669 
    670     private static final Normalizer2 getComposeNormalizer2(boolean compat, int options) {
    671         return (compat ? NFKC : NFC).getNormalizer2(options);
    672     }
    673     private static final Normalizer2 getDecomposeNormalizer2(boolean compat, int options) {
    674         return (compat ? NFKD : NFD).getNormalizer2(options);
    675     }
    676 
    677     /**
    678      * Compose a string.
    679      * The string will be composed to according to the specified mode.
    680      * @param str        The string to compose.
    681      * @param compat     If true the string will be composed according to
    682      *                    NFKC rules and if false will be composed according to
    683      *                    NFC rules.
    684      * @return String    The composed string
    685      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    686      * @hide original deprecated declaration
    687      */
    688     @Deprecated
    689     public static String compose(String str, boolean compat) {
    690         return compose(str,compat,0);
    691     }
    692 
    693     /**
    694      * Compose a string.
    695      * The string will be composed to according to the specified mode.
    696      * @param str        The string to compose.
    697      * @param compat     If true the string will be composed according to
    698      *                    NFKC rules and if false will be composed according to
    699      *                    NFC rules.
    700      * @param options    The only recognized option is UNICODE_3_2
    701      * @return String    The composed string
    702      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    703      * @hide original deprecated declaration
    704      */
    705     @Deprecated
    706     public static String compose(String str, boolean compat, int options) {
    707         return getComposeNormalizer2(compat, options).normalize(str);
    708     }
    709 
    710     /**
    711      * Compose a string.
    712      * The string will be composed to according to the specified mode.
    713      * @param source The char array to compose.
    714      * @param target A char buffer to receive the normalized text.
    715      * @param compat If true the char array will be composed according to
    716      *                NFKC rules and if false will be composed according to
    717      *                NFC rules.
    718      * @param options The normalization options, ORed together (0 for no options).
    719      * @return int   The total buffer size needed;if greater than length of
    720      *                result, the output was truncated.
    721      * @exception IndexOutOfBoundsException if target.length is less than the
    722      *             required length
    723      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    724      * @hide original deprecated declaration
    725      */
    726     @Deprecated
    727     public static int compose(char[] source,char[] target, boolean compat, int options) {
    728         return compose(source, 0, source.length, target, 0, target.length, compat, options);
    729     }
    730 
    731     /**
    732      * Compose a string.
    733      * The string will be composed to according to the specified mode.
    734      * @param src       The char array to compose.
    735      * @param srcStart  Start index of the source
    736      * @param srcLimit  Limit index of the source
    737      * @param dest      The char buffer to fill in
    738      * @param destStart Start index of the destination buffer
    739      * @param destLimit End index of the destination buffer
    740      * @param compat If true the char array will be composed according to
    741      *                NFKC rules and if false will be composed according to
    742      *                NFC rules.
    743      * @param options The normalization options, ORed together (0 for no options).
    744      * @return int   The total buffer size needed;if greater than length of
    745      *                result, the output was truncated.
    746      * @exception IndexOutOfBoundsException if target.length is less than the
    747      *             required length
    748      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    749      * @hide original deprecated declaration
    750      */
    751     @Deprecated
    752     public static int compose(char[] src,int srcStart, int srcLimit,
    753                               char[] dest,int destStart, int destLimit,
    754                               boolean compat, int options) {
    755         CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart);
    756         CharsAppendable app = new CharsAppendable(dest, destStart, destLimit);
    757         getComposeNormalizer2(compat, options).normalize(srcBuffer, app);
    758         return app.length();
    759     }
    760 
    761     /**
    762      * Decompose a string.
    763      * The string will be decomposed to according to the specified mode.
    764      * @param str       The string to decompose.
    765      * @param compat    If true the string will be decomposed according to NFKD
    766      *                   rules and if false will be decomposed according to NFD
    767      *                   rules.
    768      * @return String   The decomposed string
    769      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    770      * @hide original deprecated declaration
    771      */
    772     @Deprecated
    773     public static String decompose(String str, boolean compat) {
    774         return decompose(str,compat,0);
    775     }
    776 
    777     /**
    778      * Decompose a string.
    779      * The string will be decomposed to according to the specified mode.
    780      * @param str     The string to decompose.
    781      * @param compat  If true the string will be decomposed according to NFKD
    782      *                 rules and if false will be decomposed according to NFD
    783      *                 rules.
    784      * @param options The normalization options, ORed together (0 for no options).
    785      * @return String The decomposed string
    786      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    787      * @hide original deprecated declaration
    788      */
    789     @Deprecated
    790     public static String decompose(String str, boolean compat, int options) {
    791         return getDecomposeNormalizer2(compat, options).normalize(str);
    792     }
    793 
    794     /**
    795      * Decompose a string.
    796      * The string will be decomposed to according to the specified mode.
    797      * @param source The char array to decompose.
    798      * @param target A char buffer to receive the normalized text.
    799      * @param compat If true the char array will be decomposed according to NFKD
    800      *                rules and if false will be decomposed according to
    801      *                NFD rules.
    802      * @return int   The total buffer size needed;if greater than length of
    803      *                result,the output was truncated.
    804      * @param options The normalization options, ORed together (0 for no options).
    805      * @exception IndexOutOfBoundsException if the target capacity is less than
    806      *             the required length
    807      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    808      * @hide original deprecated declaration
    809      */
    810     @Deprecated
    811     public static int decompose(char[] source,char[] target, boolean compat, int options) {
    812         return decompose(source, 0, source.length, target, 0, target.length, compat, options);
    813     }
    814 
    815     /**
    816      * Decompose a string.
    817      * The string will be decomposed to according to the specified mode.
    818      * @param src       The char array to compose.
    819      * @param srcStart  Start index of the source
    820      * @param srcLimit  Limit index of the source
    821      * @param dest      The char buffer to fill in
    822      * @param destStart Start index of the destination buffer
    823      * @param destLimit End index of the destination buffer
    824      * @param compat If true the char array will be decomposed according to NFKD
    825      *                rules and if false will be decomposed according to
    826      *                NFD rules.
    827      * @param options The normalization options, ORed together (0 for no options).
    828      * @return int   The total buffer size needed;if greater than length of
    829      *                result,the output was truncated.
    830      * @exception IndexOutOfBoundsException if the target capacity is less than
    831      *             the required length
    832      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    833      * @hide original deprecated declaration
    834      */
    835     @Deprecated
    836     public static int decompose(char[] src,int srcStart, int srcLimit,
    837                                 char[] dest,int destStart, int destLimit,
    838                                 boolean compat, int options) {
    839         CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart);
    840         CharsAppendable app = new CharsAppendable(dest, destStart, destLimit);
    841         getDecomposeNormalizer2(compat, options).normalize(srcBuffer, app);
    842         return app.length();
    843     }
    844 
    845     /**
    846      * Normalizes a <tt>String</tt> using the given normalization operation.
    847      * <p>
    848      * The <tt>options</tt> parameter specifies which optional
    849      * <tt>Normalizer</tt> features are to be enabled for this operation.
    850      * Currently the only available option is {@link #UNICODE_3_2}.
    851      * If you want the default behavior corresponding to one of the standard
    852      * Unicode Normalization Forms, use 0 for this argument.
    853      * <p>
    854      * @param str       the input string to be normalized.
    855      * @param mode      the normalization mode
    856      * @param options   the optional features to be enabled.
    857      * @return String   the normalized string
    858      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    859      * @hide original deprecated declaration
    860      */
    861     @Deprecated
    862     public static String normalize(String str, Mode mode, int options) {
    863         return mode.getNormalizer2(options).normalize(str);
    864     }
    865 
    866     /**
    867      * Normalize a string.
    868      * The string will be normalized according to the specified normalization
    869      * mode and options.
    870      * @param src        The string to normalize.
    871      * @param mode       The normalization mode; one of Normalizer.NONE,
    872      *                    Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
    873      *                    Normalizer.NFKD, Normalizer.DEFAULT
    874      * @return the normalized string
    875      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    876      * @hide original deprecated declaration
    877      */
    878     @Deprecated
    879     public static String normalize(String src,Mode mode) {
    880         return normalize(src, mode, 0);
    881     }
    882     /**
    883      * Normalize a string.
    884      * The string will be normalized according to the specified normalization
    885      * mode and options.
    886      * @param source The char array to normalize.
    887      * @param target A char buffer to receive the normalized text.
    888      * @param mode   The normalization mode; one of Normalizer.NONE,
    889      *                Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
    890      *                Normalizer.NFKD, Normalizer.DEFAULT
    891      * @param options The normalization options, ORed together (0 for no options).
    892      * @return int   The total buffer size needed;if greater than length of
    893      *                result, the output was truncated.
    894      * @exception    IndexOutOfBoundsException if the target capacity is less
    895      *                than the required length
    896      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    897      * @hide original deprecated declaration
    898      */
    899     @Deprecated
    900     public static int normalize(char[] source,char[] target, Mode  mode, int options) {
    901         return normalize(source,0,source.length,target,0,target.length,mode, options);
    902     }
    903 
    904     /**
    905      * Normalize a string.
    906      * The string will be normalized according to the specified normalization
    907      * mode and options.
    908      * @param src       The char array to compose.
    909      * @param srcStart  Start index of the source
    910      * @param srcLimit  Limit index of the source
    911      * @param dest      The char buffer to fill in
    912      * @param destStart Start index of the destination buffer
    913      * @param destLimit End index of the destination buffer
    914      * @param mode      The normalization mode; one of Normalizer.NONE,
    915      *                   Normalizer.NFD, Normalizer.NFC, Normalizer.NFKC,
    916      *                   Normalizer.NFKD, Normalizer.DEFAULT
    917      * @param options The normalization options, ORed together (0 for no options).
    918      * @return int      The total buffer size needed;if greater than length of
    919      *                   result, the output was truncated.
    920      * @exception       IndexOutOfBoundsException if the target capacity is
    921      *                   less than the required length
    922      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    923      * @hide original deprecated declaration
    924      */
    925     @Deprecated
    926     public static int normalize(char[] src,int srcStart, int srcLimit,
    927                                 char[] dest,int destStart, int destLimit,
    928                                 Mode  mode, int options) {
    929         CharBuffer srcBuffer = CharBuffer.wrap(src, srcStart, srcLimit - srcStart);
    930         CharsAppendable app = new CharsAppendable(dest, destStart, destLimit);
    931         mode.getNormalizer2(options).normalize(srcBuffer, app);
    932         return app.length();
    933     }
    934 
    935     /**
    936      * Normalize a codepoint according to the given mode
    937      * @param char32    The input string to be normalized.
    938      * @param mode      The normalization mode
    939      * @param options   Options for use with exclusion set and tailored Normalization
    940      *                                   The only option that is currently recognized is UNICODE_3_2
    941      * @return String   The normalized string
    942      * @see #UNICODE_3_2
    943      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    944      * @hide original deprecated declaration
    945      */
    946     @Deprecated
    947     public static String normalize(int char32, Mode mode, int options) {
    948         if(mode == NFD && options == 0) {
    949             String decomposition = Normalizer2.getNFCInstance().getDecomposition(char32);
    950             if(decomposition == null) {
    951                 decomposition = UTF16.valueOf(char32);
    952             }
    953             return decomposition;
    954         }
    955         return normalize(UTF16.valueOf(char32), mode, options);
    956     }
    957 
    958     /**
    959      * Convenience method to normalize a codepoint according to the given mode
    960      * @param char32    The input string to be normalized.
    961      * @param mode      The normalization mode
    962      * @return String   The normalized string
    963      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    964      * @hide original deprecated declaration
    965      */
    966     @Deprecated
    967     public static String normalize(int char32, Mode mode) {
    968         return normalize(char32, mode, 0);
    969     }
    970 
    971     /**
    972      * Convenience method.
    973      *
    974      * @param source   string for determining if it is in a normalized format
    975      * @param mode     normalization format (Normalizer.NFC,Normalizer.NFD,
    976      *                  Normalizer.NFKC,Normalizer.NFKD)
    977      * @return         Return code to specify if the text is normalized or not
    978      *                     (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
    979      * @deprecated ICU 56 Use {@link Normalizer2} instead.
    980      * @hide original deprecated declaration
    981      */
    982     @Deprecated
    983     public static QuickCheckResult quickCheck(String source, Mode mode) {
    984         return quickCheck(source, mode, 0);
    985     }
    986 
    987     /**
    988      * Performing quick check on a string, to quickly determine if the string is
    989      * in a particular normalization format.
    990      * Three types of result can be returned Normalizer.YES, Normalizer.NO or
    991      * Normalizer.MAYBE. Result Normalizer.YES indicates that the argument
    992      * string is in the desired normalized format, Normalizer.NO determines that
    993      * argument string is not in the desired normalized format. A
    994      * Normalizer.MAYBE result indicates that a more thorough check is required,
    995      * the user may have to put the string in its normalized form and compare
    996      * the results.
    997      *
    998      * @param source   string for determining if it is in a normalized format
    999      * @param mode     normalization format (Normalizer.NFC,Normalizer.NFD,
   1000      *                  Normalizer.NFKC,Normalizer.NFKD)
   1001      * @param options   Options for use with exclusion set and tailored Normalization
   1002      *                                   The only option that is currently recognized is UNICODE_3_2
   1003      * @return         Return code to specify if the text is normalized or not
   1004      *                     (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
   1005      * @deprecated ICU 56 Use {@link Normalizer2} instead.
   1006      * @hide original deprecated declaration
   1007      */
   1008     @Deprecated
   1009     public static QuickCheckResult quickCheck(String source, Mode mode, int options) {
   1010         return mode.getNormalizer2(options).quickCheck(source);
   1011     }
   1012 
   1013     /**
   1014      * Convenience method.
   1015      *
   1016      * @param source Array of characters for determining if it is in a
   1017      *                normalized format
   1018      * @param mode   normalization format (Normalizer.NFC,Normalizer.NFD,
   1019      *                Normalizer.NFKC,Normalizer.NFKD)
   1020      * @param options   Options for use with exclusion set and tailored Normalization
   1021      *                                   The only option that is currently recognized is UNICODE_3_2
   1022      * @return       Return code to specify if the text is normalized or not
   1023      *                (Normalizer.YES, Normalizer.NO or Normalizer.MAYBE)
   1024      * @deprecated ICU 56 Use {@link Normalizer2} instead.
   1025      * @hide original deprecated declaration
   1026      */
   1027     @Deprecated
   1028     public static QuickCheckResult quickCheck(char[] source, Mode mode, int options) {
   1029         return quickCheck(source, 0, source.length, mode, options);
   1030     }
   1031 
   1032     /**
   1033      * Performing quick check on a string, to quickly determine if the string is
   1034      * in a particular normalization format.
   1035      * Three types of result can be returned Normalizer.YES, Normalizer.NO or
   1036      * Normalizer.MAYBE. Result Normalizer.YES indicates that the argument
   1037      * string is in the desired normalized format, Normalizer.NO determines that
   1038      * argument string is not in the desired normalized format. A
   1039      * Normalizer.MAYBE result indicates that a more thorough check is required,
   1040      * the user may have to put the string in its normalized form and compare
   1041      * the results.
   1042      *
   1043      * @param source    string for determining if it is in a normalized format
   1044      * @param start     the start index of the source
   1045      * @param limit     the limit index of the source it is equal to the length
   1046      * @param mode      normalization format (Normalizer.NFC,Normalizer.NFD,
   1047      *                   Normalizer.NFKC,Normalizer.NFKD)
   1048      * @param options   Options for use with exclusion set and tailored Normalization
   1049      *                                   The only option that is currently recognized is UNICODE_3_2
   1050      * @return          Return code to specify if the text is normalized or not
   1051      *                   (Normalizer.YES, Normalizer.NO or
   1052      *                   Normalizer.MAYBE)
   1053      * @deprecated ICU 56 Use {@link Normalizer2} instead.
   1054      * @hide original deprecated declaration
   1055      */
   1056     @Deprecated
   1057     public static QuickCheckResult quickCheck(char[] source,int start,
   1058                                               int limit, Mode mode,int options) {
   1059         CharBuffer srcBuffer = CharBuffer.wrap(source, start, limit - start);
   1060         return mode.getNormalizer2(options).quickCheck(srcBuffer);
   1061     }
   1062 
   1063     /**
   1064      * Test if a string is in a given normalization form.
   1065      * This is semantically equivalent to source.equals(normalize(source, mode)).
   1066      *
   1067      * Unlike quickCheck(), this function returns a definitive result,
   1068      * never a "maybe".
   1069      * For NFD, NFKD, and FCD, both functions work exactly the same.
   1070      * For NFC and NFKC where quickCheck may return "maybe", this function will
   1071      * perform further tests to arrive at a true/false result.
   1072      * @param src       The input array of characters to be checked to see if
   1073      *                   it is normalized
   1074      * @param start     The strart index in the source
   1075      * @param limit     The limit index in the source
   1076      * @param mode      the normalization mode
   1077      * @param options   Options for use with exclusion set and tailored Normalization
   1078      *                                   The only option that is currently recognized is UNICODE_3_2
   1079      * @return Boolean value indicating whether the source string is in the
   1080      *         "mode" normalization form
   1081      * @deprecated ICU 56 Use {@link Normalizer2} instead.
   1082      * @hide original deprecated declaration
   1083      */
   1084     @Deprecated
   1085     public static boolean isNormalized(char[] src,int start,
   1086                                        int limit, Mode mode,
   1087                                        int options) {
   1088         CharBuffer srcBuffer = CharBuffer.wrap(src, start, limit - start);
   1089         return mode.getNormalizer2(options).isNormalized(srcBuffer);
   1090     }
   1091 
   1092     /**
   1093      * Test if a string is in a given normalization form.
   1094      * This is semantically equivalent to source.equals(normalize(source, mode)).
   1095      *
   1096      * Unlike quickCheck(), this function returns a definitive result,
   1097      * never a "maybe".
   1098      * For NFD, NFKD, and FCD, both functions work exactly the same.
   1099      * For NFC and NFKC where quickCheck may return "maybe", this function will
   1100      * perform further tests to arrive at a true/false result.
   1101      * @param str       the input string to be checked to see if it is
   1102      *                   normalized
   1103      * @param mode      the normalization mode
   1104      * @param options   Options for use with exclusion set and tailored Normalization
   1105      *                  The only option that is currently recognized is UNICODE_3_2
   1106      * @see #isNormalized
   1107      * @deprecated ICU 56 Use {@link Normalizer2} instead.
   1108      * @hide original deprecated declaration
   1109      */
   1110     @Deprecated
   1111     public static boolean isNormalized(String str, Mode mode, int options) {
   1112         return mode.getNormalizer2(options).isNormalized(str);
   1113     }
   1114 
   1115     /**
   1116      * Convenience Method
   1117      * @param char32    the input code point to be checked to see if it is
   1118      *                   normalized
   1119      * @param mode      the normalization mode
   1120      * @param options   Options for use with exclusion set and tailored Normalization
   1121      *                  The only option that is currently recognized is UNICODE_3_2
   1122      *
   1123      * @see #isNormalized
   1124      * @deprecated ICU 56 Use {@link Normalizer2} instead.
   1125      * @hide original deprecated declaration
   1126      */
   1127     @Deprecated
   1128     public static boolean isNormalized(int char32, Mode mode,int options) {
   1129         return isNormalized(UTF16.valueOf(char32), mode, options);
   1130     }
   1131 
   1132     /**
   1133      * Compare two strings for canonical equivalence.
   1134      * Further options include case-insensitive comparison and
   1135      * code point order (as opposed to code unit order).
   1136      *
   1137      * Canonical equivalence between two strings is defined as their normalized
   1138      * forms (NFD or NFC) being identical.
   1139      * This function compares strings incrementally instead of normalizing
   1140      * (and optionally case-folding) both strings entirely,
   1141      * improving performance significantly.
   1142      *
   1143      * Bulk normalization is only necessary if the strings do not fulfill the
   1144      * FCD conditions. Only in this case, and only if the strings are relatively
   1145      * long, is memory allocated temporarily.
   1146      * For FCD strings and short non-FCD strings there is no memory allocation.
   1147      *
   1148      * Semantically, this is equivalent to
   1149      *   strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
   1150      * where code point order and foldCase are all optional.
   1151      *
   1152      * @param s1        First source character array.
   1153      * @param s1Start   start index of source
   1154      * @param s1Limit   limit of the source
   1155      *
   1156      * @param s2        Second source character array.
   1157      * @param s2Start   start index of the source
   1158      * @param s2Limit   limit of the source
   1159      *
   1160      * @param options A bit set of options:
   1161      *   - FOLD_CASE_DEFAULT or 0 is used for default options:
   1162      *     Case-sensitive comparison in code unit order, and the input strings
   1163      *     are quick-checked for FCD.
   1164      *
   1165      *   - INPUT_IS_FCD
   1166      *     Set if the caller knows that both s1 and s2 fulfill the FCD
   1167      *     conditions.If not set, the function will quickCheck for FCD
   1168      *     and normalize if necessary.
   1169      *
   1170      *   - COMPARE_CODE_POINT_ORDER
   1171      *     Set to choose code point order instead of code unit order
   1172      *
   1173      *   - COMPARE_IGNORE_CASE
   1174      *     Set to compare strings case-insensitively using case folding,
   1175      *     instead of case-sensitively.
   1176      *     If set, then the following case folding options are used.
   1177      *
   1178      *
   1179      * @return &lt;0 or 0 or &gt;0 as usual for string comparisons
   1180      *
   1181      * @see #normalize
   1182      * @see #FCD
   1183      */
   1184     public static int compare(char[] s1, int s1Start, int s1Limit,
   1185                               char[] s2, int s2Start, int s2Limit,
   1186                               int options) {
   1187         if( s1==null || s1Start<0 || s1Limit<0 ||
   1188             s2==null || s2Start<0 || s2Limit<0 ||
   1189             s1Limit<s1Start || s2Limit<s2Start
   1190         ) {
   1191             throw new IllegalArgumentException();
   1192         }
   1193         return internalCompare(CharBuffer.wrap(s1, s1Start, s1Limit-s1Start),
   1194                                CharBuffer.wrap(s2, s2Start, s2Limit-s2Start),
   1195                                options);
   1196     }
   1197 
   1198     /**
   1199      * Compare two strings for canonical equivalence.
   1200      * Further options include case-insensitive comparison and
   1201      * code point order (as opposed to code unit order).
   1202      *
   1203      * Canonical equivalence between two strings is defined as their normalized
   1204      * forms (NFD or NFC) being identical.
   1205      * This function compares strings incrementally instead of normalizing
   1206      * (and optionally case-folding) both strings entirely,
   1207      * improving performance significantly.
   1208      *
   1209      * Bulk normalization is only necessary if the strings do not fulfill the
   1210      * FCD conditions. Only in this case, and only if the strings are relatively
   1211      * long, is memory allocated temporarily.
   1212      * For FCD strings and short non-FCD strings there is no memory allocation.
   1213      *
   1214      * Semantically, this is equivalent to
   1215      *   strcmp[CodePointOrder](foldCase(NFD(s1)), foldCase(NFD(s2)))
   1216      * where code point order and foldCase are all optional.
   1217      *
   1218      * @param s1 First source string.
   1219      * @param s2 Second source string.
   1220      *
   1221      * @param options A bit set of options:
   1222      *   - FOLD_CASE_DEFAULT or 0 is used for default options:
   1223      *     Case-sensitive comparison in code unit order, and the input strings
   1224      *     are quick-checked for FCD.
   1225      *
   1226      *   - INPUT_IS_FCD
   1227      *     Set if the caller knows that both s1 and s2 fulfill the FCD
   1228      *     conditions. If not set, the function will quickCheck for FCD
   1229      *     and normalize if necessary.
   1230      *
   1231      *   - COMPARE_CODE_POINT_ORDER
   1232      *     Set to choose code point order instead of code unit order
   1233      *
   1234      *   - COMPARE_IGNORE_CASE
   1235      *     Set to compare strings case-insensitively using case folding,
   1236      *     instead of case-sensitively.
   1237      *     If set, then the following case folding options are used.
   1238      *
   1239      * @return &lt;0 or 0 or &gt;0 as usual for string comparisons
   1240      *
   1241      * @see #normalize
   1242      * @see #FCD
   1243      */
   1244     public static int compare(String s1, String s2, int options) {
   1245         return internalCompare(s1, s2, options);
   1246     }
   1247 
   1248     /**
   1249      * Compare two strings for canonical equivalence.
   1250      * Further options include case-insensitive comparison and
   1251      * code point order (as opposed to code unit order).
   1252      * Convenience method.
   1253      *
   1254      * @param s1 First source string.
   1255      * @param s2 Second source string.
   1256      *
   1257      * @param options A bit set of options:
   1258      *   - FOLD_CASE_DEFAULT or 0 is used for default options:
   1259      *     Case-sensitive comparison in code unit order, and the input strings
   1260      *     are quick-checked for FCD.
   1261      *
   1262      *   - INPUT_IS_FCD
   1263      *     Set if the caller knows that both s1 and s2 fulfill the FCD
   1264      *     conditions. If not set, the function will quickCheck for FCD
   1265      *     and normalize if necessary.
   1266      *
   1267      *   - COMPARE_CODE_POINT_ORDER
   1268      *     Set to choose code point order instead of code unit order
   1269      *
   1270      *   - COMPARE_IGNORE_CASE
   1271      *     Set to compare strings case-insensitively using case folding,
   1272      *     instead of case-sensitively.
   1273      *     If set, then the following case folding options are used.
   1274      *
   1275      * @return &lt;0 or 0 or &gt;0 as usual for string comparisons
   1276      *
   1277      * @see #normalize
   1278      * @see #FCD
   1279      */
   1280     public static int compare(char[] s1, char[] s2, int options) {
   1281         return internalCompare(CharBuffer.wrap(s1), CharBuffer.wrap(s2), options);
   1282     }
   1283 
   1284     /**
   1285      * Convenience method that can have faster implementation
   1286      * by not allocating buffers.
   1287      * @param char32a    the first code point to be checked against the
   1288      * @param char32b    the second code point
   1289      * @param options    A bit set of options
   1290      */
   1291     public static int compare(int char32a, int char32b, int options) {
   1292         return internalCompare(UTF16.valueOf(char32a), UTF16.valueOf(char32b), options|INPUT_IS_FCD);
   1293     }
   1294 
   1295     /**
   1296      * Convenience method that can have faster implementation
   1297      * by not allocating buffers.
   1298      * @param char32a   the first code point to be checked against
   1299      * @param str2      the second string
   1300      * @param options   A bit set of options
   1301      */
   1302     public static int compare(int char32a, String str2, int options) {
   1303         return internalCompare(UTF16.valueOf(char32a), str2, options);
   1304     }
   1305 
   1306     /* Concatenation of normalized strings --------------------------------- */
   1307     /**
   1308      * Concatenate normalized strings, making sure that the result is normalized
   1309      * as well.
   1310      *
   1311      * If both the left and the right strings are in
   1312      * the normalization form according to "mode",
   1313      * then the result will be
   1314      *
   1315      * <code>
   1316      *     dest=normalize(left+right, mode)
   1317      * </code>
   1318      *
   1319      * With the input strings already being normalized,
   1320      * this function will use next() and previous()
   1321      * to find the adjacent end pieces of the input strings.
   1322      * Only the concatenation of these end pieces will be normalized and
   1323      * then concatenated with the remaining parts of the input strings.
   1324      *
   1325      * It is allowed to have dest==left to avoid copying the entire left string.
   1326      *
   1327      * @param left Left source array, may be same as dest.
   1328      * @param leftStart start in the left array.
   1329      * @param leftLimit limit in the left array (==length)
   1330      * @param right Right source array.
   1331      * @param rightStart start in the right array.
   1332      * @param rightLimit limit in the right array (==length)
   1333      * @param dest The output buffer; can be null if destStart==destLimit==0
   1334      *              for pure preflighting.
   1335      * @param destStart start in the destination array
   1336      * @param destLimit limit in the destination array (==length)
   1337      * @param mode The normalization mode.
   1338      * @param options The normalization options, ORed together (0 for no options).
   1339      * @return Length of output (number of chars) when successful or
   1340      *          IndexOutOfBoundsException
   1341      * @exception IndexOutOfBoundsException whose message has the string
   1342      *             representation of destination capacity required.
   1343      * @see #normalize
   1344      * @see #next
   1345      * @see #previous
   1346      * @exception IndexOutOfBoundsException if target capacity is less than the
   1347      *             required length
   1348      * @deprecated ICU 56 Use {@link Normalizer2} instead.
   1349      * @hide original deprecated declaration
   1350      */
   1351     @Deprecated
   1352     public static int concatenate(char[] left,  int leftStart,  int leftLimit,
   1353                                   char[] right, int rightStart, int rightLimit,
   1354                                   char[] dest,  int destStart,  int destLimit,
   1355                                   Normalizer.Mode mode, int options) {
   1356         if(dest == null) {
   1357             throw new IllegalArgumentException();
   1358         }
   1359 
   1360         /* check for overlapping right and destination */
   1361         if (right == dest && rightStart < destLimit && destStart < rightLimit) {
   1362             throw new IllegalArgumentException("overlapping right and dst ranges");
   1363         }
   1364 
   1365         /* allow left==dest */
   1366         StringBuilder destBuilder=new StringBuilder(leftLimit-leftStart+rightLimit-rightStart+16);
   1367         destBuilder.append(left, leftStart, leftLimit-leftStart);
   1368         CharBuffer rightBuffer=CharBuffer.wrap(right, rightStart, rightLimit-rightStart);
   1369         mode.getNormalizer2(options).append(destBuilder, rightBuffer);
   1370         int destLength=destBuilder.length();
   1371         if(destLength<=(destLimit-destStart)) {
   1372             destBuilder.getChars(0, destLength, dest, destStart);
   1373             return destLength;
   1374         } else {
   1375             throw new IndexOutOfBoundsException(Integer.toString(destLength));
   1376         }
   1377     }
   1378 
   1379     /**
   1380      * Concatenate normalized strings, making sure that the result is normalized
   1381      * as well.
   1382      *
   1383      * If both the left and the right strings are in
   1384      * the normalization form according to "mode",
   1385      * then the result will be
   1386      *
   1387      * <code>
   1388      *     dest=normalize(left+right, mode)
   1389      * </code>
   1390      *
   1391      * For details see concatenate
   1392      *
   1393      * @param left Left source string.
   1394      * @param right Right source string.
   1395      * @param mode The normalization mode.
   1396      * @param options The normalization options, ORed together (0 for no options).
   1397      * @return result
   1398      *
   1399      * @see #concatenate
   1400      * @see #normalize
   1401      * @see #next
   1402      * @see #previous
   1403      * @see #concatenate
   1404      * @deprecated ICU 56 Use {@link Normalizer2} instead.
   1405      * @hide original deprecated declaration
   1406      */
   1407     @Deprecated
   1408     public static String concatenate(char[] left, char[] right,Mode mode, int options) {
   1409         StringBuilder dest=new StringBuilder(left.length+right.length+16).append(left);
   1410         return mode.getNormalizer2(options).append(dest, CharBuffer.wrap(right)).toString();
   1411     }
   1412 
   1413     /**
   1414      * Concatenate normalized strings, making sure that the result is normalized
   1415      * as well.
   1416      *
   1417      * If both the left and the right strings are in
   1418      * the normalization form according to "mode",
   1419      * then the result will be
   1420      *
   1421      * <code>
   1422      *     dest=normalize(left+right, mode)
   1423      * </code>
   1424      *
   1425      * With the input strings already being normalized,
   1426      * this function will use next() and previous()
   1427      * to find the adjacent end pieces of the input strings.
   1428      * Only the concatenation of these end pieces will be normalized and
   1429      * then concatenated with the remaining parts of the input strings.
   1430      *
   1431      * @param left Left source string.
   1432      * @param right Right source string.
   1433      * @param mode The normalization mode.
   1434      * @param options The normalization options, ORed together (0 for no options).
   1435      * @return result
   1436      *
   1437      * @see #concatenate
   1438      * @see #normalize
   1439      * @see #next
   1440      * @see #previous
   1441      * @see #concatenate
   1442      * @deprecated ICU 56 Use {@link Normalizer2} instead.
   1443      * @hide original deprecated declaration
   1444      */
   1445     @Deprecated
   1446     public static String concatenate(String left, String right, Mode mode, int options) {
   1447         StringBuilder dest=new StringBuilder(left.length()+right.length()+16).append(left);
   1448         return mode.getNormalizer2(options).append(dest, right).toString();
   1449     }
   1450 
   1451     /**
   1452      * Gets the FC_NFKC closure value.
   1453      * @param c The code point whose closure value is to be retrieved
   1454      * @param dest The char array to receive the closure value
   1455      * @return the length of the closure value; 0 if there is none
   1456      * @deprecated ICU 56
   1457      * @hide original deprecated declaration
   1458      */
   1459     @Deprecated
   1460     public static int getFC_NFKC_Closure(int c,char[] dest) {
   1461         String closure=getFC_NFKC_Closure(c);
   1462         int length=closure.length();
   1463         if(length!=0 && dest!=null && length<=dest.length) {
   1464             closure.getChars(0, length, dest, 0);
   1465         }
   1466         return length;
   1467     }
   1468     /**
   1469      * Gets the FC_NFKC closure value.
   1470      * @param c The code point whose closure value is to be retrieved
   1471      * @return String representation of the closure value; "" if there is none
   1472      * @deprecated ICU 56
   1473      * @hide original deprecated declaration
   1474      */
   1475     @Deprecated
   1476     public static String getFC_NFKC_Closure(int c) {
   1477         // Compute the FC_NFKC_Closure on the fly:
   1478         // We have the API for complete coverage of Unicode properties, although
   1479         // this value by itself is not useful via API.
   1480         // (What could be useful is a custom normalization table that combines
   1481         // case folding and NFKC.)
   1482         // For the derivation, see Unicode's DerivedNormalizationProps.txt.
   1483         Normalizer2 nfkc=NFKCModeImpl.INSTANCE.normalizer2;
   1484         UCaseProps csp=UCaseProps.INSTANCE;
   1485         // first: b = NFKC(Fold(a))
   1486         StringBuilder folded=new StringBuilder();
   1487         int folded1Length=csp.toFullFolding(c, folded, 0);
   1488         if(folded1Length<0) {
   1489             Normalizer2Impl nfkcImpl=((Norm2AllModes.Normalizer2WithImpl)nfkc).impl;
   1490             if(nfkcImpl.getCompQuickCheck(nfkcImpl.getNorm16(c))!=0) {
   1491                 return "";  // c does not change at all under CaseFolding+NFKC
   1492             }
   1493             folded.appendCodePoint(c);
   1494         } else {
   1495             if(folded1Length>UCaseProps.MAX_STRING_LENGTH) {
   1496                 folded.appendCodePoint(folded1Length);
   1497             }
   1498         }
   1499         String kc1=nfkc.normalize(folded);
   1500         // second: c = NFKC(Fold(b))
   1501         String kc2=nfkc.normalize(UCharacter.foldCase(kc1, 0));
   1502         // if (c != b) add the mapping from a to c
   1503         if(kc1.equals(kc2)) {
   1504             return "";
   1505         } else {
   1506             return kc2;
   1507         }
   1508     }
   1509 
   1510     //-------------------------------------------------------------------------
   1511     // Iteration API
   1512     //-------------------------------------------------------------------------
   1513 
   1514     /**
   1515      * Return the current character in the normalized text.
   1516      * @return The codepoint as an int
   1517      * @deprecated ICU 56
   1518      * @hide original deprecated declaration
   1519      */
   1520     @Deprecated
   1521     public int current() {
   1522         if(bufferPos<buffer.length() || nextNormalize()) {
   1523             return buffer.codePointAt(bufferPos);
   1524         } else {
   1525             return DONE;
   1526         }
   1527     }
   1528 
   1529     /**
   1530      * Return the next character in the normalized text and advance
   1531      * the iteration position by one.  If the end
   1532      * of the text has already been reached, {@link #DONE} is returned.
   1533      * @return The codepoint as an int
   1534      * @deprecated ICU 56
   1535      * @hide original deprecated declaration
   1536      */
   1537     @Deprecated
   1538     public int next() {
   1539         if(bufferPos<buffer.length() ||  nextNormalize()) {
   1540             int c=buffer.codePointAt(bufferPos);
   1541             bufferPos+=Character.charCount(c);
   1542             return c;
   1543         } else {
   1544             return DONE;
   1545         }
   1546     }
   1547 
   1548 
   1549     /**
   1550      * Return the previous character in the normalized text and decrement
   1551      * the iteration position by one.  If the beginning
   1552      * of the text has already been reached, {@link #DONE} is returned.
   1553      * @return The codepoint as an int
   1554      * @deprecated ICU 56
   1555      * @hide original deprecated declaration
   1556      */
   1557     @Deprecated
   1558     public int previous() {
   1559         if(bufferPos>0 || previousNormalize()) {
   1560             int c=buffer.codePointBefore(bufferPos);
   1561             bufferPos-=Character.charCount(c);
   1562             return c;
   1563         } else {
   1564             return DONE;
   1565         }
   1566     }
   1567 
   1568     /**
   1569      * Reset the index to the beginning of the text.
   1570      * This is equivalent to setIndexOnly(startIndex)).
   1571      * @deprecated ICU 56
   1572      * @hide original deprecated declaration
   1573      */
   1574     @Deprecated
   1575     public void reset() {
   1576         text.setToStart();
   1577         currentIndex=nextIndex=0;
   1578         clearBuffer();
   1579     }
   1580 
   1581     /**
   1582      * Set the iteration position in the input text that is being normalized,
   1583      * without any immediate normalization.
   1584      * After setIndexOnly(), getIndex() will return the same index that is
   1585      * specified here.
   1586      *
   1587      * @param index the desired index in the input text.
   1588      * @deprecated ICU 56
   1589      * @hide original deprecated declaration
   1590      */
   1591     @Deprecated
   1592     public void setIndexOnly(int index) {
   1593         text.setIndex(index);  // validates index
   1594         currentIndex=nextIndex=index;
   1595         clearBuffer();
   1596     }
   1597 
   1598     /**
   1599      * Set the iteration position in the input text that is being normalized
   1600      * and return the first normalized character at that position.
   1601      * <p>
   1602      * <b>Note:</b> This method sets the position in the <em>input</em> text,
   1603      * while {@link #next} and {@link #previous} iterate through characters
   1604      * in the normalized <em>output</em>.  This means that there is not
   1605      * necessarily a one-to-one correspondence between characters returned
   1606      * by <tt>next</tt> and <tt>previous</tt> and the indices passed to and
   1607      * returned from <tt>setIndex</tt> and {@link #getIndex}.
   1608      * <p>
   1609      * @param index the desired index in the input text.
   1610      *
   1611      * @return   the first normalized character that is the result of iterating
   1612      *            forward starting at the given index.
   1613      *
   1614      * @throws IllegalArgumentException if the given index is less than
   1615      *          {@link #getBeginIndex} or greater than {@link #getEndIndex}.
   1616      * @deprecated ICU 3.2
   1617      * @obsolete ICU 3.2
   1618      * @hide original deprecated declaration
   1619      */
   1620     @Deprecated
   1621      ///CLOVER:OFF
   1622      public int setIndex(int index) {
   1623          setIndexOnly(index);
   1624          return current();
   1625      }
   1626      ///CLOVER:ON
   1627     /**
   1628      * Retrieve the index of the start of the input text. This is the begin
   1629      * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
   1630      * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
   1631      * @deprecated ICU 2.2. Use startIndex() instead.
   1632      * @return The codepoint as an int
   1633      * @see #startIndex
   1634      * @hide original deprecated declaration
   1635      */
   1636     @Deprecated
   1637     public int getBeginIndex() {
   1638         return 0;
   1639     }
   1640 
   1641     /**
   1642      * Retrieve the index of the end of the input text.  This is the end index
   1643      * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
   1644      * over which this <tt>Normalizer</tt> is iterating
   1645      * @deprecated ICU 2.2. Use endIndex() instead.
   1646      * @return The codepoint as an int
   1647      * @see #endIndex
   1648      * @hide original deprecated declaration
   1649      */
   1650     @Deprecated
   1651     public int getEndIndex() {
   1652         return endIndex();
   1653     }
   1654     /**
   1655      * Return the first character in the normalized text.  This resets
   1656      * the <tt>Normalizer's</tt> position to the beginning of the text.
   1657      * @return The codepoint as an int
   1658      * @deprecated ICU 56
   1659      * @hide original deprecated declaration
   1660      */
   1661     @Deprecated
   1662     public int first() {
   1663         reset();
   1664         return next();
   1665     }
   1666 
   1667     /**
   1668      * Return the last character in the normalized text.  This resets
   1669      * the <tt>Normalizer's</tt> position to be just before the
   1670      * the input text corresponding to that normalized character.
   1671      * @return The codepoint as an int
   1672      * @deprecated ICU 56
   1673      * @hide original deprecated declaration
   1674      */
   1675     @Deprecated
   1676     public int last() {
   1677         text.setToLimit();
   1678         currentIndex=nextIndex=text.getIndex();
   1679         clearBuffer();
   1680         return previous();
   1681     }
   1682 
   1683     /**
   1684      * Retrieve the current iteration position in the input text that is
   1685      * being normalized.  This method is useful in applications such as
   1686      * searching, where you need to be able to determine the position in
   1687      * the input text that corresponds to a given normalized output character.
   1688      * <p>
   1689      * <b>Note:</b> This method sets the position in the <em>input</em>, while
   1690      * {@link #next} and {@link #previous} iterate through characters in the
   1691      * <em>output</em>.  This means that there is not necessarily a one-to-one
   1692      * correspondence between characters returned by <tt>next</tt> and
   1693      * <tt>previous</tt> and the indices passed to and returned from
   1694      * <tt>setIndex</tt> and {@link #getIndex}.
   1695      * @return The current iteration position
   1696      * @deprecated ICU 56
   1697      * @hide original deprecated declaration
   1698      */
   1699     @Deprecated
   1700     public int getIndex() {
   1701         if(bufferPos<buffer.length()) {
   1702             return currentIndex;
   1703         } else {
   1704             return nextIndex;
   1705         }
   1706     }
   1707 
   1708     /**
   1709      * Retrieve the index of the start of the input text. This is the begin
   1710      * index of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the
   1711      * <tt>String</tt> over which this <tt>Normalizer</tt> is iterating
   1712      * @return The current iteration position
   1713      * @deprecated ICU 56
   1714      * @hide original deprecated declaration
   1715      */
   1716     @Deprecated
   1717     public int startIndex() {
   1718         return 0;
   1719     }
   1720 
   1721     /**
   1722      * Retrieve the index of the end of the input text.  This is the end index
   1723      * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt>
   1724      * over which this <tt>Normalizer</tt> is iterating
   1725      * @return The current iteration position
   1726      * @deprecated ICU 56
   1727      * @hide original deprecated declaration
   1728      */
   1729     @Deprecated
   1730     public int endIndex() {
   1731         return text.getLength();
   1732     }
   1733 
   1734     //-------------------------------------------------------------------------
   1735     // Iterator attributes
   1736     //-------------------------------------------------------------------------
   1737     /**
   1738      * Set the normalization mode for this object.
   1739      * <p>
   1740      * <b>Note:</b>If the normalization mode is changed while iterating
   1741      * over a string, calls to {@link #next} and {@link #previous} may
   1742      * return previously buffers characters in the old normalization mode
   1743      * until the iteration is able to re-sync at the next base character.
   1744      * It is safest to call {@link #setText setText()}, {@link #first},
   1745      * {@link #last}, etc. after calling <tt>setMode</tt>.
   1746      * <p>
   1747      * @param newMode the new mode for this <tt>Normalizer</tt>.
   1748      * The supported modes are:
   1749      * <ul>
   1750      *  <li>{@link #NFC}    - Unicode canonical decompositiion
   1751      *                        followed by canonical composition.
   1752      *  <li>{@link #NFKC}   - Unicode compatibility decompositiion
   1753      *                        follwed by canonical composition.
   1754      *  <li>{@link #NFD}    - Unicode canonical decomposition
   1755      *  <li>{@link #NFKD}   - Unicode compatibility decomposition.
   1756      *  <li>{@link #NONE}   - Do nothing but return characters
   1757      *                        from the underlying input text.
   1758      * </ul>
   1759      *
   1760      * @see #getMode
   1761      * @deprecated ICU 56
   1762      * @hide original deprecated declaration
   1763      */
   1764     @Deprecated
   1765     public void setMode(Mode newMode) {
   1766         mode = newMode;
   1767         norm2 = mode.getNormalizer2(options);
   1768     }
   1769     /**
   1770      * Return the basic operation performed by this <tt>Normalizer</tt>
   1771      *
   1772      * @see #setMode
   1773      * @deprecated ICU 56
   1774      * @hide original deprecated declaration
   1775      */
   1776     @Deprecated
   1777     public Mode getMode() {
   1778         return mode;
   1779     }
   1780     /**
   1781      * Set options that affect this <tt>Normalizer</tt>'s operation.
   1782      * Options do not change the basic composition or decomposition operation
   1783      * that is being performed , but they control whether
   1784      * certain optional portions of the operation are done.
   1785      * Currently the only available option is:
   1786      *
   1787      * <ul>
   1788      *   <li>{@link #UNICODE_3_2} - Use Normalization conforming to Unicode version 3.2.
   1789      * </ul>
   1790      *
   1791      * @param   option  the option whose value is to be set.
   1792      * @param   value   the new setting for the option.  Use <tt>true</tt> to
   1793      *                  turn the option on and <tt>false</tt> to turn it off.
   1794      *
   1795      * @see #getOption
   1796      * @deprecated ICU 56
   1797      * @hide original deprecated declaration
   1798      */
   1799     @Deprecated
   1800     public void setOption(int option,boolean value) {
   1801         if (value) {
   1802             options |= option;
   1803         } else {
   1804             options &= (~option);
   1805         }
   1806         norm2 = mode.getNormalizer2(options);
   1807     }
   1808 
   1809     /**
   1810      * Determine whether an option is turned on or off.
   1811      * <p>
   1812      * @see #setOption
   1813      * @deprecated ICU 56
   1814      * @hide original deprecated declaration
   1815      */
   1816     @Deprecated
   1817     public int getOption(int option) {
   1818         if((options & option)!=0) {
   1819             return 1 ;
   1820         } else {
   1821             return 0;
   1822         }
   1823     }
   1824 
   1825     /**
   1826      * Gets the underlying text storage
   1827      * @param fillIn the char buffer to fill the UTF-16 units.
   1828      *         The length of the buffer should be equal to the length of the
   1829      *         underlying text storage
   1830      * @throws IndexOutOfBoundsException If the index passed for the array is invalid.
   1831      * @see   #getLength
   1832      * @deprecated ICU 56
   1833      * @hide original deprecated declaration
   1834      */
   1835     @Deprecated
   1836     public int getText(char[] fillIn) {
   1837         return text.getText(fillIn);
   1838     }
   1839 
   1840     /**
   1841      * Gets the length of underlying text storage
   1842      * @return the length
   1843      * @deprecated ICU 56
   1844      * @hide original deprecated declaration
   1845      */
   1846     @Deprecated
   1847     public int getLength() {
   1848         return text.getLength();
   1849     }
   1850 
   1851     /**
   1852      * Returns the text under iteration as a string
   1853      * @return a copy of the text under iteration.
   1854      * @deprecated ICU 56
   1855      * @hide original deprecated declaration
   1856      */
   1857     @Deprecated
   1858     public String getText() {
   1859         return text.getText();
   1860     }
   1861 
   1862     /**
   1863      * Set the input text over which this <tt>Normalizer</tt> will iterate.
   1864      * The iteration position is set to the beginning of the input text.
   1865      * @param newText   The new string to be normalized.
   1866      * @deprecated ICU 56
   1867      * @hide original deprecated declaration
   1868      */
   1869     @Deprecated
   1870     public void setText(StringBuffer newText) {
   1871         UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
   1872         if (newIter == null) {
   1873             throw new IllegalStateException("Could not create a new UCharacterIterator");
   1874         }
   1875         text = newIter;
   1876         reset();
   1877     }
   1878 
   1879     /**
   1880      * Set the input text over which this <tt>Normalizer</tt> will iterate.
   1881      * The iteration position is set to the beginning of the input text.
   1882      * @param newText   The new string to be normalized.
   1883      * @deprecated ICU 56
   1884      * @hide original deprecated declaration
   1885      */
   1886     @Deprecated
   1887     public void setText(char[] newText) {
   1888         UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
   1889         if (newIter == null) {
   1890             throw new IllegalStateException("Could not create a new UCharacterIterator");
   1891         }
   1892         text = newIter;
   1893         reset();
   1894     }
   1895 
   1896     /**
   1897      * Set the input text over which this <tt>Normalizer</tt> will iterate.
   1898      * The iteration position is set to the beginning of the input text.
   1899      * @param newText   The new string to be normalized.
   1900      * @deprecated ICU 56
   1901      * @hide original deprecated declaration
   1902      */
   1903     @Deprecated
   1904     public void setText(String newText) {
   1905         UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
   1906         if (newIter == null) {
   1907             throw new IllegalStateException("Could not create a new UCharacterIterator");
   1908         }
   1909         text = newIter;
   1910         reset();
   1911     }
   1912 
   1913     /**
   1914      * Set the input text over which this <tt>Normalizer</tt> will iterate.
   1915      * The iteration position is set to the beginning of the input text.
   1916      * @param newText   The new string to be normalized.
   1917      * @deprecated ICU 56
   1918      * @hide original deprecated declaration
   1919      */
   1920     @Deprecated
   1921     public void setText(CharacterIterator newText) {
   1922         UCharacterIterator newIter = UCharacterIterator.getInstance(newText);
   1923         if (newIter == null) {
   1924             throw new IllegalStateException("Could not create a new UCharacterIterator");
   1925         }
   1926         text = newIter;
   1927         reset();
   1928     }
   1929 
   1930     /**
   1931      * Set the input text over which this <tt>Normalizer</tt> will iterate.
   1932      * The iteration position is set to the beginning of the string.
   1933      * @param newText   The new string to be normalized.
   1934      * @deprecated ICU 56
   1935      * @hide original deprecated declaration
   1936      */
   1937     @Deprecated
   1938     public void setText(UCharacterIterator newText) {
   1939         try{
   1940             UCharacterIterator newIter = (UCharacterIterator)newText.clone();
   1941             if (newIter == null) {
   1942                 throw new IllegalStateException("Could not create a new UCharacterIterator");
   1943             }
   1944             text = newIter;
   1945             reset();
   1946         }catch(CloneNotSupportedException e) {
   1947             throw new ICUCloneNotSupportedException("Could not clone the UCharacterIterator", e);
   1948         }
   1949     }
   1950 
   1951     private void clearBuffer() {
   1952         buffer.setLength(0);
   1953         bufferPos=0;
   1954     }
   1955 
   1956     private boolean nextNormalize() {
   1957         clearBuffer();
   1958         currentIndex=nextIndex;
   1959         text.setIndex(nextIndex);
   1960         // Skip at least one character so we make progress.
   1961         int c=text.nextCodePoint();
   1962         if(c<0) {
   1963             return false;
   1964         }
   1965         StringBuilder segment=new StringBuilder().appendCodePoint(c);
   1966         while((c=text.nextCodePoint())>=0) {
   1967             if(norm2.hasBoundaryBefore(c)) {
   1968                 text.moveCodePointIndex(-1);
   1969                 break;
   1970             }
   1971             segment.appendCodePoint(c);
   1972         }
   1973         nextIndex=text.getIndex();
   1974         norm2.normalize(segment, buffer);
   1975         return buffer.length()!=0;
   1976     }
   1977 
   1978     private boolean previousNormalize() {
                // Normalizes the text segment that ends at currentIndex and leaves the
                // result in buffer, with bufferPos placed at the end of the buffer.
                // On return, nextIndex is the old currentIndex and currentIndex is the
                // iterator index reached after reading backwards.
                // Returns whether the buffer is non-empty after normalization.
   1979         clearBuffer();
   1980         nextIndex=currentIndex;
   1981         text.setIndex(currentIndex);
   1982         StringBuilder segment=new StringBuilder();
   1983         int c;
                // Read backwards, prepending each code point, until the iterator returns
                // a negative value or a code point with a boundary before it has been
                // included in the segment.
   1984         while((c=text.previousCodePoint())>=0) {
   1985             if(c<=0xffff) {
                        // BMP code point: a single char
   1986                 segment.insert(0, (char)c);
   1987             } else {
                        // supplementary code point: insert both chars returned by toChars()
   1988                 segment.insert(0, Character.toChars(c));
   1989             }
   1990             if(norm2.hasBoundaryBefore(c)) {
   1991                 break;
   1992             }
   1993         }
   1994         currentIndex=text.getIndex();
   1995         norm2.normalize(segment, buffer);
   1996         bufferPos=buffer.length();
   1997         return buffer.length()!=0;
   1998     }
   1999 
   2000     /* compare canonically equivalent ------------------------------------------- */
   2001 
   2002     // TODO: Broaden the public compare(String, String, options) API like this. Ticket #7407
            /*
             * Compares s1 and s2 by handing them to cmpEquivFold() with COMPARE_EQUIV
             * added to the options.
             *
             * Unless INPUT_IS_FCD is set and FOLD_CASE_EXCLUDE_SPECIAL_I is not, each
             * string is first checked with a Normalizer2 (NFD when
             * FOLD_CASE_EXCLUDE_SPECIAL_I is set, otherwise FCD). If the quick check
             * does not pass for the whole string, the string is replaced by a new
             * StringBuilder holding the passing prefix followed by the normalized rest.
             *
             * s1, s2:  the strings to compare
             * options: comparison option bits; the bits from COMPARE_NORM_OPTIONS_SHIFT
             *          upwards are passed to getNormalizer2() as normalization options
             * returns: the result of cmpEquivFold() for the (possibly normalized) strings
             */
   2003     private static int internalCompare(CharSequence s1, CharSequence s2, int options) {
   2004         int normOptions=options>>>COMPARE_NORM_OPTIONS_SHIFT;
   2005         options|= COMPARE_EQUIV;
   2006 
   2007         /*
   2008          * UAX #21 Case Mappings, as fixed for Unicode version 4
   2009          * (see Jitterbug 2021), defines a canonical caseless match as
   2010          *
   2011          * A string X is a canonical caseless match
   2012          * for a string Y if and only if
   2013          * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
   2014          *
   2015          * For better performance, we check for FCD (or let the caller tell us that
   2016          * both strings are in FCD) for the inner normalization.
   2017          * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
   2018          * case-folding preserves the FCD-ness of a string.
   2019          * The outer normalization is then only performed by cmpEquivFold()
   2020          * when there is a difference.
   2021          *
   2022          * Exception: When using the Turkic case-folding option, we do perform
   2023          * full NFD first. This is because in the Turkic case precomposed characters
   2024          * with 0049 capital I or 0069 small i fold differently whether they
   2025          * are first decomposed or not, so an FCD check - a check only for
   2026          * canonical order - is not sufficient.
   2027          */
   2028         if((options&INPUT_IS_FCD)==0 || (options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) {
   2029             Normalizer2 n2;
   2030             if((options&FOLD_CASE_EXCLUDE_SPECIAL_I)!=0) {
   2031                 n2=NFD.getNormalizer2(normOptions);
   2032             } else {
   2033                 n2=FCD.getNormalizer2(normOptions);
   2034             }
   2035 
   2036             // check if s1 and/or s2 fulfill the FCD conditions
   2037             int spanQCYes1=n2.spanQuickCheckYes(s1);
   2038             int spanQCYes2=n2.spanQuickCheckYes(s2);
   2039 
   2040             /*
   2041              * ICU 2.4 had a further optimization:
   2042              * If both strings were not in FCD, then they were both NFD'ed,
   2043              * and the COMPARE_EQUIV option was turned off.
   2044              * It is not entirely clear that this is valid with the current
   2045              * definition of the canonical caseless match.
   2046              * Therefore, ICU 2.6 removes that optimization.
   2047              */
   2048 
                    // The quick check stopped before the end of the string: copy the
                    // passing prefix unchanged and normalize only the remainder onto it.
   2049             if(spanQCYes1<s1.length()) {
   2050                 StringBuilder fcd1=new StringBuilder(s1.length()+16).append(s1, 0, spanQCYes1);
   2051                 s1=n2.normalizeSecondAndAppend(fcd1, s1.subSequence(spanQCYes1, s1.length()));
   2052             }
   2053             if(spanQCYes2<s2.length()) {
   2054                 StringBuilder fcd2=new StringBuilder(s2.length()+16).append(s2, 0, spanQCYes2);
   2055                 s2=n2.normalizeSecondAndAppend(fcd2, s2.subSequence(spanQCYes2, s2.length()));
   2056             }
   2057         }
   2058 
   2059         return cmpEquivFold(s1, s2, options);
   2060     }
   2061 
   2062     /*
   2063      * Compare two strings for canonical equivalence.
   2064      * Further options include case-insensitive comparison and
   2065      * code point order (as opposed to code unit order).
   2066      *
   2067      * In this function, canonical equivalence is optional as well.
   2068      * If canonical equivalence is tested, then both strings must fulfill
   2069      * the FCD check.
   2070      *
   2071      * Semantically, this is equivalent to
   2072      *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
   2073      * where code point order, NFD and foldCase are all optional.
   2074      *
   2075      * String comparisons almost always yield results before processing both strings
   2076      * completely.
   2077      * They are generally more efficient working incrementally instead of
   2078      * performing the sub-processing (strlen, normalization, case-folding)
   2079      * on the entire strings first.
   2080      *
   2081      * It is also unnecessary to normalize identical characters.
   2082      *
   2083      * This function works in principle as follows:
   2084      *
   2085      * loop {
   2086      *   get one code unit c1 from s1 (-1 if end of source)
   2087      *   get one code unit c2 from s2 (-1 if end of source)
   2088      *
   2089      *   if(either string finished) {
   2090      *     return result;
   2091      *   }
   2092      *   if(c1==c2) {
   2093      *     continue;
   2094      *   }
   2095      *
   2096      *   // c1!=c2
   2097      *   try to decompose/case-fold c1/c2, and continue if one does;
   2098      *
   2099      *   // still c1!=c2 and neither decomposes/case-folds, return result
   2100      *   return c1-c2;
   2101      * }
   2102      *
   2103      * When a character decomposes, then the pointer for that source changes to
   2104      * the decomposition, pushing the previous pointer onto a stack.
   2105      * When the end of the decomposition is reached, then the code unit reader
   2106      * pops the previous source from the stack.
   2107      * (Same for case-folding.)
   2108      *
   2109      * This is complicated further by operating on variable-width UTF-16.
   2110      * The top part of the loop works on code units, while lookups for decomposition
   2111      * and case-folding need code points.
   2112      * Code points are assembled after the equality/end-of-source part.
   2113      * The source pointer is only advanced beyond all code units when the code point
   2114      * actually decomposes/case-folds.
   2115      *
   2116      * If we were on a trail surrogate unit when assembling a code point,
   2117      * and the code point decomposes/case-folds, then the decomposition/folding
   2118      * result must be compared with the part of the other string that corresponds to
   2119      * this string's lead surrogate.
   2120      * Since we only assemble a code point when hitting a trail unit when the
   2121      * preceding lead units were identical, we back up the other string by one unit
   2122      * in such a case.
   2123      *
   2124      * The optional code point order comparison at the end works with
   2125      * the same fix-up as the other code point order comparison functions.
   2126      * See ustring.c and the comment near the end of this function.
   2127      *
   2128      * Assumption: A decomposition or case-folding result string never contains
   2129      * a single surrogate. This is a safe assumption in the Unicode Standard.
   2130      * Therefore, we do not need to check for surrogate pairs across
   2131      * decomposition/case-folding boundaries.
   2132      *
   2133      * Further assumptions (see the verifications in tstnorm.cpp):
   2134      * The API function checks for FCD first, while the core function
   2135      * first case-folds and then decomposes. This requires that case-folding does not
   2136      * un-FCD any strings.
   2137      *
   2138      * The API function may also NFD the input and turn off decomposition.
   2139      * This requires that case-folding does not un-NFD strings either.
   2140      *
   2141      * TODO If any of the above two assumptions is violated,
   2142      * then this entire code must be re-thought.
   2143      * If this happens, then a simple solution is to case-fold both strings up front
   2144      * and to turn off UNORM_INPUT_IS_FCD.
   2145      * We already do this when not both strings are in FCD because makeFCD
   2146      * would be a partial NFD before the case folding, which does not work.
   2147      * Note that all of this is only a problem when case-folding _and_
   2148      * canonical equivalence come together.
   2149      * (Comments in unorm_compare() are more up to date than this TODO.)
   2150      */
   2151 
   2152     /* stack element for previous-level source/decomposition pointers */
   2153     private static final class CmpEquivLevel {
                /*
                 * source that was being read when this level was pushed;
                 * cmpEquivFold() stores null here to mark a skipped intermediate level
                 */
   2154         CharSequence cs;
                /* index in cs at which reading resumes when this level is popped */
   2155         int s;
   2156     };
   2157     private static final CmpEquivLevel[] createCmpEquivLevelStack() {
                // Returns a new two-element stack with a fresh CmpEquivLevel in each slot.
                // cmpEquivFold() creates one per string, and only when that string first
                // needs a level pushed.
   2158         return new CmpEquivLevel[] {
   2159             new CmpEquivLevel(), new CmpEquivLevel()
   2160         };
   2161     }
   2162 
   2163     /**
   2164      * Internal option for unorm_cmpEquivFold() for decomposing.
   2165      * If not set, just do strcasecmp().
             * <p>
             * In this class the bit is tested by cmpEquivFold(), which looks up
             * decompositions only when it is set; internalCompare() always sets it.
   2166      */
   2167     private static final int COMPARE_EQUIV=0x80000;
   2168 
   2169     /* internal function; package visibility for use by UTF16.StringComparator */
            /*
             * Compares cs1 and cs2 code unit by code unit, optionally treating
             * case-folded (COMPARE_IGNORE_CASE) and/or canonically decomposed
             * (COMPARE_EQUIV) forms as equal. Only a code point at which the two
             * strings differ is folded or decomposed; the replacement text is then
             * read in place of that code point.
             *
             * cs1, cs2: the strings to compare
             * options:  option bits; this function tests COMPARE_EQUIV,
             *           COMPARE_IGNORE_CASE and COMPARE_CODE_POINT_ORDER and passes
             *           the whole value on to UCaseProps.toFullFolding()
             * returns:  0 if both strings end without a difference, -1 if string 1
             *           ends first, 1 if string 2 ends first, otherwise the
             *           difference c1-c2 of the first differing code units (adjusted
             *           for code point order if COMPARE_CODE_POINT_ORDER is set)
             */
   2170     /*package*/ static int cmpEquivFold(CharSequence cs1, CharSequence cs2, int options) {
   2171         Normalizer2Impl nfcImpl;
   2172         UCaseProps csp;
   2173 
   2174         /* current-level start/limit - s1/s2 as current */
   2175         int s1, s2, limit1, limit2;
   2176 
   2177         /* decomposition and case folding variables */
   2178         int length;
   2179 
   2180         /* stacks of previous-level start/current/limit */
   2181         CmpEquivLevel[] stack1=null, stack2=null;
   2182 
   2183         /* buffers for algorithmic decompositions */
   2184         String decomp1, decomp2;
   2185 
   2186         /* case folding buffers, only use current-level start/limit */
   2187         StringBuilder fold1, fold2;
   2188 
   2189         /* track which is the current level per string */
   2190         int level1, level2;
   2191 
   2192         /* current code units, and code points for lookups */
   2193         int c1, c2, cp1, cp2;
   2194 
   2195         /* no argument error checking because this itself is not an API */
   2196 
   2197         /*
   2198          * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
   2199          * otherwise this function must behave exactly as uprv_strCompare()
   2200          * not checking for that here makes testing this function easier
   2201          */
   2202 
   2203         /* normalization/properties data loaded? */
   2204         if((options&COMPARE_EQUIV)!=0) {
   2205             nfcImpl=Norm2AllModes.getNFCInstance().impl;
   2206         } else {
   2207             nfcImpl=null;
   2208         }
   2209         if((options&COMPARE_IGNORE_CASE)!=0) {
   2210             csp=UCaseProps.INSTANCE;
   2211             fold1=new StringBuilder();
   2212             fold2=new StringBuilder();
   2213         } else {
   2214             csp=null;
   2215             fold1=fold2=null;
   2216         }
   2217 
   2218         /* initialize */
   2219         s1=0;
   2220         limit1=cs1.length();
   2221         s2=0;
   2222         limit2=cs2.length();
   2223 
   2224         level1=level2=0;
   2225         c1=c2=-1;
   2226 
   2227         /* comparison loop */
   2228         for(;;) {
   2229             /*
   2230              * here a code unit value of -1 means "get another code unit"
   2231              * below it will mean "this source is finished"
   2232              */
   2233 
   2234             if(c1<0) {
   2235                 /* get next code unit from string 1, post-increment */
   2236                 for(;;) {
   2237                     if(s1==limit1) {
   2238                         if(level1==0) {
   2239                             c1=-1;
   2240                             break;
   2241                         }
   2242                     } else {
   2243                         c1=cs1.charAt(s1++);
   2244                         break;
   2245                     }
   2246 
   2247                     /* reached end of level buffer, pop one level */
                            /* levels pushed as empty placeholders (cs==null) are skipped */
   2248                     do {
   2249                         --level1;
   2250                         cs1=stack1[level1].cs;
   2251                     } while(cs1==null);
   2252                     s1=stack1[level1].s;
   2253                     limit1=cs1.length();
   2254                 }
   2255             }
   2256 
   2257             if(c2<0) {
   2258                 /* get next code unit from string 2, post-increment */
   2259                 for(;;) {
   2260                     if(s2==limit2) {
   2261                         if(level2==0) {
   2262                             c2=-1;
   2263                             break;
   2264                         }
   2265                     } else {
   2266                         c2=cs2.charAt(s2++);
   2267                         break;
   2268                     }
   2269 
   2270                     /* reached end of level buffer, pop one level */
                            /* levels pushed as empty placeholders (cs==null) are skipped */
   2271                     do {
   2272                         --level2;
   2273                         cs2=stack2[level2].cs;
   2274                     } while(cs2==null);
   2275                     s2=stack2[level2].s;
   2276                     limit2=cs2.length();
   2277                 }
   2278             }
   2279 
   2280             /*
   2281              * compare c1 and c2
   2282              * either variable c1, c2 is -1 only if the corresponding string is finished
   2283              */
   2284             if(c1==c2) {
   2285                 if(c1<0) {
   2286                     return 0;   /* c1==c2==-1 indicating end of strings */
   2287                 }
   2288                 c1=c2=-1;       /* make us fetch new code units */
   2289                 continue;
   2290             } else if(c1<0) {
   2291                 return -1;      /* string 1 ends before string 2 */
   2292             } else if(c2<0) {
   2293                 return 1;       /* string 2 ends before string 1 */
   2294             }
   2295             /* c1!=c2 && c1>=0 && c2>=0 */
   2296 
   2297             /* get complete code points for c1, c2 for lookups if either is a surrogate */
   2298             cp1=c1;
   2299             if(UTF16.isSurrogate((char)c1)) {
   2300                 char c;
   2301 
   2302                 if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
   2303                     if(s1!=limit1 && Character.isLowSurrogate(c=cs1.charAt(s1))) {
   2304                         /* advance ++s1; only below if cp1 decomposes/case-folds */
   2305                         cp1=Character.toCodePoint((char)c1, c);
   2306                     }
   2307                 } else /* isTrail(c1) */ {
                            /* s1 was post-incremented, so c1 is at s1-1 and its possible lead at s1-2 */
   2308                     if(0<=(s1-2) && Character.isHighSurrogate(c=cs1.charAt(s1-2))) {
   2309                         cp1=Character.toCodePoint(c, (char)c1);
   2310                     }
   2311                 }
   2312             }
   2313 
   2314             cp2=c2;
   2315             if(UTF16.isSurrogate((char)c2)) {
   2316                 char c;
   2317 
   2318                 if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
   2319                     if(s2!=limit2 && Character.isLowSurrogate(c=cs2.charAt(s2))) {
   2320                         /* advance ++s2; only below if cp2 decomposes/case-folds */
   2321                         cp2=Character.toCodePoint((char)c2, c);
   2322                     }
   2323                 } else /* isTrail(c2) */ {
                            /* s2 was post-incremented, so c2 is at s2-1 and its possible lead at s2-2 */
   2324                     if(0<=(s2-2) && Character.isHighSurrogate(c=cs2.charAt(s2-2))) {
   2325                         cp2=Character.toCodePoint(c, (char)c2);
   2326                     }
   2327                 }
   2328             }
   2329 
   2330             /*
   2331              * go down one level for each string
   2332              * continue with the main loop as soon as there is a real change
   2333              */
   2334 
                    /* case folding is only tried at level 0, i.e. while reading the input string itself */
   2335             if( level1==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
   2336                 (length=csp.toFullFolding(cp1, fold1, options))>=0
   2337             ) {
   2338                 /* cp1 case-folds to the code point "length" or to p[length] */
   2339                 if(UTF16.isSurrogate((char)c1)) {
   2340                     if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
   2341                         /* advance beyond source surrogate pair if it case-folds */
   2342                         ++s1;
   2343                     } else /* isTrail(c1) */ {
   2344                         /*
   2345                          * we got a supplementary code point when hitting its trail surrogate,
   2346                          * therefore the lead surrogate must have been the same as in the other string;
   2347                          * compare this decomposition with the lead surrogate in the other string
   2348                          * remember that this simulates bulk text replacement:
   2349                          * the decomposition would replace the entire code point
   2350                          */
   2351                         --s2;
   2352                         c2=cs2.charAt(s2-1);
   2353                     }
   2354                 }
   2355 
   2356                 /* push current level pointers */
   2357                 if(stack1==null) {
   2358                     stack1=createCmpEquivLevelStack();
   2359                 }
   2360                 stack1[0].cs=cs1;
   2361                 stack1[0].s=s1;
   2362                 ++level1;
   2363 
   2364                 /* copy the folding result to fold1[] */
   2365                 /* Java: the buffer was probably not empty, remove the old contents */
                        /*
                         * a small "length" is a string length: keep only the last "length" chars of fold1;
                         * otherwise "length" is itself the folded code point and replaces the buffer contents
                         */
   2366                 if(length<=UCaseProps.MAX_STRING_LENGTH) {
   2367                     fold1.delete(0, fold1.length()-length);
   2368                 } else {
   2369                     fold1.setLength(0);
   2370                     fold1.appendCodePoint(length);
   2371                 }
   2372 
   2373                 /* set next level pointers to case folding */
   2374                 cs1=fold1;
   2375                 s1=0;
   2376                 limit1=fold1.length();
   2377 
   2378                 /* get ready to read from decomposition, continue with loop */
   2379                 c1=-1;
   2380                 continue;
   2381             }
   2382 
   2383             if( level2==0 && (options&COMPARE_IGNORE_CASE)!=0 &&
   2384                 (length=csp.toFullFolding(cp2, fold2, options))>=0
   2385             ) {
   2386                 /* cp2 case-folds to the code point "length" or to p[length] */
   2387                 if(UTF16.isSurrogate((char)c2)) {
   2388                     if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
   2389                         /* advance beyond source surrogate pair if it case-folds */
   2390                         ++s2;
   2391                     } else /* isTrail(c2) */ {
   2392                         /*
   2393                          * we got a supplementary code point when hitting its trail surrogate,
   2394                          * therefore the lead surrogate must have been the same as in the other string;
   2395                          * compare this decomposition with the lead surrogate in the other string
   2396                          * remember that this simulates bulk text replacement:
   2397                          * the decomposition would replace the entire code point
   2398                          */
   2399                         --s1;
   2400                         c1=cs1.charAt(s1-1);
   2401                     }
   2402                 }
   2403 
   2404                 /* push current level pointers */
   2405                 if(stack2==null) {
   2406                     stack2=createCmpEquivLevelStack();
   2407                 }
   2408                 stack2[0].cs=cs2;
   2409                 stack2[0].s=s2;
   2410                 ++level2;
   2411 
   2412                 /* copy the folding result to fold2[] */
   2413                 /* Java: the buffer was probably not empty, remove the old contents */
                        /*
                         * a small "length" is a string length: keep only the last "length" chars of fold2;
                         * otherwise "length" is itself the folded code point and replaces the buffer contents
                         */
   2414                 if(length<=UCaseProps.MAX_STRING_LENGTH) {
   2415                     fold2.delete(0, fold2.length()-length);
   2416                 } else {
   2417                     fold2.setLength(0);
   2418                     fold2.appendCodePoint(length);
   2419                 }
   2420 
   2421                 /* set next level pointers to case folding */
   2422                 cs2=fold2;
   2423                 s2=0;
   2424                 limit2=fold2.length();
   2425 
   2426                 /* get ready to read from decomposition, continue with loop */
   2427                 c2=-1;
   2428                 continue;
   2429             }
   2430 
                    /* decomposition is tried at level 0 or 1, so also while reading a case-folding result */
   2431             if( level1<2 && (options&COMPARE_EQUIV)!=0 &&
   2432                 (decomp1=nfcImpl.getDecomposition(cp1))!=null
   2433             ) {
   2434                 /* cp1 decomposes into p[length] */
   2435                 if(UTF16.isSurrogate((char)c1)) {
   2436                     if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c1)) {
   2437                         /* advance beyond source surrogate pair if it decomposes */
   2438                         ++s1;
   2439                     } else /* isTrail(c1) */ {
   2440                         /*
   2441                          * we got a supplementary code point when hitting its trail surrogate,
   2442                          * therefore the lead surrogate must have been the same as in the other string;
   2443                          * compare this decomposition with the lead surrogate in the other string
   2444                          * remember that this simulates bulk text replacement:
   2445                          * the decomposition would replace the entire code point
   2446                          */
   2447                         --s2;
   2448                         c2=cs2.charAt(s2-1);
   2449                     }
   2450                 }
   2451 
   2452                 /* push current level pointers */
   2453                 if(stack1==null) {
   2454                     stack1=createCmpEquivLevelStack();
   2455                 }
   2456                 stack1[level1].cs=cs1;
   2457                 stack1[level1].s=s1;
   2458                 ++level1;
   2459 
   2460                 /* set empty intermediate level if skipped */
                        /* after this, a decomposition is always read at level 2, which the level1<2 test above excludes */
   2461                 if(level1<2) {
   2462                     stack1[level1++].cs=null;
   2463                 }
   2464 
   2465                 /* set next level pointers to decomposition */
   2466                 cs1=decomp1;
   2467                 s1=0;
   2468                 limit1=decomp1.length();
   2469 
   2470                 /* get ready to read from decomposition, continue with loop */
   2471                 c1=-1;
   2472                 continue;
   2473             }
   2474 
   2475             if( level2<2 && (options&COMPARE_EQUIV)!=0 &&
   2476                 (decomp2=nfcImpl.getDecomposition(cp2))!=null
   2477             ) {
   2478                 /* cp2 decomposes into p[length] */
   2479                 if(UTF16.isSurrogate((char)c2)) {
   2480                     if(Normalizer2Impl.UTF16Plus.isSurrogateLead(c2)) {
   2481                         /* advance beyond source surrogate pair if it decomposes */
   2482                         ++s2;
   2483                     } else /* isTrail(c2) */ {
   2484                         /*
   2485                          * we got a supplementary code point when hitting its trail surrogate,
   2486                          * therefore the lead surrogate must have been the same as in the other string;
   2487                          * compare this decomposition with the lead surrogate in the other string
   2488                          * remember that this simulates bulk text replacement:
   2489                          * the decomposition would replace the entire code point
   2490                          */
   2491                         --s1;
   2492                         c1=cs1.charAt(s1-1);
   2493                     }
   2494                 }
   2495 
   2496                 /* push current level pointers */
   2497                 if(stack2==null) {
   2498                     stack2=createCmpEquivLevelStack();
   2499                 }
   2500                 stack2[level2].cs=cs2;
   2501                 stack2[level2].s=s2;
   2502                 ++level2;
   2503 
   2504                 /* set empty intermediate level if skipped */
                        /* after this, a decomposition is always read at level 2, which the level2<2 test above excludes */
   2505                 if(level2<2) {
   2506                     stack2[level2++].cs=null;
   2507                 }
   2508 
   2509                 /* set next level pointers to decomposition */
   2510                 cs2=decomp2;
   2511                 s2=0;
   2512                 limit2=decomp2.length();
   2513 
   2514                 /* get ready to read from decomposition, continue with loop */
   2515                 c2=-1;
   2516                 continue;
   2517             }
   2518 
   2519             /*
   2520              * no decomposition/case folding, max level for both sides:
   2521              * return difference result
   2522              *
   2523              * code point order comparison must not just return cp1-cp2
   2524              * because when single surrogates are present then the surrogate pairs
   2525              * that formed cp1 and cp2 may be from different string indexes
   2526              *
   2527              * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
   2528              * c1=d800 cp1=10001 c2=dc00 cp2=10000
   2529              * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
   2530              *
   2531              * therefore, use same fix-up as in ustring.c/uprv_strCompare()
   2532              * except: uprv_strCompare() fetches c=*s while this function fetches c=*s++
   2533              * so we have slightly different pointer/start/limit comparisons here
   2534              */
   2535 
   2536             if(c1>=0xd800 && c2>=0xd800 && (options&COMPARE_CODE_POINT_ORDER)!=0) {
   2537                 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
   2538                 if(
   2539                     (c1<=0xdbff && s1!=limit1 && Character.isLowSurrogate(cs1.charAt(s1))) ||
   2540                     (Character.isLowSurrogate((char)c1) && 0!=(s1-1) && Character.isHighSurrogate(cs1.charAt(s1-2)))
   2541                 ) {
   2542                     /* part of a surrogate pair, leave >=d800 */
   2543                 } else {
   2544                     /* BMP code point - may be surrogate code point - make <d800 */
   2545                     c1-=0x2800;
   2546                 }
   2547 
   2548                 if(
   2549                     (c2<=0xdbff && s2!=limit2 && Character.isLowSurrogate(cs2.charAt(s2))) ||
   2550                     (Character.isLowSurrogate((char)c2) && 0!=(s2-1) && Character.isHighSurrogate(cs2.charAt(s2-2)))
   2551                 ) {
   2552                     /* part of a surrogate pair, leave >=d800 */
   2553                 } else {
   2554                     /* BMP code point - may be surrogate code point - make <d800 */
   2555                     c2-=0x2800;
   2556                 }
   2557             }
   2558 
   2559             return c1-c2;
   2560         }
   2561     }
   2562 
   2563     /**
   2564      * An Appendable that writes into a char array with a capacity that may be
   2565      * less than array.length.
   2566      * (By contrast, CharBuffer will write beyond destLimit all the way up to array.length.)
   2567      * <p>
   2568      * An overflow is only reported at the end, for the old Normalizer API functions that write
   2569      * to char arrays.
             * <p>
             * The append methods never throw on overflow: they keep advancing the write
             * offset past the limit, and {@link #length()} throws once the offset has
             * passed the limit.
   2570      */
   2571     private static final class CharsAppendable implements Appendable {
                /**
                 * Creates an appendable that starts writing at dest[destStart] and stores
                 * chars only at indexes below destLimit.
                 */
   2572         public CharsAppendable(char[] dest, int destStart, int destLimit) {
   2573             chars=dest;
   2574             start=offset=destStart;
   2575             limit=destLimit;
   2576         }
                /**
                 * Returns the number of chars appended since construction.
                 *
                 * @throws IndexOutOfBoundsException if more chars were appended than fit
                 *         below the limit; the message is the total number appended
                 */
   2577         public int length() {
   2578             int len=offset-start;
   2579             if(offset<=limit) {
   2580                 return len;
   2581             } else {
   2582                 throw new IndexOutOfBoundsException(Integer.toString(len));
   2583             }
   2584         }
                /**
                 * Stores c if there is room below the limit; the offset advances either
                 * way so that length() can detect the overflow.
                 */
   2585         @Override
   2586         public Appendable append(char c) {
   2587             if(offset<limit) {
   2588                 chars[offset]=c;
   2589             }
   2590             ++offset;
   2591             return this;
   2592         }
                /**
                 * Appends all of s via {@link #append(CharSequence, int, int)}.
                 * NOTE(review): s is dereferenced directly, so a null s throws
                 * NullPointerException; java.lang.Appendable specifies that a null
                 * argument appends "null" - confirm no caller passes null.
                 */
   2593         @Override
   2594         public Appendable append(CharSequence s) {
   2595             return append(s, 0, s.length());
   2596         }
                /**
                 * Appends the chars of s from index sStart up to, but not including, sLimit.
                 * The chars are copied only if the whole range fits into the remaining
                 * room; otherwise nothing is stored and the offset just advances by the
                 * range length.
                 */
   2597         @Override
   2598         public Appendable append(CharSequence s, int sStart, int sLimit) {
   2599             int len=sLimit-sStart;
   2600             if(len<=(limit-offset)) {
   2601                 while(sStart<sLimit) {  // TODO: Is there a better way to copy the characters?
   2602                     chars[offset++]=s.charAt(sStart++);
   2603                 }
   2604             } else {
   2605                 offset+=len;
   2606             }
   2607             return this;
   2608         }
   2609 
                /* destination array */
   2610         private final char[] chars;
                /* start: index of the first char written; limit: index below which chars may be stored */
   2611         private final int start, limit;
                /* index for the next char; may exceed limit after an overflow */
   2612         private int offset;
   2613     }
   2614 }
   2615