Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 //  2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /**
      5 *******************************************************************************
      6 * Copyright (C) 1996-2016, International Business Machines Corporation and
      7 * others. All Rights Reserved.
      8 *******************************************************************************
      9 */
     10 package android.icu.text;
     11 
     12 import java.text.CharacterIterator;
     13 import java.util.HashMap;
     14 import java.util.Map;
     15 
     16 import android.icu.impl.CharacterIteratorWrapper;
     17 import android.icu.impl.coll.Collation;
     18 import android.icu.impl.coll.CollationData;
     19 import android.icu.impl.coll.CollationIterator;
     20 import android.icu.impl.coll.ContractionsAndExpansions;
     21 import android.icu.impl.coll.FCDIterCollationIterator;
     22 import android.icu.impl.coll.FCDUTF16CollationIterator;
     23 import android.icu.impl.coll.IterCollationIterator;
     24 import android.icu.impl.coll.UTF16CollationIterator;
     25 import android.icu.impl.coll.UVector32;
     26 
     27 /**
     28  * <code>CollationElementIterator</code> is an iterator created by
     29  * a RuleBasedCollator to walk through a string. The return result of
     30  * each iteration is a 32-bit collation element (CE) that defines the
     31  * ordering priority of the next character or sequence of characters
     32  * in the source string.
     33  *
     34  * <p>For illustration, consider the following in Slovak and in traditional Spanish collation:
     35  * <blockquote>
     36  * <pre>
     37  * "ca" -&gt; the first collation element is CE('c') and the second
     38  *         collation element is CE('a').
     39  * "cha" -&gt; the first collation element is CE('ch') and the second
     40  *          collation element is CE('a').
     41  * </pre>
     42  * </blockquote>
     43  * And in German phonebook collation,
     44  * <blockquote>
     45  * <pre>
     46  * Since the character '&#230;' is a composed character of 'a' and 'e', the
     47  * iterator returns two collation elements for the single character '&#230;'
     48  *
     49  * "&#230;b" -&gt; the first collation element is collation_element('a'), the
     50  *              second collation element is collation_element('e'), and the
     51  *              third collation element is collation_element('b').
     52  * </pre>
     53  * </blockquote>
     54  *
     55  * <p>For collation ordering comparison, the collation element results
     56  * can not be compared simply by using basic arithmetic operators,
     57  * e.g. &lt;, == or &gt;, further processing has to be done. Details
     58  * can be found in the ICU
     59  * <a href="http://userguide.icu-project.org/collation/architecture">
     60  * User Guide</a>. An example of using the CollationElementIterator
     61  * for collation ordering comparison is the class
     62  * {@link android.icu.text.StringSearch}.
     63  *
     64  * <p>To construct a CollationElementIterator object, users
     65  * call the method getCollationElementIterator() on a
     66  * RuleBasedCollator that defines the desired sorting order.
     67  *
     68  * <p> Example:
     69  * <blockquote>
     70  * <pre>
 *  String testString = "This is a test";
 *  RuleBasedCollator rbc = new RuleBasedCollator("&amp;a&lt;b");
 *  CollationElementIterator iterator = rbc.getCollationElementIterator(testString);
 *  int order;
 *  // Iterate until next() reports that the end of the text has been reached.
 *  while ((order = iterator.next()) != CollationElementIterator.NULLORDER) {
 *      if (order != CollationElementIterator.IGNORABLE) {
 *          // order is valid and not ignorable, and we have not passed the end
 *          // of the iteration, so we do something
 *          int primaryOrder = CollationElementIterator.primaryOrder(order);
 *          System.out.println("Next primary order 0x" +
 *                             Integer.toHexString(primaryOrder));
 *      }
 *  }
     86  * </pre>
     87  * </blockquote>
     88  * <p>
     89  * The method next() returns the collation order of the next character based on
     90  * the comparison level of the collator. The method previous() returns the
     91  * collation order of the previous character based on the comparison level of
     92  * the collator. The Collation Element Iterator moves only in one direction
     93  * between calls to reset(), setOffset(), or setText(). That is, next() and
     94  * previous() can not be inter-used. Whenever previous() is to be called after
     95  * next() or vice versa, reset(), setOffset() or setText() has to be called first
     96  * to reset the status, shifting current position to either the end or the start of
     97  * the string (reset() or setText()), or the specified position (setOffset()).
     98  * Hence at the next call of next() or previous(), the first or last collation order,
     99  * or collation order at the specified position will be returned. If a change of
    100  * direction is done without one of these calls, the result is undefined.
    101  * <p>
    102  * This class is not subclassable.
    103  * @see Collator
    104  * @see RuleBasedCollator
    105  * @see StringSearch
    106  * @author Syn Wee Quek
    107  */
    108 public final class CollationElementIterator
    109 {
    /** Low-level iterator over the current text; replaced by each setText() call. */
    private CollationIterator iter_;  // owned
    /** Collator whose data and settings are used to build {@link #iter_}. */
    private RuleBasedCollator rbc_;  // aliased
    /**
     * Pending 32-bit half of a 64-bit CE that has been split into two
     * old-style CEs. next() stores the continuation half here and previous()
     * stores the first half here; the following call in the same direction
     * returns it and clears the field. 0 means that nothing is pending.
     */
    private int otherHalf_;
    /**
     * &lt;0: backwards; 0: just after reset() (previous() begins from end);
     * 1: just after setOffset(); &gt;1: forward
     */
    private byte dir_;
    /**
     * Stores offsets from expansions and from unsafe-backwards iteration,
     * so that getOffset() returns intermediate offsets for the CEs
     * that are consistent with forward iteration.
     * Created lazily by the first call to previous().
     */
    private UVector32 offsets_;

    /**
     * The source text. Its length is used by previous() after reset(),
     * setOffset() inspects its characters, and equals() compares it.
     */
    private String string_;  // TODO: needed in Java? if so, then add a UCharacterIterator field too?
    126 
    127 
    /**
     * This constant is returned by the iterator in the methods
     * next() and previous() when the end or the beginning of the
     * source string has been reached, and there are no more valid
     * collation elements to return.
     *
     * <p>The value has all 32 bits set, i.e. it equals -1 as an int.
     *
     * <p>See class documentation for an example of use.
     * @see #next
     * @see #previous */
    public final static int NULLORDER = 0xffffffff;

    /**
     * This constant is returned by the iterator in the methods
     * next() and previous() when a collation element result is to be
     * ignored.
     *
     * <p>getMaxExpansion() reports 1 for this value.
     *
     * <p>See class documentation for an example of use.
     * @see #next
     * @see #previous */
    public static final int IGNORABLE = 0;
    148 
    149     /**
    150      * Return the primary order of the specified collation element,
    151      * i.e. the first 16 bits.  This value is unsigned.
    152      * @param ce the collation element
    153      * @return the element's 16 bits primary order.
    154      */
    155     public final static int primaryOrder(int ce) {
    156         return (ce >>> 16) & 0xffff;
    157     }
    158 
    159     /**
    160      * Return the secondary order of the specified collation element,
    161      * i.e. the 16th to 23th bits, inclusive.  This value is unsigned.
    162      * @param ce the collation element
    163      * @return the element's 8 bits secondary order
    164      */
    165     public final static int secondaryOrder(int ce) {
    166         return (ce >>> 8) & 0xff;
    167     }
    168 
    169     /**
    170      * Return the tertiary order of the specified collation element, i.e. the last
    171      * 8 bits.  This value is unsigned.
    172      * @param ce the collation element
    173      * @return the element's 8 bits tertiary order
    174      */
    175     public final static int tertiaryOrder(int ce) {
    176         return ce & 0xff;
    177     }
    178 
    179 
    180     private static final int getFirstHalf(long p, int lower32) {
    181         return ((int)p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff);
    182     }
    183 
    184     private static final int getSecondHalf(long p, int lower32) {
    185         return ((int)p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f);
    186     }
    187 
    188     private static final boolean ceNeedsTwoParts(long ce) {
    189         return (ce & 0xffff00ff003fL) != 0;
    190     }
    191 
    /**
     * Shared initialization for the other constructors: remembers the
     * collator and puts the iterator into its "just after reset()" state.
     * No text is attached yet; callers must invoke setText() afterwards.
     *
     * @param collator the RuleBasedCollator that defines the ordering
     */
    private CollationElementIterator(RuleBasedCollator collator) {
        this.rbc_ = collator;
        // No text iterator and no recorded offsets until setText() / previous().
        this.iter_ = null;
        this.offsets_ = null;
        // No pending half CE, no iteration direction chosen.
        this.otherHalf_ = 0;
        this.dir_ = 0;
    }
    199 
    /**
     * Creates an iterator over a source string, using the rules defined by
     * the given RuleBasedCollator. If the source string is empty, NULLORDER
     * will be returned on the first call to next().
     *
     * <p>Package-private: users obtain instances through
     * getCollationElementIterator() on a RuleBasedCollator.
     *
     * @param source the source string.
     * @param collator the RuleBasedCollator
     */
    CollationElementIterator(String source, RuleBasedCollator collator) {
        this(collator);
        // Builds the underlying CollationIterator and resets the iteration state.
        setText(source);
    }
    214     // Note: The constructors should take settings & tailoring, not a collator,
    215     // to avoid circular dependencies.
    216     // However, for equals() we would need to be able to compare tailoring data for equality
    217     // without making CollationData or CollationTailoring depend on TailoredSet.
    218     // (See the implementation of RuleBasedCollator.equals().)
    219     // That might require creating an intermediate class that would be used
    220     // by both CollationElementIterator and RuleBasedCollator
    221     // but only contain the part of RBC.equals() related to data and rules.
    222 
    /**
     * Creates an iterator over the text of a java.text.CharacterIterator,
     * using the rules defined by the given RuleBasedCollator. If the source
     * text is empty, NULLORDER will be returned on the first call to next().
     *
     * <p>Package-private: users obtain instances through
     * getCollationElementIterator() on a RuleBasedCollator.
     *
     * @param source the source string iterator.
     * @param collator the RuleBasedCollator
     */
    CollationElementIterator(CharacterIterator source, RuleBasedCollator collator) {
        this(collator);
        // Wraps the iterator and resets the iteration state.
        setText(source);
    }
    237 
    /**
     * Creates an iterator over the text of a UCharacterIterator, using the
     * rules defined by the given RuleBasedCollator. If the source text is
     * empty, NULLORDER will be returned on the first call to next().
     *
     * <p>Package-private: users obtain instances through
     * getCollationElementIterator() on a RuleBasedCollator.
     *
     * @param source the source UCharacterIterator.
     * @param collator the RuleBasedCollator
     */
    CollationElementIterator(UCharacterIterator source, RuleBasedCollator collator) {
        this(collator);
        // setText() works on a clone of the iterator where cloning is supported.
        setText(source);
    }
    252 
    253     /**
    254      * Returns the character offset in the source string
    255      * corresponding to the next collation element. I.e., getOffset()
    256      * returns the position in the source string corresponding to the
    257      * collation element that will be returned by the next call to
    258      * next() or previous(). This value could be any of:
    259      * <ul>
    260      * <li> The index of the <b>first</b> character corresponding to
    261      * the next collation element. (This means that if
    262      * <code>setOffset(offset)</code> sets the index in the middle of
    263      * a contraction, <code>getOffset()</code> returns the index of
    264      * the first character in the contraction, which may not be equal
    265      * to the original offset that was set. Hence calling getOffset()
    266      * immediately after setOffset(offset) does not guarantee that the
    267      * original offset set will be returned.)
    268      * <li> If normalization is on, the index of the <b>immediate</b>
    269      * subsequent character, or composite character with the first
    270      * character, having a combining class of 0.
    271      * <li> The length of the source string, if iteration has reached
    272      * the end.
    273      *</ul>
    274      *
    275      * @return The character offset in the source string corresponding to the
    276      *         collation element that will be returned by the next call to
    277      *         next() or previous().
    278      */
    279     public int getOffset() {
    280         if (dir_ < 0 && offsets_ != null && !offsets_.isEmpty()) {
    281             // CollationIterator.previousCE() decrements the CEs length
    282             // while it pops CEs from its internal buffer.
    283             int i = iter_.getCEsLength();
    284             if (otherHalf_ != 0) {
    285                 // Return the trailing CE offset while we are in the middle of a 64-bit CE.
    286                 ++i;
    287             }
    288             assert (i < offsets_.size());
    289             return offsets_.elementAti(i);
    290         }
    291         return iter_.getOffset();
    292     }
    293 
    294     /**
    295      * Get the next collation element in the source string.
    296      *
    297      * <p>This iterator iterates over a sequence of collation elements
    298      * that were built from the string. Because there isn't
    299      * necessarily a one-to-one mapping from characters to collation
    300      * elements, this doesn't mean the same thing as "return the
    301      * collation element [or ordering priority] of the next character
    302      * in the string".
    303      *
    304      * <p>This function returns the collation element that the
    305      * iterator is currently pointing to, and then updates the
    306      * internal pointer to point to the next element.
    307      *
    308      * @return the next collation element or NULLORDER if the end of the
    309      *         iteration has been reached.
    310      */
    311     public int next() {
    312         if (dir_ > 1) {
    313             // Continue forward iteration. Test this first.
    314             if (otherHalf_ != 0) {
    315                 int oh = otherHalf_;
    316                 otherHalf_ = 0;
    317                 return oh;
    318             }
    319         } else if (dir_ == 1) {
    320             // next() after setOffset()
    321             dir_ = 2;
    322         } else if (dir_ == 0) {
    323             // The iter_ is already reset to the start of the text.
    324             dir_ = 2;
    325         } else /* dir_ < 0 */{
    326             // illegal change of direction
    327             throw new IllegalStateException("Illegal change of direction");
    328             // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
    329         }
    330         // No need to keep all CEs in the buffer when we iterate.
    331         iter_.clearCEsIfNoneRemaining();
    332         long ce = iter_.nextCE();
    333         if (ce == Collation.NO_CE) {
    334             return NULLORDER;
    335         }
    336         // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
    337         long p = ce >>> 32;
    338         int lower32 = (int) ce;
    339         int firstHalf = getFirstHalf(p, lower32);
    340         int secondHalf = getSecondHalf(p, lower32);
    341         if (secondHalf != 0) {
    342             otherHalf_ = secondHalf | 0xc0; // continuation CE
    343         }
    344         return firstHalf;
    345     }
    346 
    347     /**
    348      * Get the previous collation element in the source string.
    349      *
    350      * <p>This iterator iterates over a sequence of collation elements
    351      * that were built from the string. Because there isn't
    352      * necessarily a one-to-one mapping from characters to collation
    353      * elements, this doesn't mean the same thing as "return the
    354      * collation element [or ordering priority] of the previous
    355      * character in the string".
    356      *
    357      * <p>This function updates the iterator's internal pointer to
    358      * point to the collation element preceding the one it's currently
    359      * pointing to and then returns that element, while next() returns
    360      * the current element and then updates the pointer.
    361      *
    362      * @return the previous collation element, or NULLORDER when the start of
    363      *             the iteration has been reached.
    364      */
    365     public int previous() {
    366         if (dir_ < 0) {
    367             // Continue backwards iteration. Test this first.
    368             if (otherHalf_ != 0) {
    369                 int oh = otherHalf_;
    370                 otherHalf_ = 0;
    371                 return oh;
    372             }
    373         } else if (dir_ == 0) {
    374             iter_.resetToOffset(string_.length());
    375             dir_ = -1;
    376         } else if (dir_ == 1) {
    377             // previous() after setOffset()
    378             dir_ = -1;
    379         } else /* dir_ > 1 */{
    380             // illegal change of direction
    381             throw new IllegalStateException("Illegal change of direction");
    382             // Java porting note: ICU4C sets U_INVALID_STATE_ERROR to the return status.
    383         }
    384         if (offsets_ == null) {
    385             offsets_ = new UVector32();
    386         }
    387         // If we already have expansion CEs, then we also have offsets.
    388         // Otherwise remember the trailing offset in case we need to
    389         // write offsets for an artificial expansion.
    390         int limitOffset = iter_.getCEsLength() == 0 ? iter_.getOffset() : 0;
    391         long ce = iter_.previousCE(offsets_);
    392         if (ce == Collation.NO_CE) {
    393             return NULLORDER;
    394         }
    395         // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
    396         long p = ce >>> 32;
    397         int lower32 = (int) ce;
    398         int firstHalf = getFirstHalf(p, lower32);
    399         int secondHalf = getSecondHalf(p, lower32);
    400         if (secondHalf != 0) {
    401             if (offsets_.isEmpty()) {
    402                 // When we convert a single 64-bit CE into two 32-bit CEs,
    403                 // we need to make this artificial expansion behave like a normal expansion.
    404                 // See CollationIterator.previousCE().
    405                 offsets_.addElement(iter_.getOffset());
    406                 offsets_.addElement(limitOffset);
    407             }
    408             otherHalf_ = firstHalf;
    409             return secondHalf | 0xc0; // continuation CE
    410         }
    411         return firstHalf;
    412     }
    413 
    /**
     * Resets the cursor to the beginning of the string. The next
     * call to next() or previous() will return the first and last
     * collation element in the string, respectively.
     *
     * <p>If the RuleBasedCollator used by this iterator has had its
     * attributes changed, calling reset() will reinitialize the
     * iterator to use the new attributes.
     *
     * <p>NOTE(review): the body below only rewinds the existing underlying
     * iterator and clears the iteration state; it does not rebuild the
     * iterator from the collator's current settings. Confirm whether the
     * paragraph above about changed attributes still holds.
     */
    public void reset() {
        iter_ .resetToOffset(0);
        // Drop any pending half of a split 64-bit CE.
        otherHalf_ = 0;
        // 0 = "just after reset()": next() starts at the beginning,
        // previous() first moves to the end of the text.
        dir_ = 0;
    }
    428 
    429     /**
    430      * Sets the iterator to point to the collation element
    431      * corresponding to the character at the specified offset. The
    432      * value returned by the next call to next() will be the collation
    433      * element corresponding to the characters at offset.
    434      *
    435      * <p>If offset is in the middle of a contracting character
    436      * sequence, the iterator is adjusted to the start of the
    437      * contracting sequence. This means that getOffset() is not
    438      * guaranteed to return the same value set by this method.
    439      *
    440      * <p>If the decomposition mode is on, and offset is in the middle
    441      * of a decomposible range of source text, the iterator may not
    442      * return a correct result for the next forwards or backwards
    443      * iteration.  The user must ensure that the offset is not in the
    444      * middle of a decomposible range.
    445      *
    446      * @param newOffset the character offset into the original source string to
    447      *        set. Note that this is not an offset into the corresponding
    448      *        sequence of collation elements.
    449      */
    450     public void setOffset(int newOffset) {
    451         if (0 < newOffset && newOffset < string_.length()) {
    452             int offset = newOffset;
    453             do {
    454                 char c = string_.charAt(offset);
    455                 if (!rbc_.isUnsafe(c) ||
    456                         (Character.isHighSurrogate(c) && !rbc_.isUnsafe(string_.codePointAt(offset)))) {
    457                     break;
    458                 }
    459                 // Back up to before this unsafe character.
    460                 --offset;
    461             } while (offset > 0);
    462             if (offset < newOffset) {
    463                 // We might have backed up more than necessary.
    464                 // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
    465                 // but for text "chu" setOffset(2) should remain at 2
    466                 // although we initially back up to offset 0.
    467                 // Find the last safe offset no greater than newOffset by iterating forward.
    468                 int lastSafeOffset = offset;
    469                 do {
    470                     iter_.resetToOffset(lastSafeOffset);
    471                     do {
    472                         iter_.nextCE();
    473                     } while ((offset = iter_.getOffset()) == lastSafeOffset);
    474                     if (offset <= newOffset) {
    475                         lastSafeOffset = offset;
    476                     }
    477                 } while (offset < newOffset);
    478                 newOffset = lastSafeOffset;
    479             }
    480         }
    481         iter_.resetToOffset(newOffset);
    482         otherHalf_ = 0;
    483         dir_ = 1;
    484     }
    485 
    486     /**
    487      * Set a new source string for iteration, and reset the offset
    488      * to the beginning of the text.
    489      *
    490      * @param source the new source string for iteration.
    491      */
    492     public void setText(String source) {
    493         string_ = source; // TODO: do we need to remember the source string in a field?
    494         CollationIterator newIter;
    495         boolean numeric = rbc_.settings.readOnly().isNumeric();
    496         if (rbc_.settings.readOnly().dontCheckFCD()) {
    497             newIter = new UTF16CollationIterator(rbc_.data, numeric, string_, 0);
    498         } else {
    499             newIter = new FCDUTF16CollationIterator(rbc_.data, numeric, string_, 0);
    500         }
    501         iter_ = newIter;
    502         otherHalf_ = 0;
    503         dir_ = 0;
    504     }
    505 
    506     /**
    507      * Set a new source string iterator for iteration, and reset the
    508      * offset to the beginning of the text.
    509      *
    510      * <p>The source iterator's integrity will be preserved since a new copy
    511      * will be created for use.
    512      * @param source the new source string iterator for iteration.
    513      */
    514     public void setText(UCharacterIterator source) {
    515         string_ = source.getText(); // TODO: do we need to remember the source string in a field?
    516         // Note: In C++, we just setText(source.getText()).
    517         // In Java, we actually operate on a character iterator.
    518         // (The old code apparently did so only for a CharacterIterator;
    519         // for a UCharacterIterator it also just used source.getText()).
    520         // TODO: do we need to remember the cloned iterator in a field?
    521         UCharacterIterator src;
    522         try {
    523             src = (UCharacterIterator) source.clone();
    524         } catch (CloneNotSupportedException e) {
    525             // Fall back to ICU 52 behavior of iterating over the text contents
    526             // of the UCharacterIterator.
    527             setText(source.getText());
    528             return;
    529         }
    530         src.setToStart();
    531         CollationIterator newIter;
    532         boolean numeric = rbc_.settings.readOnly().isNumeric();
    533         if (rbc_.settings.readOnly().dontCheckFCD()) {
    534             newIter = new IterCollationIterator(rbc_.data, numeric, src);
    535         } else {
    536             newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
    537         }
    538         iter_ = newIter;
    539         otherHalf_ = 0;
    540         dir_ = 0;
    541     }
    542 
    543     /**
    544      * Set a new source string iterator for iteration, and reset the
    545      * offset to the beginning of the text.
    546      *
    547      * @param source the new source string iterator for iteration.
    548      */
    549     public void setText(CharacterIterator source) {
    550         // Note: In C++, we just setText(source.getText()).
    551         // In Java, we actually operate on a character iterator.
    552         // TODO: do we need to remember the iterator in a field?
    553         // TODO: apparently we don't clone a CharacterIterator in Java,
    554         // we only clone the text for a UCharacterIterator?? see the old code in the constructors
    555         UCharacterIterator src = new CharacterIteratorWrapper(source);
    556         src.setToStart();
    557         string_ = src.getText(); // TODO: do we need to remember the source string in a field?
    558         CollationIterator newIter;
    559         boolean numeric = rbc_.settings.readOnly().isNumeric();
    560         if (rbc_.settings.readOnly().dontCheckFCD()) {
    561             newIter = new IterCollationIterator(rbc_.data, numeric, src);
    562         } else {
    563             newIter = new FCDIterCollationIterator(rbc_.data, numeric, src, 0);
    564         }
    565         iter_ = newIter;
    566         otherHalf_ = 0;
    567         dir_ = 0;
    568     }
    569 
    570     private static final class MaxExpSink implements ContractionsAndExpansions.CESink {
    571         MaxExpSink(Map<Integer, Integer> h) {
    572             maxExpansions = h;
    573         }
    574 
    575         @Override
    576         public void handleCE(long ce) {
    577         }
    578 
    579         @Override
    580         public void handleExpansion(long ces[], int start, int length) {
    581             if (length <= 1) {
    582                 // We do not need to add single CEs into the map.
    583                 return;
    584             }
    585             int count = 0; // number of CE "halves"
    586             for (int i = 0; i < length; ++i) {
    587                 count += ceNeedsTwoParts(ces[start + i]) ? 2 : 1;
    588             }
    589             // last "half" of the last CE
    590             long ce = ces[start + length - 1];
    591             long p = ce >>> 32;
    592             int lower32 = (int) ce;
    593             int lastHalf = getSecondHalf(p, lower32);
    594             if (lastHalf == 0) {
    595                 lastHalf = getFirstHalf(p, lower32);
    596                 assert (lastHalf != 0);
    597             } else {
    598                 lastHalf |= 0xc0; // old-style continuation CE
    599             }
    600             Integer oldCount = maxExpansions.get(lastHalf);
    601             if (oldCount == null || count > oldCount) {
    602                 maxExpansions.put(lastHalf, count);
    603             }
    604         }
    605 
    606         private Map<Integer, Integer> maxExpansions;
    607     }
    608 
    609     static final Map<Integer, Integer> computeMaxExpansions(CollationData data) {
    610         Map<Integer, Integer> maxExpansions = new HashMap<Integer, Integer>();
    611         MaxExpSink sink = new MaxExpSink(maxExpansions);
    612         new ContractionsAndExpansions(null, null, sink, true).forData(data);
    613         return maxExpansions;
    614     }
    615 
    616     /**
    617      * Returns the maximum length of any expansion sequence that ends with
    618      * the specified collation element. If there is no expansion with this
    619      * collation element as the last element, returns 1.
    620      *
    621      * @param ce a collation element returned by previous() or next().
    622      * @return the maximum length of any expansion sequence ending
    623      *         with the specified collation element.
    624      */
    625     public int getMaxExpansion(int ce) {
    626         return getMaxExpansion(rbc_.tailoring.maxExpansions, ce);
    627     }
    628 
    629     static int getMaxExpansion(Map<Integer, Integer> maxExpansions, int order) {
    630         if (order == 0) {
    631             return 1;
    632         }
    633         Integer max;
    634         if (maxExpansions != null && (max = maxExpansions.get(order)) != null) {
    635             return max;
    636         }
    637         if ((order & 0xc0) == 0xc0) {
    638             // old-style continuation CE
    639             return 2;
    640         } else {
    641             return 1;
    642         }
    643     }
    644 
    645     /** Normalizes dir_=1 (just after setOffset()) to dir_=0 (just after reset()). */
    646     private byte normalizeDir() {
    647         return dir_ == 1 ? 0 : dir_;
    648     }
    649 
    650     /**
    651      * Tests that argument object is equals to this CollationElementIterator.
    652      * Iterators are equal if the objects uses the same RuleBasedCollator,
    653      * the same source text and have the same current position in iteration.
    654      * @param that object to test if it is equals to this
    655      *             CollationElementIterator
    656      */
    657     @Override
    658     public boolean equals(Object that) {
    659         if (that == this) {
    660             return true;
    661         }
    662         if (that instanceof CollationElementIterator) {
    663             CollationElementIterator thatceiter = (CollationElementIterator) that;
    664             return rbc_.equals(thatceiter.rbc_)
    665                     && otherHalf_ == thatceiter.otherHalf_
    666                     && normalizeDir() == thatceiter.normalizeDir()
    667                     && string_.equals(thatceiter.string_)
    668                     && iter_.equals(thatceiter.iter_);
    669         }
    670         return false;
    671     }
    672 
    /**
     * Mock implementation of hashCode(). This implementation always returns a constant
     * value. When Java assertion is enabled, this method triggers an assertion failure.
     *
     * <p>A constant hash is consistent with equals(), but makes instances
     * unsuitable as keys of hash-based collections.
     *
     * @return the constant 42 when assertions are disabled
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Override
    @Deprecated
    public int hashCode() {
        // Fails under -ea so that accidental use in hash-based collections is noticed.
        assert false : "hashCode not designed";
        return 42;
    }
    686 
    /**
     * Returns the RuleBasedCollator that this iterator was created with.
     * The collator itself is returned, not a copy.
     *
     * @return the collator used by this iterator
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    public RuleBasedCollator getRuleBasedCollator() {
        return rbc_;
    }
    696 }
    697