Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
      4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      5  *
      6  * This code is free software; you can redistribute it and/or modify it
      7  * under the terms of the GNU General Public License version 2 only, as
      8  * published by the Free Software Foundation.  Oracle designates this
      9  * particular file as subject to the "Classpath" exception as provided
     10  * by Oracle in the LICENSE file that accompanied this code.
     11  *
     12  * This code is distributed in the hope that it will be useful, but WITHOUT
     13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15  * version 2 for more details (a copy is included in the LICENSE file that
     16  * accompanied this code).
     17  *
     18  * You should have received a copy of the GNU General Public License version
     19  * 2 along with this work; if not, write to the Free Software Foundation,
     20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     21  *
     22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     23  * or visit www.oracle.com if you need additional information or have any
     24  * questions.
     25  */
     26 
     27 /*
     28  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
     29  * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
     30  *
     31  *   The original version of this source code and documentation is copyrighted
     32  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
     33  * materials are provided under terms of a License Agreement between Taligent
     34  * and Sun. This technology is protected by multiple US and International
     35  * patents. This notice and attribution to Taligent may not be removed.
     36  *   Taligent is a registered trademark of Taligent, Inc.
     37  *
     38  */
     39 
     40 package java.text;
     41 
     42 /**
     43  * The <code>CollationElementIterator</code> class is used as an iterator
     44  * to walk through each character of an international string. Use the iterator
     45  * to return the ordering priority of the positioned character. The ordering
     46  * priority of a character, which we refer to as a key, defines how a character
     47  * is collated in the given collation object.
     48  *
     49  * <p>
     50  * For example, consider the following in Spanish:
     51  * <blockquote>
     52  * <pre>
     53  * "ca" &rarr; the first key is key('c') and second key is key('a').
     54  * "cha" &rarr; the first key is key('ch') and second key is key('a').
     55  * </pre>
     56  * </blockquote>
     57  * And in German,
     58  * <blockquote>
     59  * <pre>
     60  * "\u00e4b" &rarr; the first key is key('a'), the second key is key('e'), and
     61  * the third key is key('b').
     62  * </pre>
     63  * </blockquote>
     64  * The key of a character is an integer composed of primary order(short),
     65  * secondary order(byte), and tertiary order(byte). Java strictly defines
     66  * the size and signedness of its primitive data types. Therefore, the static
     67  * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and
     68  * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>,
     69  * and <code>short</code> respectively to ensure the correctness of the key
     70  * value.
     71  *
     72  * <p>
     73  * Example of the iterator usage,
     74  * <blockquote>
     75  * <pre>
     76  *
     77  *  String testString = "This is a test";
     78  *  Collator col = Collator.getInstance();
     79  *  if (col instanceof RuleBasedCollator) {
     80  *      RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col;
     81  *      CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString);
     82  *      int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next());
     83  *          :
     84  *  }
     85  * </pre>
     86  * </blockquote>
     87  *
     88  * <p>
     89  * <code>CollationElementIterator.next</code> returns the collation order
     90  * of the next character. A collation order consists of primary order,
     91  * secondary order and tertiary order. The data type of the collation
     92  * order is <strong>int</strong>. The first 16 bits of a collation order
     93  * is its primary order; the next 8 bits is the secondary order and the
     94  * last 8 bits is the tertiary order.
     95  *
     96  * <p><b>Note:</b> <code>CollationElementIterator</code> is a part of
     97  * <code>RuleBasedCollator</code> implementation. It is only usable
     98  * with <code>RuleBasedCollator</code> instances.
     99  *
    100  * @see                Collator
    101  * @see                RuleBasedCollator
    102  * @author             Helena Shih, Laura Werner, Richard Gillam
    103  */
    104 public final class CollationElementIterator
    105 {
    106     /**
    107      * Null order which indicates the end of string is reached by the
    108      * cursor.
    109      */
    110     public final static int NULLORDER = android.icu.text.CollationElementIterator.NULLORDER;
    111 
    112     private android.icu.text.CollationElementIterator icuIterator;
    113 
    114     CollationElementIterator(android.icu.text.CollationElementIterator iterator) {
    115         icuIterator = iterator;
    116     }
    117 
    118     /**
    119      * Resets the cursor to the beginning of the string.  The next call
    120      * to next() will return the first collation element in the string.
    121      */
    122     public void reset()
    123     {
    124         icuIterator.reset();
    125     }
    126 
    127     /**
    128      * Get the next collation element in the string.  <p>This iterator iterates
    129      * over a sequence of collation elements that were built from the string.
    130      * Because there isn't necessarily a one-to-one mapping from characters to
    131      * collation elements, this doesn't mean the same thing as "return the
    132      * collation element [or ordering priority] of the next character in the
    133      * string".</p>
    134      * <p>This function returns the collation element that the iterator is currently
    135      * pointing to and then updates the internal pointer to point to the next element.
    136      * previous() updates the pointer first and then returns the element.  This
    137      * means that when you change direction while iterating (i.e., call next() and
    138      * then call previous(), or call previous() and then call next()), you'll get
    139      * back the same element twice.</p>
    140      *
    141      * @return the next collation element
    142      */
    143     public int next()
    144     {
    145         return icuIterator.next();
    146     }
    147 
    148     /**
    149      * Get the previous collation element in the string.  <p>This iterator iterates
    150      * over a sequence of collation elements that were built from the string.
    151      * Because there isn't necessarily a one-to-one mapping from characters to
    152      * collation elements, this doesn't mean the same thing as "return the
    153      * collation element [or ordering priority] of the previous character in the
    154      * string".</p>
    155      * <p>This function updates the iterator's internal pointer to point to the
    156      * collation element preceding the one it's currently pointing to and then
    157      * returns that element, while next() returns the current element and then
    158      * updates the pointer.  This means that when you change direction while
    159      * iterating (i.e., call next() and then call previous(), or call previous()
    160      * and then call next()), you'll get back the same element twice.</p>
    161      *
    162      * @return the previous collation element
    163      * @since 1.2
    164      */
    165     public int previous()
    166     {
    167         return icuIterator.previous();
    168     }
    169 
    170     /**
    171      * Return the primary component of a collation element.
    172      * @param order the collation element
    173      * @return the element's primary component
    174      */
    175     public final static int primaryOrder(int order)
    176     {
    177         return android.icu.text.CollationElementIterator.primaryOrder(order);
    178     }
    179     /**
    180      * Return the secondary component of a collation element.
    181      * @param order the collation element
    182      * @return the element's secondary component
    183      */
    184     public final static short secondaryOrder(int order)
    185     {
    186        return (short) android.icu.text.CollationElementIterator.secondaryOrder(order);
    187     }
    188     /**
    189      * Return the tertiary component of a collation element.
    190      * @param order the collation element
    191      * @return the element's tertiary component
    192      */
    193     public final static short tertiaryOrder(int order)
    194     {
    195         return (short) android.icu.text.CollationElementIterator.tertiaryOrder(order);
    196     }
    197 
    198     /**
    199      * Sets the iterator to point to the collation element corresponding to
    200      * the specified character (the parameter is a CHARACTER offset in the
    201      * original string, not an offset into its corresponding sequence of
    202      * collation elements).  The value returned by the next call to next()
    203      * will be the collation element corresponding to the specified position
    204      * in the text.  If that position is in the middle of a contracting
    205      * character sequence, the result of the next call to next() is the
    206      * collation element for that sequence.  This means that getOffset()
    207      * is not guaranteed to return the same value as was passed to a preceding
    208      * call to setOffset().
    209      *
    210      * @param newOffset The new character offset into the original text.
    211      * @since 1.2
    212      */
    213     @SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated
    214     public void setOffset(int newOffset)
    215     {
    216         icuIterator.setOffset(newOffset);
    217     }
    218 
    219     /**
    220      * Returns the character offset in the original text corresponding to the next
    221      * collation element.  (That is, getOffset() returns the position in the text
    222      * corresponding to the collation element that will be returned by the next
    223      * call to next().)  This value will always be the index of the FIRST character
    224      * corresponding to the collation element (a contracting character sequence is
    225      * when two or more characters all correspond to the same collation element).
    226      * This means if you do setOffset(x) followed immediately by getOffset(), getOffset()
    227      * won't necessarily return x.
    228      *
    229      * @return The character offset in the original text corresponding to the collation
    230      * element that will be returned by the next call to next().
    231      * @since 1.2
    232      */
    233     public int getOffset()
    234     {
    235         return icuIterator.getOffset();
    236     }
    237 
    238     /**
    239      * Return the maximum length of any expansion sequences that end
    240      * with the specified comparison order.
    241      * @param order a collation order returned by previous or next.
    242      * @return the maximum length of any expansion sequences ending
    243      *         with the specified order.
    244      * @since 1.2
    245      */
    246     public int getMaxExpansion(int order)
    247     {
    248         return icuIterator.getMaxExpansion(order);
    249     }
    250 
    251     /**
    252      * Set a new string over which to iterate.
    253      *
    254      * @param source  the new source text
    255      * @since 1.2
    256      */
    257     public void setText(String source)
    258     {
    259         icuIterator.setText(source);
    260     }
    261 
    262     /**
    263      * Set a new string over which to iterate.
    264      *
    265      * @param source  the new source text.
    266      * @since 1.2
    267      */
    268     public void setText(CharacterIterator source)
    269     {
    270         icuIterator.setText(source);
    271     }
    272 }
    273