Home | History | Annotate | Download | only in text
      1 /* GENERATED SOURCE. DO NOT MODIFY. */
      2 // © 2016 and later: Unicode, Inc. and others.
      3 // License & terms of use: http://www.unicode.org/copyright.html#License
      4 /*
      5  *******************************************************************************
      6  * Copyright (C) 1996-2014, International Business Machines Corporation and    *
      7  * others. All Rights Reserved.                                                *
      8  *******************************************************************************
      9  */
     10 package android.icu.text;
     11 
     12 import java.util.Iterator;
     13 
     14 /**
     15  * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
     16  * iterates over either code points or code point ranges.  After all
     17  * code points or ranges have been returned, it returns the
     18  * multicharacter strings of the UnicodSet, if any.
     19  *
     20  * <p>To iterate over code points and multicharacter strings,
     21  * use a loop like this:
     22  * <pre>
     23  * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
     24  *   processString(it.getString());
     25  * }
     26  * </pre>
     27  *
     28  * <p>To iterate over code point ranges, use a loop like this:
     29  * <pre>
     30  * for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.nextRange();) {
     31  *   if (it.codepoint != UnicodeSetIterator.IS_STRING) {
     32  *     processCodepointRange(it.codepoint, it.codepointEnd);
     33  *   } else {
     34  *     processString(it.getString());
     35  *   }
     36  * }
     37  * </pre>
     38  * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
     39  * Do not alter the UnicodeSet while iterating.
     40  * @author M. Davis
     41  */
     42 public class UnicodeSetIterator {
     43 
     44     /**
     45      * Value of <tt>codepoint</tt> if the iterator points to a string.
     46      * If <tt>codepoint == IS_STRING</tt>, then examine
     47      * <tt>string</tt> for the current iteration result.
     48      */
     49     public static int IS_STRING = -1;
     50 
     51     /**
     52      * Current code point, or the special value <tt>IS_STRING</tt>, if
     53      * the iterator points to a string.
     54      */
     55     public int codepoint;
     56 
     57     /**
     58      * When iterating over ranges using <tt>nextRange()</tt>,
     59      * <tt>codepointEnd</tt> contains the inclusive end of the
     60      * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
     61      * iterating over code points using <tt>next()</tt>, or if
     62      * <tt>codepoint == IS_STRING</tt>, then the value of
     63      * <tt>codepointEnd</tt> is undefined.
     64      */
     65     public int codepointEnd;
     66 
     67     /**
     68      * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
     69      * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
     70      * value of <tt>string</tt> is undefined.
     71      */
     72     public String string;
     73 
     74     /**
     75      * Create an iterator over the given set.
     76      * @param set set to iterate over
     77      */
     78     public UnicodeSetIterator(UnicodeSet set) {
     79         reset(set);
     80     }
     81 
     82     /**
     83      * Create an iterator over nothing.  <tt>next()</tt> and
     84      * <tt>nextRange()</tt> return false. This is a convenience
     85      * constructor allowing the target to be set later.
     86      */
     87     public UnicodeSetIterator() {
     88         reset(new UnicodeSet());
     89     }
     90 
     91     /**
     92      * Returns the next element in the set, either a single code point
     93      * or a string.  If there are no more elements in the set, return
     94      * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
     95      * string in the <tt>string</tt> field.  Otherwise the value is a
     96      * single code point in the <tt>codepoint</tt> field.
     97      *
     98      * <p>The order of iteration is all code points in sorted order,
     99      * followed by all strings sorted order.  <tt>codepointEnd</tt> is
    100      * undefined after calling this method.  <tt>string</tt> is
    101      * undefined unless <tt>codepoint == IS_STRING</tt>.  Do not mix
    102      * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
    103      * calling <tt>reset()</tt> between them.  The results of doing so
    104      * are undefined.
    105      * <p><b>Warning: </b>For speed, UnicodeSet iteration does not check for concurrent modification.
    106      * Do not alter the UnicodeSet while iterating.
    107      * @return true if there was another element in the set and this
    108      * object contains the element.
    109      */
    110     public boolean next() {
    111         if (nextElement <= endElement) {
    112             codepoint = codepointEnd = nextElement++;
    113             return true;
    114         }
    115         if (range < endRange) {
    116             loadRange(++range);
    117             codepoint = codepointEnd = nextElement++;
    118             return true;
    119         }
    120 
    121         // stringIterator == null iff there are no string elements remaining
    122 
    123         if (stringIterator == null) {
    124             return false;
    125         }
    126         codepoint = IS_STRING; // signal that value is actually a string
    127         string = stringIterator.next();
    128         if (!stringIterator.hasNext()) {
    129             stringIterator = null;
    130         }
    131         return true;
    132     }
    133 
    134     /**
    135      * Returns the next element in the set, either a code point range
    136      * or a string.  If there are no more elements in the set, return
    137      * false.  If <tt>codepoint == IS_STRING</tt>, the value is a
    138      * string in the <tt>string</tt> field.  Otherwise the value is a
    139      * range of one or more code points from <tt>codepoint</tt> to
    140      * <tt>codepointeEnd</tt> inclusive.
    141      *
    142      * <p>The order of iteration is all code points ranges in sorted
    143      * order, followed by all strings sorted order.  Ranges are
    144      * disjoint and non-contiguous.  <tt>string</tt> is undefined
    145      * unless <tt>codepoint == IS_STRING</tt>.  Do not mix calls to
    146      * <tt>next()</tt> and <tt>nextRange()</tt> without calling
    147      * <tt>reset()</tt> between them.  The results of doing so are
    148      * undefined.
    149      *
    150      * @return true if there was another element in the set and this
    151      * object contains the element.
    152      */
    153     public boolean nextRange() {
    154         if (nextElement <= endElement) {
    155             codepointEnd = endElement;
    156             codepoint = nextElement;
    157             nextElement = endElement+1;
    158             return true;
    159         }
    160         if (range < endRange) {
    161             loadRange(++range);
    162             codepointEnd = endElement;
    163             codepoint = nextElement;
    164             nextElement = endElement+1;
    165             return true;
    166         }
    167 
    168         // stringIterator == null iff there are no string elements remaining
    169 
    170         if (stringIterator == null) {
    171             return false;
    172         }
    173         codepoint = IS_STRING; // signal that value is actually a string
    174         string = stringIterator.next();
    175         if (!stringIterator.hasNext()) {
    176             stringIterator = null;
    177         }
    178         return true;
    179     }
    180 
    181     /**
    182      * Sets this iterator to visit the elements of the given set and
    183      * resets it to the start of that set.  The iterator is valid only
    184      * so long as <tt>set</tt> is valid.
    185      * @param uset the set to iterate over.
    186      */
    187     public void reset(UnicodeSet uset) {
    188         set = uset;
    189         reset();
    190     }
    191 
    192     /**
    193      * Resets this iterator to the start of the set.
    194      */
    195     public void reset() {
    196         endRange = set.getRangeCount() - 1;
    197         range = 0;
    198         endElement = -1;
    199         nextElement = 0;
    200         if (endRange >= 0) {
    201             loadRange(range);
    202         }
    203         stringIterator = null;
    204         if (set.strings != null) {
    205             stringIterator = set.strings.iterator();
    206             if (!stringIterator.hasNext()) {
    207                 stringIterator = null;
    208             }
    209         }
    210     }
    211 
    212     /**
    213      * Gets the current string from the iterator. Only use after calling next(), not nextRange().
    214      */
    215     public String getString() {
    216         if (codepoint != IS_STRING) {
    217             return UTF16.valueOf(codepoint);
    218         }
    219         return string;
    220     }
    221 
    222     // ======================= PRIVATES ===========================
    223 
    224     private UnicodeSet set;
    225     private int endRange = 0;
    226     private int range = 0;
    227 
    228     /**
    229      * @deprecated This API is ICU internal only.
    230      * @hide original deprecated declaration
    231      * @hide draft / provisional / internal are hidden on Android
    232      */
    233     @Deprecated
    234     public UnicodeSet getSet() {
    235         return set;
    236     }
    237 
    238     /**
    239      * @deprecated This API is ICU internal only.
    240      * @hide original deprecated declaration
    241      * @hide draft / provisional / internal are hidden on Android
    242      */
    243     @Deprecated
    244     protected int endElement;
    245     /**
    246      * @deprecated This API is ICU internal only.
    247      * @hide original deprecated declaration
    248      * @hide draft / provisional / internal are hidden on Android
    249      */
    250     @Deprecated
    251     protected int nextElement;
    252     private Iterator<String> stringIterator = null;
    253 
    254     /**
    255      * Invariant: stringIterator is null when there are no (more) strings remaining
    256      */
    257 
    258     /**
    259      * @deprecated This API is ICU internal only.
    260      * @hide original deprecated declaration
    261      * @hide draft / provisional / internal are hidden on Android
    262      */
    263     @Deprecated
    264     protected void loadRange(int aRange) {
    265         nextElement = set.getRangeStart(aRange);
    266         endElement = set.getRangeEnd(aRange);
    267     }
    268 }
    269