Home | History | Annotate | Download | only in unicode
      1 /*
      2 **********************************************************************
      3 * Copyright (c) 2002-2014, International Business Machines
      4 * Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 */
      7 #ifndef USETITER_H
      8 #define USETITER_H
      9 
     10 #include "unicode/utypes.h"
     11 #include "unicode/uobject.h"
     12 #include "unicode/unistr.h"
     13 
     14 /**
     15  * \file
     16  * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
     17  */
     18 
     19 U_NAMESPACE_BEGIN
     20 
     21 class UnicodeSet;     // forward declaration: used in this header only through pointers and references
     22 class UnicodeString;  // forward declaration: used in this header only through pointers and references
     23 
     24 /**
     25  *
     26  * UnicodeSetIterator iterates over the contents of a UnicodeSet.  It
     27  * iterates over either code points or code point ranges.  After all
     28  * code points or ranges have been returned, it returns the
     29  * multicharacter strings of the UnicodeSet, if any.
     30  *
     31  * This class is not intended to be subclassed.  Consider any fields
     32  *  or methods declared as "protected" to be private.  The use of
     33  *  protected in this class is an artifact of history.
     34  *
     35  * <p>To iterate over code points and strings, use a loop like this:
     36  * <pre>
     37  * UnicodeSetIterator it(set);
     38  * while (it.next()) {
     39  *     processItem(it.getString());
     40  * }
     41  * </pre>
     42  * <p>Each item in the set is accessed as a string.  Set elements
     43  *    consisting of single code points are returned as strings containing
     44  *    just the one code point.
     45  *
     46  * <p>To iterate over code point ranges, instead of individual code points,
     47  *    use a loop like this:
     48  * <pre>
     49  * UnicodeSetIterator it(set);
     50  * while (it.nextRange()) {
     51  *   if (it.isString()) {
     52  *     processString(it.getString());
     53  *   } else {
     54  *     processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
     55  *   }
     56  * }
     57  * </pre>
     58  * @author M. Davis
     59  * @stable ICU 2.4
     60  */
     61 class U_COMMON_API UnicodeSetIterator : public UObject {
     62 
     63  protected:
     64 
     65     /**
     66      * Value of <tt>codepoint</tt> if the iterator points to a string.
     67      * If <tt>codepoint == IS_STRING</tt>, then examine
     68      * <tt>string</tt> for the current iteration result.
     69      * @stable ICU 2.4
     70      */
     71     enum { IS_STRING = -1 };
     72 
     73     /**
     74      * Current code point, or the special value <tt>IS_STRING</tt>, if
     75      * the iterator points to a string.
     76      * @stable ICU 2.4
     77      */
     78     UChar32 codepoint;
     79 
     80     /**
     81      * When iterating over ranges using <tt>nextRange()</tt>,
     82      * <tt>codepointEnd</tt> contains the inclusive end of the
     83      * iteration range, if <tt>codepoint != IS_STRING</tt>.  If
     84      * iterating over code points using <tt>next()</tt>, or if
     85      * <tt>codepoint == IS_STRING</tt>, then the value of
     86      * <tt>codepointEnd</tt> is undefined.
     87      * @stable ICU 2.4
     88      */
     89     UChar32 codepointEnd;
     90 
     91     /**
     92      * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
     93      * to the current string.  If <tt>codepoint != IS_STRING</tt>, the
     94      * value of <tt>string</tt> is undefined.
     95      * @stable ICU 2.4
     96      */
     97     const UnicodeString* string;
     98 
     99  public:
    100 
    101     /**
    102      * Create an iterator over the given set.  The iterator is valid
    103      * only so long as <tt>set</tt> is valid.
    104      * @param set set to iterate over
    105      * @stable ICU 2.4
    106      */
    107     UnicodeSetIterator(const UnicodeSet& set);
    108 
    109     /**
    110      * Create an iterator over nothing.  <tt>next()</tt> and
    111      * <tt>nextRange()</tt> return false. This is a convenience
    112      * constructor allowing the target to be set later.
    113      * @stable ICU 2.4
    114      */
    115     UnicodeSetIterator();
    116 
    117     /**
    118      * Destructor.
    119      * @stable ICU 2.4
    120      */
    121     virtual ~UnicodeSetIterator();
    122 
    123     /**
    124      * Returns true if the current element is a string.  If so, the
    125      * caller can retrieve it with <tt>getString()</tt>.  If this
    126      * method returns false, the current element is a code point or
    127      * code point range, depending on whether <tt>next()</tt> or
    128      * <tt>nextRange()</tt> was called.
    129      * Elements of types string and codepoint can both be retrieved
    130      * with the function <tt>getString()</tt>.
    131      * Elements of type codepoint can also be retrieved with
    132      * <tt>getCodepoint()</tt>.
    133      * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
    134      * of the range, and <tt>getCodepointEnd()</tt> returns the end
    135      * of the range.
    136      * @stable ICU 2.4
    137      */
    138     inline UBool isString() const;
    139 
    140     /**
    141      * Returns the current code point, if <tt>isString()</tt> returned
    142      * false.  Otherwise returns an undefined result.
    143      * @stable ICU 2.4
    144      */
    145     inline UChar32 getCodepoint() const;
    146 
    147     /**
    148      * Returns the end of the current code point range, if
    149      * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
    150      * called.  Otherwise returns an undefined result.
    151      * @stable ICU 2.4
    152      */
    153     inline UChar32 getCodepointEnd() const;
    154 
    155     /**
    156      * Returns the current string, if <tt>isString()</tt> returned
    157      * true.  If the current iteration item is a code point, a UnicodeString
    158      * containing that single code point is returned.
    159      *
    160      * Ownership of the returned string remains with the iterator.
    161      * The string is guaranteed to remain valid only until the iterator is
    162      *   advanced to the next item, or until the iterator is deleted.
    163      *
    164      * @stable ICU 2.4
    165      */
    166     const UnicodeString& getString();
    167 
    168     /**
    169      * Advances the iteration position to the next element in the set,
    170      * which can be either a single code point or a string.
    171      * If there are no more elements in the set, return false.
    172      *
    173      * <p>
    174      * If <tt>isString() == TRUE</tt>, the value is a
    175      * string, otherwise the value is a
    176      * single code point.  Elements of either type can be retrieved
    177      * with the function <tt>getString()</tt>, while elements
    178      * consisting of a single code point can be retrieved with
    179      * <tt>getCodepoint()</tt>.
    180      *
    181      * <p>The order of iteration is all code points in sorted order,
    182      * followed by all strings in sorted order.  Do not mix
    183      * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
    184      * calling <tt>reset()</tt> between them.  The results of doing so
    185      * are undefined.
    186      *
    187      * @return true if there was another element in the set.
    188      * @stable ICU 2.4
    189      */
    190     UBool next();
    191 
    192     /**
    193      * Returns the next element in the set, either a code point range
    194      * or a string.  If there are no more elements in the set, return
    195      * false.  If <tt>isString() == TRUE</tt>, the value is a
    196      * string and can be accessed with <tt>getString()</tt>.  Otherwise the value is a
    197      * range of one or more code points from <tt>getCodepoint()</tt> to
    198      * <tt>getCodepointEnd()</tt> inclusive.
    199      *
    200      * <p>The order of iteration is all code point ranges in sorted
    201      * order, followed by all strings in sorted order.  Ranges are
    202      * disjoint and non-contiguous.  The value returned from <tt>getString()</tt>
    203      * is undefined unless <tt>isString() == TRUE</tt>.  Do not mix calls to
    204      * <tt>next()</tt> and <tt>nextRange()</tt> without calling
    205      * <tt>reset()</tt> between them.  The results of doing so are
    206      * undefined.
    207      *
    208      * @return true if there was another element in the set.
    209      * @stable ICU 2.4
    210      */
    211     UBool nextRange();
    212 
    213     /**
    214      * Sets this iterator to visit the elements of the given set and
    215      * resets it to the start of that set.  The iterator is valid only
    216      * so long as <tt>set</tt> is valid.
    217      * @param set the set to iterate over.
    218      * @stable ICU 2.4
    219      */
    220     void reset(const UnicodeSet& set);
    221 
    222     /**
    223      * Resets this iterator to the start of the set.
    224      * @stable ICU 2.4
    225      */
    226     void reset();
    227 
    228     /**
    229      * ICU "poor man's RTTI", returns a UClassID for this class.
    230      *
    231      * @stable ICU 2.4
    232      */
    233     static UClassID U_EXPORT2 getStaticClassID();
    234 
    235     /**
    236      * ICU "poor man's RTTI", returns a UClassID for the actual class.
    237      *
    238      * @stable ICU 2.4
    239      */
    240     virtual UClassID getDynamicClassID() const;
    241 
    242     // ======================= PRIVATES ===========================
    243 
    244  protected:
    245 
    246     // endElement and nextElement are really UChar32's, but we keep
    247     // them as signed int32_t's so we can do comparisons with
    248     // endElement set to -1.  Leave them as int32_t's.
    249     /** The set being iterated over.
    250      * @stable ICU 2.4
    251      */
    252     const UnicodeSet* set;
    253     /** End range (presumably the index of the last range in the set -- TODO confirm)
    254      * @stable ICU 2.4
    255      */
    256     int32_t endRange;
    257     /** Range (presumably the index of the range currently being visited -- TODO confirm)
    258      * @stable ICU 2.4
    259      */
    260     int32_t range;
    261     /** End element: really a UChar32, kept as a signed int32_t because it can be set to -1
    262      * @stable ICU 2.4
    263      */
    264     int32_t endElement;
    265     /** Next element: really a UChar32, kept as a signed int32_t for comparison with endElement
    266      * @stable ICU 2.4
    267      */
    268     int32_t nextElement;
    269     //UBool abbreviated;
    270     /** Next string (presumably the index of the next string to return -- TODO confirm)
    271      * @stable ICU 2.4
    272      */
    273     int32_t nextString;
    274     /** String count (presumably the number of strings in the set -- TODO confirm)
    275      * @stable ICU 2.4
    276      */
    277     int32_t stringCount;
    278 
    279     /**
    280      *  Points to the string to use when the caller asks for a
    281      *  string and the current iteration item is a code point, not a string.
    282      *  @internal
    283      */
    284     UnicodeString *cpString;
    285 
    286     /** Copy constructor. Disallowed: declared protected so client code cannot copy the iterator.
    287      * @stable ICU 2.4
    288      */
    289     UnicodeSetIterator(const UnicodeSetIterator&); // disallow
    290 
    291     /** Assignment operator. Disallowed: declared protected so client code cannot assign the iterator.
    292      * @stable ICU 2.4
    293      */
    294     UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
    295 
    296     /** Load range (presumably makes the range with the given index the current one -- TODO confirm)
    297      * @stable ICU 2.4
    298      */
    299     virtual void loadRange(int32_t range);
    300 
    301 };
    302 
    303 inline UBool UnicodeSetIterator::isString() const {
    304     return static_cast<UChar32>(IS_STRING) == codepoint;  // codepoint holds the IS_STRING marker while on a string
    305 }
    306 
    307 inline UChar32 UnicodeSetIterator::getCodepoint() const {
    308     return this->codepoint;  // start of the range after nextRange(); IS_STRING while on a string
    309 }
    310 
    311 inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
    312     return this->codepointEnd;  // inclusive range end; only meaningful after nextRange() on a non-string item
    313 }
    314 
    315 
    316 U_NAMESPACE_END
    317 
    318 #endif
    319