Home | History | Annotate | Download | only in unicode
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 1998-2005, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 **********************************************************************
      8 */
      9 
     10 #ifndef UCHRITER_H
     11 #define UCHRITER_H
     12 
     13 #include "unicode/utypes.h"
     14 #include "unicode/chariter.h"
     15 
     16 /**
     17  * \file
     18  * \brief C++ API: char16_t Character Iterator
     19  */
     20 
     21 U_NAMESPACE_BEGIN
     22 
     23 /**
     24  * A concrete subclass of CharacterIterator that iterates over the
     25  * characters (code units or code points) in a char16_t array.
     26  * It's possible not only to create an
     27  * iterator that iterates over an entire char16_t array, but also to
     28  * create one that iterates over only a subrange of a char16_t array
     29  * (iterators over different subranges of the same char16_t array don't
     30  * compare equal).
     31  * @see CharacterIterator
     32  * @see ForwardCharacterIterator
     33  * @stable ICU 2.0
     34  */
     35 class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
     36 public:
     37   /**
     38    * Create an iterator over the char16_t array referred to by "textPtr".
     39    * The iteration range is 0 to <code>length-1</code>.
     40    * text is only aliased, not adopted (the
     41    * destructor will not delete it).
     42    * @param textPtr The char16_t array to be iterated over
     43    * @param length The length of the char16_t array
     44    * @stable ICU 2.0
     45    */
     46   UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length);
     47 
     48   /**
     49    * Create an iterator over the char16_t array referred to by "textPtr".
     50    * The iteration range is 0 to <code>length-1</code>.
     51    * text is only aliased, not adopted (the
     52    * destructor will not delete it).
     53    * The starting
     54    * position is specified by "position". If "position" is outside the valid
     55    * iteration range, the behavior of this object is undefined.
     56    * @param textPtr The char16_t array to be iterated over
     57    * @param length The length of the char16_t array
     58    * @param position The starting position of the iteration
     59    * @stable ICU 2.0
     60    */
     61   UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length,
     62                          int32_t position);
     63 
     64   /**
     65    * Create an iterator over the char16_t array referred to by "textPtr".
     66    * The iteration range is <code>textBegin</code> to <code>textEnd-1</code>.
     67    * text is only aliased, not adopted (the
     68    * destructor will not delete it).
     69    * The starting
     70    * position is specified by "position". If textBegin and textEnd do not
     71    * form a valid iteration range or "position" is outside the valid
     72    * iteration range, the behavior of this object is undefined.
     73    * @param textPtr The char16_t array to be iterated over
     74    * @param length The length of the char16_t array
     75    * @param textBegin  The begin position of the iteration range
     76    * @param textEnd    The end position of the iteration range
     77    * @param position    The starting position of the iteration
     78    * @stable ICU 2.0
     79    */
     80   UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length,
     81                          int32_t textBegin,
     82                          int32_t textEnd,
     83                          int32_t position);
     84 
     85   /**
     86    * Copy constructor.  The new iterator iterates over the same range
     87    * of the same string as "that", and its initial position is the
     88    * same as "that"'s current position.
     89    * @param that The UCharCharacterIterator to be copied
     90    * @stable ICU 2.0
     91    */
     92   UCharCharacterIterator(const UCharCharacterIterator&  that);
     93 
     94   /**
     95    * Destructor.
     96    * @stable ICU 2.0
     97    */
     98   virtual ~UCharCharacterIterator();
     99 
    100   /**
    101    * Assignment operator.  *this is altered to iterate over the same
    102    * range of the same string as "that", and refers to the same
    103    * character within that string as "that" does.
    104    * @param that The object to be copied
    105    * @return the newly created object
    106    * @stable ICU 2.0
    107    */
    108   UCharCharacterIterator&
    109   operator=(const UCharCharacterIterator&    that);
    110 
    111   /**
    112    * Returns true if the iterators iterate over the same range of the
    113    * same string and are pointing at the same character.
    114    * @param that The ForwardCharacterIterator to compare with for equality
    115    * @return true if the iterators iterate over the same range of the
    116    * same string and are pointing at the same character.
    117    * @stable ICU 2.0
    118    */
    119   virtual UBool          operator==(const ForwardCharacterIterator& that) const;
    120 
    121   /**
    122    * Generates a hash code for this iterator.
    123    * @return the hash code.
    124    * @stable ICU 2.0
    125    */
    126   virtual int32_t         hashCode(void) const;
    127 
    128   /**
    129    * Returns a new UCharCharacterIterator referring to the same
    130    * character in the same range of the same string as this one.  The
    131    * caller must delete the new iterator.
    132    * @return the CharacterIterator newly created
    133    * @stable ICU 2.0
    134    */
    135   virtual CharacterIterator* clone(void) const;
    136 
    137   /**
    138    * Sets the iterator to refer to the first code unit in its
    139    * iteration range, and returns that code unit.
    140    * This can be used to begin an iteration with next().
    141    * @return the first code unit in its iteration range.
    142    * @stable ICU 2.0
    143    */
    144   virtual char16_t         first(void);
    145 
    146   /**
    147    * Sets the iterator to refer to the first code unit in its
    148    * iteration range, returns that code unit, and moves the position
    149    * to the second code unit. This is an alternative to setToStart()
    150    * for forward iteration with nextPostInc().
    151    * @return the first code unit in its iteration range
    152    * @stable ICU 2.0
    153    */
    154   virtual char16_t         firstPostInc(void);
    155 
    156   /**
    157    * Sets the iterator to refer to the first code point in its
    158    * iteration range, and returns that code point.
    159    * This can be used to begin an iteration with next32().
    160    * Note that an iteration with next32PostInc(), beginning with,
    161    * e.g., setToStart() or firstPostInc(), is more efficient.
    162    * @return the first code point in its iteration range
    163    * @stable ICU 2.0
    164    */
    165   virtual UChar32       first32(void);
    166 
    167   /**
    168    * Sets the iterator to refer to the first code point in its
    169    * iteration range, returns that code point, and moves the position
    170    * to the second code point. This is an alternative to setToStart()
    171    * for forward iteration with next32PostInc().
    172    * @return the first code point in its iteration range.
    173    * @stable ICU 2.0
    174    */
    175   virtual UChar32       first32PostInc(void);
    176 
    177   /**
    178    * Sets the iterator to refer to the last code unit in its
    179    * iteration range, and returns that code unit.
    180    * This can be used to begin an iteration with previous().
    181    * @return the last code unit in its iteration range.
    182    * @stable ICU 2.0
    183    */
    184   virtual char16_t         last(void);
    185 
    186   /**
    187    * Sets the iterator to refer to the last code point in its
    188    * iteration range, and returns that code point.
    189    * This can be used to begin an iteration with previous32().
    190    * @return the last code point in its iteration range.
    191    * @stable ICU 2.0
    192    */
    193   virtual UChar32       last32(void);
    194 
    195   /**
    196    * Sets the iterator to refer to the "position"-th code unit
    197    * in the text-storage object the iterator refers to, and
    198    * returns that code unit.
    199    * @param position the position within the text-storage object
    200    * @return the code unit
    201    * @stable ICU 2.0
    202    */
    203   virtual char16_t         setIndex(int32_t position);
    204 
    205   /**
    206    * Sets the iterator to refer to the beginning of the code point
    207    * that contains the "position"-th code unit
    208    * in the text-storage object the iterator refers to, and
    209    * returns that code point.
    210    * The current position is adjusted to the beginning of the code point
    211    * (its first code unit).
    212    * @param position the position within the text-storage object
    213    * @return the code point
    214    * @stable ICU 2.0
    215    */
    216   virtual UChar32       setIndex32(int32_t position);
    217 
    218   /**
    219    * Returns the code unit the iterator currently refers to.
    220    * @return the code unit the iterator currently refers to.
    221    * @stable ICU 2.0
    222    */
    223   virtual char16_t         current(void) const;
    224 
    225   /**
    226    * Returns the code point the iterator currently refers to.
    227    * @return the code point the iterator currently refers to.
    228    * @stable ICU 2.0
    229    */
    230   virtual UChar32       current32(void) const;
    231 
    232   /**
    233    * Advances to the next code unit in the iteration range (toward
    234    * endIndex()), and returns that code unit.  If there are no more
    235    * code units to return, returns DONE.
    236    * @return the next code unit in the iteration range.
    237    * @stable ICU 2.0
    238    */
    239   virtual char16_t         next(void);
    240 
    241   /**
    242    * Gets the current code unit for returning and advances to the next code unit
    243    * in the iteration range
    244    * (toward endIndex()).  If there are
    245    * no more code units to return, returns DONE.
    246    * @return the current code unit.
    247    * @stable ICU 2.0
    248    */
    249   virtual char16_t         nextPostInc(void);
    250 
    251   /**
    252    * Advances to the next code point in the iteration range (toward
    253    * endIndex()), and returns that code point.  If there are no more
    254    * code points to return, returns DONE.
    255    * Note that iteration with "pre-increment" semantics is less
    256    * efficient than iteration with "post-increment" semantics
    257    * that is provided by next32PostInc().
    258    * @return the next code point in the iteration range.
    259    * @stable ICU 2.0
    260    */
    261   virtual UChar32       next32(void);
    262 
    263   /**
    264    * Gets the current code point for returning and advances to the next code point
    265    * in the iteration range
    266    * (toward endIndex()).  If there are
    267    * no more code points to return, returns DONE.
    268    * @return the current code point.
    269    * @stable ICU 2.0
    270    */
    271   virtual UChar32       next32PostInc(void);
    272 
    273   /**
    274    * Returns FALSE if there are no more code units or code points
    275    * at or after the current position in the iteration range.
    276    * This is used with nextPostInc() or next32PostInc() in forward
    277    * iteration.
    278    * @return FALSE if there are no more code units or code points
    279    * at or after the current position in the iteration range.
    280    * @stable ICU 2.0
    281    */
    282   virtual UBool        hasNext();
    283 
    284   /**
    285    * Advances to the previous code unit in the iteration range (toward
    286    * startIndex()), and returns that code unit.  If there are no more
    287    * code units to return, returns DONE.
    288    * @return the previous code unit in the iteration range.
    289    * @stable ICU 2.0
    290    */
    291   virtual char16_t         previous(void);
    292 
    293   /**
    294    * Advances to the previous code point in the iteration range (toward
    295    * startIndex()), and returns that code point.  If there are no more
    296    * code points to return, returns DONE.
    297    * @return the previous code point in the iteration range.
    298    * @stable ICU 2.0
    299    */
    300   virtual UChar32       previous32(void);
    301 
    302   /**
    303    * Returns FALSE if there are no more code units or code points
    304    * before the current position in the iteration range.
    305    * This is used with previous() or previous32() in backward
    306    * iteration.
    307    * @return FALSE if there are no more code units or code points
    308    * before the current position in the iteration range.
    309    * @stable ICU 2.0
    310    */
    311   virtual UBool        hasPrevious();
    312 
    313   /**
    314    * Moves the current position relative to the start or end of the
    315    * iteration range, or relative to the current position itself.
    316    * The movement is expressed in numbers of code units forward
    317    * or backward by specifying a positive or negative delta.
    318    * @param delta the position relative to origin. A positive delta means forward;
    319    * a negative delta means backward.
    320    * @param origin Origin enumeration {kStart, kCurrent, kEnd}
    321    * @return the new position
    322    * @stable ICU 2.0
    323    */
    324   virtual int32_t      move(int32_t delta, EOrigin origin);
    325 
    326   /**
    327    * Moves the current position relative to the start or end of the
    328    * iteration range, or relative to the current position itself.
    329    * The movement is expressed in numbers of code points forward
    330    * or backward by specifying a positive or negative delta.
    331    * @param delta the position relative to origin. A positive delta means forward;
    332    * a negative delta means backward.
    333    * @param origin Origin enumeration {kStart, kCurrent, kEnd}
    334    * @return the new position
    335    * @stable ICU 2.0
    336    */
    337 #ifdef move32
    338    // One of the system headers right now is sometimes defining a conflicting macro we don't use
    339 #undef move32
    340 #endif
    341   virtual int32_t      move32(int32_t delta, EOrigin origin);
    342 
    343   /**
    344    * Sets the iterator to iterate over a new range of text (newText, of length newTextLength)
    345    * @stable ICU 2.0
    346    */
    347   void setText(ConstChar16Ptr newText, int32_t newTextLength);
    348 
    349   /**
    350    * Copies the char16_t array under iteration into the UnicodeString
    351    * referred to by "result".  Even if this iterator iterates across
    352    * only a part of this string, the whole string is copied.
    353    * @param result Receives a copy of the text under iteration.
    354    * @stable ICU 2.0
    355    */
    356   virtual void            getText(UnicodeString& result);
    357 
    358   /**
    359    * Return a class ID for this class (not really public)
    360    * @return a class ID for this class
    361    * @stable ICU 2.0
    362    */
    363   static UClassID         U_EXPORT2 getStaticClassID(void);
    364 
    365   /**
    366    * Return a class ID for this object (not really public)
    367    * @return a class ID for this object.
    368    * @stable ICU 2.0
    369    */
    370   virtual UClassID        getDynamicClassID(void) const;
    371 
    372 protected:
    373   /**
    374    * Protected constructor
    375    * @stable ICU 2.0
    376    */
    377   UCharCharacterIterator();
    378   /**
    379    * Protected member text: the char16_t array under iteration (aliased, not adopted)
    380    * @stable ICU 2.0
    381    */
    382   const char16_t*            text;
    383 
    384 };
    385 
    386 U_NAMESPACE_END
    387 #endif
    388