Home | History | Annotate | Download | only in unicode
      1 /*
      2  ******************************************************************************
      3  *   Copyright (C) 1997-2008, International Business Machines
      4  *   Corporation and others.  All Rights Reserved.
      5  ******************************************************************************
      6  */
      7 
      8 /**
      9  * \file
     10  * \brief C++ API: Collation Element Iterator.
     11  */
     12 
     13 /**
     14 * File coleitr.h
     15 *
     16 *
     17 *
     18 * Created by: Helena Shih
     19 *
     20 * Modification History:
     21 *
     22 *  Date       Name        Description
     23 *
     24 *  8/18/97    helena      Added internal API documentation.
     25 * 08/03/98    erm         Synched with 1.2 version CollationElementIterator.java
     26 * 12/10/99    aliu        Ported Thai collation support from Java.
     27 * 01/25/01    swquek      Modified into a C++ wrapper calling C APIs (ucoliter.h)
     28 * 02/19/01    swquek      Removed CollationElementsIterator() since it is
     29 *                         private constructor and no calls are made to it
     30 */
     31 
     32 #ifndef COLEITR_H
     33 #define COLEITR_H
     34 
     35 #include "unicode/utypes.h"
     36 
     37 
     38 #if !UCONFIG_NO_COLLATION
     39 
     40 #include "unicode/uobject.h"
     41 #include "unicode/tblcoll.h"
     42 #include "unicode/ucoleitr.h"
     43 
     44 /**
     45  * The UCollationElements struct, declared here without a definition.
     46  * For usage in C programs; CollationElementIterator holds a pointer to it.
     47  * @stable ICU 2.0
     48  */
     49 typedef struct UCollationElements UCollationElements;
     50 
     51 U_NAMESPACE_BEGIN
     52 
     53 /**
     54 * The CollationElementIterator class is used as an iterator to walk through
     55 * each character of an international string. Use the iterator to return the
     56 * ordering priority of the positioned character. The ordering priority of a
     57 * character, which we refer to as a key, defines how a character is collated in
     58 * the given collation object.
     59 * For example, consider the following in Spanish:
     60 * <pre>
     61 *        "ca" -> the first key is key('c') and second key is key('a').
     62 *        "cha" -> the first key is key('ch') and second key is key('a').</pre>
     63 * And in German,
     64 * <pre> \htmlonly       "&#x00E6;b"-> the first key is key('a'), the second key is key('e'), and
     65 *        the third key is key('b'). \endhtmlonly </pre>
     66 * The key of a character, is an integer composed of primary order(short),
     67 * secondary order(char), and tertiary order(char). Java strictly defines the
     68 * size and signedness of its primitive data types. Therefore, the static
     69 * functions primaryOrder(), secondaryOrder(), and tertiaryOrder() return
     70 * int32_t to ensure the correctness of the key value.
     71 * <p>Example of the iterator usage: (without error checking)
     72 * <pre>
     73 * \code
     74 *   void CollationElementIterator_Example()
     75 *   {
     76 *       UnicodeString str = "This is a test";
     77 *       UErrorCode success = U_ZERO_ERROR;
     78 *       RuleBasedCollator* rbc =
     79 *           (RuleBasedCollator*) RuleBasedCollator::createInstance(success);
     80 *       CollationElementIterator* c =
     81 *           rbc->createCollationElementIterator( str );
     82 *       int32_t order = c->next(success);
     83 *       c->reset();
     84 *       order = c->previous(success);
     85 *       delete c;
     86 *       delete rbc;
     87 *   }
     88 * \endcode
     89 * </pre>
     90 * <p>
     91 * CollationElementIterator::next returns the collation order of the next
     92 * character based on the comparison level of the collator.
     93 * CollationElementIterator::previous returns the collation order of the
     94 * previous character based on the comparison level of the collator.
     95 * The Collation Element Iterator moves only in one direction between calls to
     96 * CollationElementIterator::reset. That is, CollationElementIterator::next()
     97 * and CollationElementIterator::previous cannot be intermixed. Whenever
     98 * CollationElementIterator::previous is to be called after
     99 * CollationElementIterator::next() or vice versa,
    100 * CollationElementIterator::reset has to be called first to reset the status,
    101 * shifting pointers to either the end or the start of the string. Hence at the
    102 * next call of CollationElementIterator::previous or
    103 * CollationElementIterator::next(), the first or last collation order will be
    104 * returned.
    105 * If a change of direction is done without a CollationElementIterator::reset(),
    106 * the result is undefined.
    107 * The result of a forward iterate (CollationElementIterator::next) and
    108 * reversed result of the backward iterate (CollationElementIterator::previous)
    109 * on the same string are equivalent, if collation orders with the value
    110 * UCOL_IGNORABLE are ignored.
    111 * Collation orders are based on the comparison level of the collator.  A
    112 * collation order consists of primary order, secondary order and tertiary
    113 * order.  The data type of the collation order is <strong>int32_t</strong>.
    114 *
    115 * Note, CollationElementIterator should not be subclassed.
    116 * @see     Collator
    117 * @see     RuleBasedCollator
    118 * @version 1.8 Jan 16 2001
    119 */
    120 class U_I18N_API CollationElementIterator : public UObject {
    121 public:
    122 
    123     // CollationElementIterator public data member ------------------------------
    124 
    125     enum {
    126         /**
    127          * NULLORDER indicates that an error has occurred while processing
    128          * @stable ICU 2.0
    129          */
    130         NULLORDER = (int32_t)0xffffffff
    131     };
    132 
    133     // CollationElementIterator public constructor/destructor -------------------
    134 
    135     /**
    136     * Copy constructor.
    137     *
    138     * @param other    the object to be copied from
    139     * @stable ICU 2.0
    140     */
    141     CollationElementIterator(const CollationElementIterator& other);
    142 
    143     /**
    144     * Destructor
    145     * @stable ICU 2.0
    146     */
    147     virtual ~CollationElementIterator();
    148 
    149     // CollationElementIterator public methods ----------------------------------
    150 
    151     /**
    152     * Returns true if "other" is the same as "this"
    153     *
    154     * @param other    the object to be compared
    155     * @return         true if "other" is the same as "this"
    156     * @stable ICU 2.0
    157     */
    158     UBool operator==(const CollationElementIterator& other) const;
    159 
    160     /**
    161     * Returns true if "other" is not the same as "this".
    162     *
    163     * @param other    the object to be compared
    164     * @return         true if "other" is not the same as "this"
    165     * @stable ICU 2.0
    166     */
    167     UBool operator!=(const CollationElementIterator& other) const;
    168 
    169     /**
    170     * Resets the cursor to the beginning of the string.
    171     * @stable ICU 2.0
    172     */
    173     void reset(void);
    174 
    175     /**
    176     * Gets the ordering priority of the next character in the string.
    177     * @param status the error code status.
    178     * @return the next character's ordering, or NULLORDER if an
    179     *         error has occurred or if the end of string has been reached
    180     * @stable ICU 2.0
    181     */
    182     int32_t next(UErrorCode& status);
    183 
    184     /**
    185     * Get the ordering priority of the previous collation element in the string.
    186     * @param status the error code status.
    187     * @return the previous element's ordering, or NULLORDER if an
    188     *         error has occurred or if the start of string has been reached
    189     * @stable ICU 2.0
    190     */
    191     int32_t previous(UErrorCode& status);
    192 
    193     /**
    194     * Gets the primary order of a collation order.
    195     * @param order the collation order
    196     * @return the primary order of a collation order.
    197     * @stable ICU 2.0
    198     */
    199     static inline int32_t primaryOrder(int32_t order);
    200 
    201     /**
    202     * Gets the secondary order of a collation order.
    203     * @param order the collation order
    204     * @return the secondary order of a collation order.
    205     * @stable ICU 2.0
    206     */
    207     static inline int32_t secondaryOrder(int32_t order);
    208 
    209     /**
    210     * Gets the tertiary order of a collation order.
    211     * @param order the collation order
    212     * @return the tertiary order of a collation order.
    213     * @stable ICU 2.0
    214     */
    215     static inline int32_t tertiaryOrder(int32_t order);
    216 
    217     /**
    218     * Return the maximum length of any expansion sequences that end with the
    219     * specified comparison order.
    220     * @param order a collation order returned by previous or next.
    221     * @return maximum size of the expansion sequences ending with the collation
    222     *         element or 1 if collation element does not occur at the end of any
    223     *         expansion sequence
    224     * @stable ICU 2.0
    225     */
    226     int32_t getMaxExpansion(int32_t order) const;
    227 
    228     /**
    229     * Gets the comparison order in the desired strength, ignoring the other differences.
    230     * @param order The order value
    231     * @return the comparison order in the desired strength
    232     * @stable ICU 2.0
    233     */
    234     int32_t strengthOrder(int32_t order) const;
    235 
    236     /**
    237     * Sets the source string.
    238     * @param str the source string.
    239     * @param status the error code status.
    240     * @stable ICU 2.0
    241     */
    242     void setText(const UnicodeString& str, UErrorCode& status);
    243 
    244     /**
    245     * Sets the source string.
    246     * @param str the source character iterator.
    247     * @param status the error code status.
    248     * @stable ICU 2.0
    249     */
    250     void setText(CharacterIterator& str, UErrorCode& status);
    251 
    252     /**
    253     * Checks if a comparison order is ignorable.
    254     * @param order the collation order.
    255     * @return TRUE if a character is ignorable, FALSE otherwise.
    256     * @stable ICU 2.0
    257     */
    258     static inline UBool isIgnorable(int32_t order);
    259 
    260     /**
    261     * Gets the offset of the currently processed character in the source string.
    262     * @return the offset of the character.
    263     * @stable ICU 2.0
    264     */
    265     int32_t getOffset(void) const;
    266 
    267     /**
    268     * Sets the offset of the currently processed character in the source string.
    269     * This method does not return a value.
    270     * @param newOffset the new offset.
    271     * @param status the error code status.
    272     * @stable ICU 2.0
    273     */
    274     void setOffset(int32_t newOffset, UErrorCode& status);
    275 
    276     /**
    277     * ICU "poor man's RTTI", returns a UClassID for the actual class.
    278     *
    279     * @stable ICU 2.2
    280     */
    281     virtual UClassID getDynamicClassID() const;
    282 
    283     /**
    284     * ICU "poor man's RTTI", returns a UClassID for this class.
    285     *
    286     * @stable ICU 2.2
    287     */
    288     static UClassID U_EXPORT2 getStaticClassID();
    289 
    290 protected:
    291 
    292     // CollationElementIterator protected constructors --------------------------
    293     /**
    294     * @stable ICU 2.0
    295     */
    296     friend class RuleBasedCollator;
    297 
    298     /**
    299     * CollationElementIterator constructor. This takes the source string and the
    300     * collation object. The cursor will walk through the source string based on
    301     * the predefined collation rules. If the source string is empty, NULLORDER
    302     * will be returned on the calls to next().
    303     * @param sourceText    the source string.
    304     * @param order         the collation object.
    305     * @param status        the error code status.
    306     * @stable ICU 2.0
    307     */
    308     CollationElementIterator(const UnicodeString& sourceText,
    309         const RuleBasedCollator* order, UErrorCode& status);
    310 
    311     /**
    312     * CollationElementIterator constructor. This takes the source text and the
    313     * collation object.  The cursor will walk through the source text based on
    314     * the predefined collation rules.  If the source text is empty, NULLORDER
    315     * will be returned on the calls to next().
    316     * @param sourceText    the source character iterator.
    317     * @param order         the collation object.
    318     * @param status        the error code status.
    319     * @stable ICU 2.0
    320     */
    321     CollationElementIterator(const CharacterIterator& sourceText,
    322         const RuleBasedCollator* order, UErrorCode& status);
    323 
    324     // CollationElementIterator protected methods -------------------------------
    325 
    326     /**
    327     * Assignment operator
    328     *
    329     * @param other    the object to be copied
    330     * @stable ICU 2.0
    331     */
    332     const CollationElementIterator&
    333         operator=(const CollationElementIterator& other);
    334 
    335 private:
    336     CollationElementIterator(); // default constructor not implemented
    337 
    338     // CollationElementIterator private data members ----------------------------
    339 
    340     /**
    341     * Data wrapper for collation elements
    342     */
    343     UCollationElements *m_data_;
    344 
    345     /**
    346     * Indicates if m_data_ belongs to this object.
    347     */
    348     UBool isDataOwned_;
    349 
    350 };
    351 
    352 // CollationElementIterator inline method definitions -------------------------
    353 
    354 /**
    355 * Get the primary order of a collation order.
    356 * @param order the collation order
    357 * @return the primary order, masked and shifted down without sign extension.
    358 */
    359 inline int32_t CollationElementIterator::primaryOrder(int32_t order)
    360 {
    361     const uint32_t bits = (uint32_t)order & (uint32_t)RuleBasedCollator::PRIMARYORDERMASK;
    362     return (int32_t)(bits >> RuleBasedCollator::PRIMARYORDERSHIFT); // unsigned shift: a set top bit is not smeared
    363 }
    364 
    365 /**
    366 * Extracts the secondary-strength component of a collation order.
    367 * @param order the collation order to decompose
    368 * @return the bits selected by SECONDARYORDERMASK, shifted down by SECONDARYORDERSHIFT.
    369 */
    370 inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
    371 {
    372     const int32_t secondaryBits = order & RuleBasedCollator::SECONDARYORDERMASK;
    373     return secondaryBits >> RuleBasedCollator::SECONDARYORDERSHIFT;
    374 }
    375 
    376 /**
    377 * Extracts the tertiary-strength component of a collation order.
    378 * @param order the collation order to decompose
    379 * @return the bits of order selected by TERTIARYORDERMASK.
    380 */
    381 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
    382 {
    383     return order & RuleBasedCollator::TERTIARYORDERMASK;
    384 }
    385 
    386 inline int32_t CollationElementIterator::getMaxExpansion(int32_t order) const
    387 {   // Forwards to the C API, passing the order as an unsigned 32-bit value.
    388     return ucol_getMaxExpansion(m_data_, static_cast<uint32_t>(order));
    389 }
    390 
    391 inline UBool CollationElementIterator::isIgnorable(int32_t order)
    392 {   // Ignorable means the primary component equals PRIMIGNORABLE.
    393     return RuleBasedCollator::PRIMIGNORABLE == primaryOrder(order);
    394 }
    395 
    396 U_NAMESPACE_END
    397 
    398 #endif /* #if !UCONFIG_NO_COLLATION */
    399 
    400 #endif
    401