Home | History | Annotate | Download | only in unicode
      1 /*
      2  ********************************************************************
      3  * COPYRIGHT:
      4  * Copyright (c) 1996-2006, International Business Machines Corporation and
      5  * others. All Rights Reserved.
      6  ********************************************************************
      7  */
      8 
      9 #ifndef NORMLZR_H
     10 #define NORMLZR_H
     11 
     12 #include "unicode/utypes.h"
     13 
     14 /**
     15  * \file
     16  * \brief C++ API: Unicode Normalization
     17  */
     18 
     19 #if !UCONFIG_NO_NORMALIZATION
     20 
     21 #include "unicode/uobject.h"
     22 #include "unicode/unistr.h"
     23 #include "unicode/chariter.h"
     24 #include "unicode/unorm.h"
     25 
     26 
     27 struct UCharIterator;
     28 typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
     29 
     30 U_NAMESPACE_BEGIN
     31 /**
     32  * The Normalizer class supports the standard normalization forms described in
     33  * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
     34  * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
     35  *
     36  * The Normalizer class consists of two parts:
     37  * - static functions that normalize strings or test if strings are normalized
     38  * - a Normalizer object is an iterator that takes any kind of text and
     39  *   provides iteration over its normalized form
     40  *
     41  * The Normalizer class is not suitable for subclassing.
     42  *
     43  * The static functions are basically wrappers around the C implementation,
     44  * using UnicodeString instead of UChar*.
     45  * For basic information about normalization forms and details about the C API
     46  * please see the documentation in unorm.h.
     47  *
     48  * The iterator API with the Normalizer constructors and the non-static functions
     49  * uses a CharacterIterator as input. It is possible to pass a string which
     50  * is then internally wrapped in a CharacterIterator.
     51  * The input text is not normalized all at once, but incrementally where needed
     52  * (providing efficient random access).
     53  * This makes it possible to pass in a large text but spend only a small amount
     54  * of time normalizing a small part of that text.
     55  * However, if the entire text is normalized, then the iterator will be
     56  * slower than normalizing the entire text at once and iterating over the result.
     57  * A possible use of the Normalizer iterator is also to report an index into the
     58  * original text that is close to where the normalized characters come from.
     59  *
     60  * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
     61  * The earlier implementation reported the getIndex() inconsistently,
     62  * and previous() could not be used after setIndex(), next(), first(), and current().
     63  *
     64  * Normalizer allows normalization to start from anywhere in the input text by
     65  * calling setIndexOnly(), first(), or last().
     66  * Without calling any of these, the iterator will start at the beginning of the text.
     67  *
     68  * At any time, next() returns the next normalized code point (UChar32),
     69  * with post-increment semantics (like CharacterIterator::next32PostInc()).
     70  * previous() returns the previous normalized code point (UChar32),
     71  * with pre-decrement semantics (like CharacterIterator::previous32()).
     72  *
     73  * current() returns the current code point
     74  * (respectively the one at the newly set index) without moving
     75  * the getIndex(). Note that if the text at the current position
     76  * needs to be normalized, then these functions will do that.
     77  * (This is why current() is not const.)
     78  * It is more efficient to call setIndexOnly() instead, which does not
     79  * normalize.
     80  *
     81  * getIndex() always refers to the position in the input text where the normalized
     82  * code points are returned from. It does not always change with each returned
     83  * code point.
     84  * The code point that is returned from any of the functions
     85  * corresponds to text at or after getIndex(), according to the
     86  * function's iteration semantics (post-increment or pre-decrement).
     87  *
     88  * next() returns a code point from at or after the getIndex()
     89  * from before the next() call. After the next() call, the getIndex()
     90  * might have moved to where the next code point will be returned from
     91  * (from a next() or current() call).
     92  * This is semantically equivalent to array access with array[index++]
     93  * (post-increment semantics).
     94  *
     95  * previous() returns a code point from at or after the getIndex()
     96  * from after the previous() call.
     97  * This is semantically equivalent to array access with array[--index]
     98  * (pre-decrement semantics).
     99  *
    100  * Internally, the Normalizer iterator normalizes a small piece of text
    101  * starting at the getIndex() and ending at a following "safe" index.
    102  * The normalized result is stored in an internal string buffer, and
    103  * the code points are iterated from there.
    104  * With multiple iteration calls, this is repeated until the next piece
    105  * of text needs to be normalized, and the getIndex() needs to be moved.
    106  *
    107  * The following "safe" index, the internal buffer, and the secondary
    108  * iteration index into that buffer are not exposed on the API.
    109  * This also means that it is currently not practical to return to
    110  * a particular, arbitrary position in the text because one would need to
    111  * know, and be able to set, in addition to the getIndex(), at least also the
    112  * current index into the internal buffer.
    113  * It is currently only possible to observe when getIndex() changes
    114  * (with careful consideration of the iteration semantics),
    115  * at which time the internal index will be 0.
    116  * For example, if getIndex() is different after next() than before it,
    117  * then the internal index is 0 and one can return to this getIndex()
    118  * later with setIndexOnly().
    119  *
    120  * @author Laura Werner, Mark Davis, Markus Scherer
    121  * @stable ICU 2.0
    122  */
    123 class U_COMMON_API Normalizer : public UObject {
    124 public:
    125   /**
    126    * If DONE is returned from an iteration function that returns a code point,
    127    * then there are no more normalization results available.
    128    * @stable ICU 2.0
    129    */
    130   enum {
    131       DONE=0xffff  // same value as U+FFFF; see next()/previous() for telling the two apart
    132   };
    133 
    134   // Constructors
    135 
    136   /**
    137    * Creates a new <code>Normalizer</code> object for iterating over the
    138    * normalized form of a given string.
    139    * <p>
    140    * @param str   The string to be normalized.  The normalization
    141    *              will start at the beginning of the string.
    142    *
    143    * @param mode  The normalization mode.
    144    * @stable ICU 2.0
    145    */
    146   Normalizer(const UnicodeString& str, UNormalizationMode mode);
    147 
    148   /**
    149    * Creates a new <code>Normalizer</code> object for iterating over the
    150    * normalized form of a given string.
    151    * <p>
    152    * @param str   The string to be normalized.  The normalization
    153    *              will start at the beginning of the string.
    154    *
    155    * @param length Length of the string, or -1 if NUL-terminated.
    156    * @param mode  The normalization mode.
    157    * @stable ICU 2.0
    158    */
    159   Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
    160 
    161   /**
    162    * Creates a new <code>Normalizer</code> object for iterating over the
    163    * normalized form of the given text.
    164    * <p>
    165    * @param iter  The input text to be normalized.  The normalization
    166    *              will start at the beginning of the string.
    167    *
    168    * @param mode  The normalization mode.
    169    * @stable ICU 2.0
    170    */
    171   Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
    172 
    173   /**
    174    * Copy constructor.
    175    * @param copy The object to be copied.
    176    * @stable ICU 2.0
    177    */
    178   Normalizer(const Normalizer& copy);
    179 
    180   /**
    181    * Destructor
    182    * @stable ICU 2.0
    183    */
    184   virtual ~Normalizer();
    185 
    186 
    187   //-------------------------------------------------------------------------
    188   // Static utility methods
    189   //-------------------------------------------------------------------------
    190 
    191   /**
    192    * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
    193    * This is a wrapper for unorm_normalize(), using UnicodeStrings.
    194    *
    195    * The <code>options</code> parameter specifies which optional
    196    * <code>Normalizer</code> features are to be enabled for this operation.
    197    *
    198    * @param source    the input string to be normalized.
    199    * @param mode      the normalization mode
    200    * @param options   the optional features to be enabled (0 for no options)
    201    * @param result    The normalized string (on output).
    202    * @param status    The error code.
    203    * @stable ICU 2.0
    204    */
    205   static void U_EXPORT2 normalize(const UnicodeString& source,
    206                         UNormalizationMode mode, int32_t options,
    207                         UnicodeString& result,
    208                         UErrorCode &status);
    209 
    210   /**
    211    * Compose a <code>UnicodeString</code>.
    212    * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
    213    * This is a wrapper for unorm_normalize(), using UnicodeStrings.
    214    *
    215    * The <code>options</code> parameter specifies which optional
    216    * <code>Normalizer</code> features are to be enabled for this operation.
    217    *
    218    * @param source    the string to be composed.
    219    * @param compat    Perform compatibility decomposition before composition.
    220    *                  If this argument is <code>FALSE</code>, only canonical
    221    *                  decomposition will be performed.
    222    * @param options   the optional features to be enabled (0 for no options)
    223    * @param result    The composed string (on output).
    224    * @param status    The error code.
    225    * @stable ICU 2.0
    226    */
    227   static void U_EXPORT2 compose(const UnicodeString& source,
    228                       UBool compat, int32_t options,
    229                       UnicodeString& result,
    230                       UErrorCode &status);
    231 
    232   /**
    233    * Static method to decompose a <code>UnicodeString</code>.
    234    * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
    235    * This is a wrapper for unorm_normalize(), using UnicodeStrings.
    236    *
    237    * The <code>options</code> parameter specifies which optional
    238    * <code>Normalizer</code> features are to be enabled for this operation.
    239    *
    240    * @param source    the string to be decomposed.
    241    * @param compat    Perform compatibility decomposition.
    242    *                  If this argument is <code>FALSE</code>, only canonical
    243    *                  decomposition will be performed.
    244    * @param options   the optional features to be enabled (0 for no options)
    245    * @param result    The decomposed string (on output).
    246    * @param status    The error code.
    247    * @stable ICU 2.0
    248    */
    249   static void U_EXPORT2 decompose(const UnicodeString& source,
    250                         UBool compat, int32_t options,
    251                         UnicodeString& result,
    252                         UErrorCode &status);
    253 
    254   /**
    255    * Performs a quick check on a string to determine whether the string is
    256    * in a particular normalization format.
    257    * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
    258    *
    259    * Three types of result can be returned: UNORM_YES, UNORM_NO or
    260    * UNORM_MAYBE. Result UNORM_YES indicates that the argument
    261    * string is in the desired normalized format; UNORM_NO indicates that the
    262    * argument string is not in the desired normalized format. A
    263    * UNORM_MAYBE result indicates that a more thorough check is required;
    264    * the user may have to put the string in its normalized form and compare the
    265    * results.
    266    * @param source       string for determining if it is in a normalized format
    267    * @param mode         normalization format
    268    * @param status A reference to a UErrorCode to receive any errors
    269    * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
    270    *
    271    * @see isNormalized
    272    * @stable ICU 2.0
    273    */
    274   static inline UNormalizationCheckResult
    275   quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
    276 
    277   /**
    278    * Performs a quick check on a string; same as the other version of quickCheck
    279    * but takes an extra options parameter like most normalization functions.
    280    *
    281    * @param source       string for determining if it is in a normalized format
    282    * @param mode         normalization format
    283    * @param options      the optional features to be enabled (0 for no options)
    284    * @param status A reference to a UErrorCode to receive any errors
    285    * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
    286    *
    287    * @see isNormalized
    288    * @stable ICU 2.6
    289    */
    290   static inline UNormalizationCheckResult
    291   quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
    292 
    293   /**
    294    * Test if a string is in a given normalization form.
    295    * This is semantically equivalent to source.equals(normalize(source, mode)) .
    296    *
    297    * Unlike unorm_quickCheck(), this function returns a definitive result,
    298    * never a "maybe".
    299    * For NFD, NFKD, and FCD, both functions work exactly the same.
    300    * For NFC and NFKC where quickCheck may return "maybe", this function will
    301    * perform further tests to arrive at a TRUE/FALSE result.
    302    *
    303    * @param src        String that is to be tested if it is in a normalization format.
    304    * @param mode       Which normalization form to test for.
    305    * @param errorCode  ICU error code in/out parameter.
    306    *                   Must fulfill U_SUCCESS before the function call.
    307    * @return Boolean value indicating whether the source string is in the
    308    *         "mode" normalization form.
    309    *
    310    * @see quickCheck
    311    * @stable ICU 2.2
    312    */
    313   static inline UBool
    314   isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
    315 
    316   /**
    317    * Test if a string is in a given normalization form; same as the other version of isNormalized
    318    * but takes an extra options parameter like most normalization functions.
    319    *
    320    * @param src        String that is to be tested if it is in a normalization format.
    321    * @param mode       Which normalization form to test for.
    322    * @param options      the optional features to be enabled (0 for no options)
    323    * @param errorCode  ICU error code in/out parameter.
    324    *                   Must fulfill U_SUCCESS before the function call.
    325    * @return Boolean value indicating whether the source string is in the
    326    *         "mode" normalization form.
    327    *
    328    * @see quickCheck
    329    * @stable ICU 2.6
    330    */
    331   static inline UBool
    332   isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
    333 
    334   /**
    335    * Concatenate normalized strings, making sure that the result is normalized as well.
    336    *
    337    * If both the left and the right strings are in
    338    * the normalization form according to "mode/options",
    339    * then the result will be
    340    *
    341    * \code
    342    *     dest=normalize(left+right, mode, options)
    343    * \endcode
    344    *
    345    * For details see unorm_concatenate in unorm.h.
    346    *
    347    * @param left Left source string.
    348    * @param right Right source string.
    349    * @param result The output string.
    350    * @param mode The normalization mode.
    351    * @param options A bit set of normalization options.
    352    * @param errorCode ICU error code in/out parameter.
    353    *                   Must fulfill U_SUCCESS before the function call.
    354    * @return result
    355    *
    356    * @see unorm_concatenate
    357    * @see normalize
    358    * @see unorm_next
    359    * @see unorm_previous
    360    *
    361    * @stable ICU 2.1
    362    */
    363   static UnicodeString &
    364   U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
    365               UnicodeString &result,
    366               UNormalizationMode mode, int32_t options,
    367               UErrorCode &errorCode);
    368 
    369   /**
    370    * Compare two strings for canonical equivalence.
    371    * Further options include case-insensitive comparison and
    372    * code point order (as opposed to code unit order).
    373    *
    374    * Canonical equivalence between two strings is defined as their normalized
    375    * forms (NFD or NFC) being identical.
    376    * This function compares strings incrementally instead of normalizing
    377    * (and optionally case-folding) both strings entirely,
    378    * improving performance significantly.
    379    *
    380    * Bulk normalization is only necessary if the strings do not fulfill the FCD
    381    * conditions. Only in this case, and only if the strings are relatively long,
    382    * is memory allocated temporarily.
    383    * For FCD strings and short non-FCD strings there is no memory allocation.
    384    *
    385    * Semantically, this is equivalent to
    386    *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
    387    * where code point order and foldCase are all optional.
    388    *
    389    * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
    390    * the case folding must be performed first, then the normalization.
    391    *
    392    * @param s1 First source string.
    393    * @param s2 Second source string.
    394    *
    395    * @param options A bit set of options:
    396    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    397    *     Case-sensitive comparison in code unit order, and the input strings
    398    *     are quick-checked for FCD.
    399    *
    400    *   - UNORM_INPUT_IS_FCD
    401    *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
    402    *     If not set, the function will quickCheck for FCD
    403    *     and normalize if necessary.
    404    *
    405    *   - U_COMPARE_CODE_POINT_ORDER
    406    *     Set to choose code point order instead of code unit order
    407    *     (see u_strCompare for details).
    408    *
    409    *   - U_COMPARE_IGNORE_CASE
    410    *     Set to compare strings case-insensitively using case folding,
    411    *     instead of case-sensitively.
    412    *     If set, then the following case folding options are used.
    413    *
    414    *   - Options as used with case-insensitive comparisons, currently:
    415    *
    416    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    417    *    (see u_strCaseCompare for details)
    418    *
    419    *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
    420    *
    421    * @param errorCode ICU error code in/out parameter.
    422    *                  Must fulfill U_SUCCESS before the function call.
    423    * @return <0 or 0 or >0 as usual for string comparisons
    424    *
    425    * @see unorm_compare
    426    * @see normalize
    427    * @see UNORM_FCD
    428    * @see u_strCompare
    429    * @see u_strCaseCompare
    430    *
    431    * @stable ICU 2.2
    432    */
    433   static inline int32_t
    434   compare(const UnicodeString &s1, const UnicodeString &s2,
    435           uint32_t options,
    436           UErrorCode &errorCode);
    437 
    438   //-------------------------------------------------------------------------
    439   // Iteration API
    440   //-------------------------------------------------------------------------
    441 
    442   /**
    443    * Return the current character in the normalized text.
    444    * current() may need to normalize some text at getIndex().
    445    * The getIndex() is not changed.
    446    *
    447    * @return the current normalized code point
    448    * @stable ICU 2.0
    449    */
    450   UChar32              current(void);
    451 
    452   /**
    453    * Return the first character in the normalized text.
    454    * This is equivalent to setIndexOnly(startIndex()) followed by next().
    455    * (Post-increment semantics.)
    456    *
    457    * @return the first normalized code point
    458    * @stable ICU 2.0
    459    */
    460   UChar32              first(void);
    461 
    462   /**
    463    * Return the last character in the normalized text.
    464    * This is equivalent to setIndexOnly(endIndex()) followed by previous().
    465    * (Pre-decrement semantics.)
    466    *
    467    * @return the last normalized code point
    468    * @stable ICU 2.0
    469    */
    470   UChar32              last(void);
    471 
    472   /**
    473    * Return the next character in the normalized text.
    474    * (Post-increment semantics.)
    475    * If the end of the text has already been reached, DONE is returned.
    476    * The DONE value could be confused with a U+FFFF non-character code point
    477    * in the text. If this is possible, you can test getIndex()<endIndex()
    478    * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
    479    * after calling next(). (Calling last() will change the iterator state!)
    480    *
    481    * The C API unorm_next() is more efficient and does not have this ambiguity.
    482    *
    483    * @return the next normalized code point
    484    * @stable ICU 2.0
    485    */
    486   UChar32              next(void);
    487 
    488   /**
    489    * Return the previous character in the normalized text and decrement.
    490    * (Pre-decrement semantics.)
    491    * If the beginning of the text has already been reached, DONE is returned.
    492    * The DONE value could be confused with a U+FFFF non-character code point
    493    * in the text. If this is possible, you can test
    494    * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
    495    * the iterator state!)
    496    *
    497    * The C API unorm_previous() is more efficient and does not have this ambiguity.
    498    *
    499    * @return the previous normalized code point
    500    * @stable ICU 2.0
    501    */
    502   UChar32              previous(void);
    503 
    504   /**
    505    * Set the iteration position in the input text that is being normalized,
    506    * without any immediate normalization.
    507    * After setIndexOnly(), getIndex() will return the same index that is
    508    * specified here.
    509    *
    510    * @param index the desired index in the input text.
    511    * @stable ICU 2.0
    512    */
    513   void                 setIndexOnly(int32_t index);
    514 
    515   /**
    516    * Reset the index to the beginning of the text.
    517    * This is equivalent to setIndexOnly(startIndex()).
    518    * @stable ICU 2.0
    519    */
    520   void                reset(void);
    521 
    522   /**
    523    * Retrieve the current iteration position in the input text that is
    524    * being normalized.
    525    *
    526    * A following call to next() will return a normalized code point from
    527    * the input text at or after this index.
    528    *
    529    * After a call to previous(), getIndex() will point at or before the
    530    * position in the input text where the normalized code point
    531    * was returned from with previous().
    532    *
    533    * @return the current index in the input text
    534    * @stable ICU 2.0
    535    */
    536   int32_t            getIndex(void) const;
    537 
    538   /**
    539    * Retrieve the index of the start of the input text. This is the begin index
    540    * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
    541    * over which this <code>Normalizer</code> is iterating.
    542    *
    543    * @return the smallest index in the input text where the Normalizer operates
    544    * @stable ICU 2.0
    545    */
    546   int32_t            startIndex(void) const;
    547 
    548   /**
    549    * Retrieve the index of the end of the input text. This is the end index
    550    * of the <code>CharacterIterator</code> or the length of the string
    551    * over which this <code>Normalizer</code> is iterating.
    552    * This end index is exclusive, i.e., the Normalizer operates only on characters
    553    * before this index.
    554    *
    555    * @return the first index in the input text where the Normalizer does not operate
    556    * @stable ICU 2.0
    557    */
    558   int32_t            endIndex(void) const;
    559 
    560   /**
    561    * Returns TRUE when both iterators refer to the same character in the same
    562    * input text.
    563    *
    564    * @param that a Normalizer object to compare this one to
    565    * @return comparison result
    566    * @stable ICU 2.0
    567    */
    568   UBool        operator==(const Normalizer& that) const;
    569 
    570   /**
    571    * Returns FALSE when both iterators refer to the same character in the same
    572    * input text.
    573    *
    574    * @param that a Normalizer object to compare this one to
    575    * @return comparison result
    576    * @stable ICU 2.0
    577    */
    578   inline UBool        operator!=(const Normalizer& that) const;
    579 
    580   /**
    581    * Returns a pointer to a new Normalizer that is a clone of this one.
    582    * The caller is responsible for deleting the new clone.
    583    * @return a pointer to a new Normalizer
    584    * @stable ICU 2.0
    585    */
    586   Normalizer*        clone(void) const;
    587 
    588   /**
    589    * Generates a hash code for this iterator.
    590    *
    591    * @return the hash code
    592    * @stable ICU 2.0
    593    */
    594   int32_t                hashCode(void) const;
    595 
    596   //-------------------------------------------------------------------------
    597   // Property access methods
    598   //-------------------------------------------------------------------------
    599 
    600   /**
    601    * Set the normalization mode for this object.
    602    * <p>
    603    * <b>Note:</b> If the normalization mode is changed while iterating
    604    * over a string, calls to {@link #next() } and {@link #previous() } may
    605    * return previously buffered characters in the old normalization mode
    606    * until the iteration is able to re-sync at the next base character.
    607    * It is safest to call {@link #setIndexOnly }, {@link #reset() },
    608    * {@link #setText }, {@link #first() },
    609    * {@link #last() }, etc. after calling <code>setMode</code>.
    610    * <p>
    611    * @param newMode the new mode for this <code>Normalizer</code>.
    612    * @see #getUMode
    613    * @stable ICU 2.0
    614    */
    615   void setMode(UNormalizationMode newMode);
    616 
    617   /**
    618    * Return the normalization mode for this object.
    619    *
    620    * This is an unusual name because there used to be a getMode() that
    621    * returned a different type.
    622    *
    623    * @return the mode for this <code>Normalizer</code>
    624    * @see #setMode
    625    * @stable ICU 2.0
    626    */
    627   UNormalizationMode getUMode(void) const;
    628 
    629   /**
    630    * Set options that affect this <code>Normalizer</code>'s operation.
    631    * Options do not change the basic composition or decomposition operation
    632    * that is being performed, but they control whether
    633    * certain optional portions of the operation are done.
    634    * Currently the only available option is obsolete.
    635    *
    636    * It is possible to specify multiple options that are all turned on or off.
    637    *
    638    * @param   option  the option(s) whose value is/are to be set.
    639    * @param   value   the new setting for the option.  Use <code>TRUE</code> to
    640    *                  turn the option(s) on and <code>FALSE</code> to turn it/them off.
    641    *
    642    * @see #getOption
    643    * @stable ICU 2.0
    644    */
    645   void setOption(int32_t option,
    646          UBool value);
    647 
    648   /**
    649    * Determine whether an option is turned on or off.
    650    * If multiple options are specified, then the result is TRUE if any
    651    * of them are set.
    652    * <p>
    653    * @param option the option(s) that are to be checked
    654    * @return TRUE if any of the option(s) are set
    655    * @see #setOption
    656    * @stable ICU 2.0
    657    */
    658   UBool getOption(int32_t option) const;
    659 
    660   /**
    661    * Set the input text over which this <code>Normalizer</code> will iterate.
    662    * The iteration position is set to the beginning.
    663    *
    664    * @param newText a string that replaces the current input text
    665    * @param status a UErrorCode
    666    * @stable ICU 2.0
    667    */
    668   void setText(const UnicodeString& newText,
    669            UErrorCode &status);
    670 
    671   /**
    672    * Set the input text over which this <code>Normalizer</code> will iterate.
    673    * The iteration position is set to the beginning.
    674    *
    675    * @param newText a CharacterIterator object that replaces the current input text
    676    * @param status a UErrorCode
    677    * @stable ICU 2.0
    678    */
    679   void setText(const CharacterIterator& newText,
    680            UErrorCode &status);
    681 
    682   /**
    683    * Set the input text over which this <code>Normalizer</code> will iterate.
    684    * The iteration position is set to the beginning.
    685    *
    686    * @param newText a string that replaces the current input text
    687    * @param length the length of the string, or -1 if NUL-terminated
    688    * @param status a UErrorCode
    689    * @stable ICU 2.0
    690    */
    691   void setText(const UChar* newText,
    692                     int32_t length,
    693             UErrorCode &status);
    694   /**
    695    * Copies the input text into the UnicodeString argument.
    696    * NOTE(review): declared without const, unlike getOption() - confirm whether that is intentional.
    697    * @param result Receives a copy of the text under iteration.
    698    * @stable ICU 2.0
    699    */
    700   void            getText(UnicodeString&  result);
    701 
    702   /**
    703    * ICU "poor man's RTTI", returns a UClassID for this class (static member).
    704    * @return a UClassID for this class.
    705    * @stable ICU 2.2
    706    */
    707   static UClassID U_EXPORT2 getStaticClassID();
    708 
    709   /**
    710    * ICU "poor man's RTTI", returns a UClassID for the actual class.
    711    * @return a UClassID for the actual class (this is a virtual, const member function).
    712    * @stable ICU 2.2
    713    */
    714   virtual UClassID getDynamicClassID() const;
    715 
    716 private:
    717   //-------------------------------------------------------------------------
    718   // Private functions
    719   //-------------------------------------------------------------------------
    720   // The next two members are declared private and are not implemented.
    721   Normalizer(); // default constructor not implemented
    722   Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
    723 
    724   // Private utility methods for iteration.
    725   // They are not documented in this header; see the implementation source code.
    726   UBool nextNormalize();
    727   UBool previousNormalize();
    728   // Further internal helpers; their behavior is not visible from this header.
    729   void    init(CharacterIterator *iter);
    730   void    clearBuffer(void);
    731 
    732   //-------------------------------------------------------------------------
    733   // Private data
    734   //-------------------------------------------------------------------------
    735   // Normalization mode and options (presumably the bits tested by getOption() - confirm)
    736   UNormalizationMode  fUMode;
    737   int32_t             fOptions;
    738 
    739   // The input text and our position in it
    740   UCharIterator       *text;
    741 
    742   // The normalization buffer is the result of normalization
    743   // of the source in the half-open index range [currentIndex, nextIndex).
    744   int32_t         currentIndex, nextIndex;
    745 
    746   // A buffer for holding intermediate results
    747   UnicodeString       buffer;
    748   int32_t         bufferPos;  // presumably an index into buffer - confirm in the implementation
    749 
    750 };
    751 
    752 //-------------------------------------------------------------------------
    753 // Inline implementations
    754 //-------------------------------------------------------------------------
    755 
    756 // Inequality: the exact negation of operator== applied to the same operand.
    757 inline UBool Normalizer::operator!=(const Normalizer& other) const
    758 { const UBool same = this->operator==(other); return !same; }
    759 
    760 // Quick check without options: thin inline wrapper over unorm_quickCheck().
    761 inline UNormalizationCheckResult
    762 Normalizer::quickCheck(const UnicodeString& source,
    763                        UNormalizationMode mode, UErrorCode &status)
    764 {
    765     // An incoming failure code skips the C call and yields UNORM_MAYBE.
    766     return U_FAILURE(status)
    767         ? UNORM_MAYBE
    768         : unorm_quickCheck(source.getBuffer(), source.length(),
    769                            mode, &status);
    770 }
    771 
    772 // Quick check with options: inline wrapper over unorm_quickCheckWithOptions().
    773 inline UNormalizationCheckResult
    774 Normalizer::quickCheck(const UnicodeString& source, UNormalizationMode mode,
    775                        int32_t options, UErrorCode &status) {
    776     if(!U_FAILURE(status)) {
    777         // No prior error: hand the string's buffer and length to the C API.
    778         return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
    779                                            mode, options, &status);
    780     }
    781     return UNORM_MAYBE; // status was already a failure on entry
    782 }
    783 
    784 // Inline wrapper over unorm_isNormalized() for a UnicodeString source.
    785 inline UBool Normalizer::isNormalized(const UnicodeString& source,
    786                                       UNormalizationMode mode,
    787                                       UErrorCode &status)
    788 {
    789     // With a failure already in status, answer FALSE without calling the C API.
    790     if(U_FAILURE(status)) { return FALSE; }
    791     UBool normalized = unorm_isNormalized(source.getBuffer(), source.length(),
    792                                           mode, &status);
    793     return normalized;
    794 }
    795 
    796 // Options-aware variant: forwards to unorm_isNormalizedWithOptions().
    797 inline UBool
    798 Normalizer::isNormalized(const UnicodeString& source, UNormalizationMode mode,
    799                          int32_t options, UErrorCode &status) {
    800     if(!U_FAILURE(status)) {
    801         // status is clean, so the C function does the real work.
    802         return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
    803                                              mode, options, &status);
    804     }
    805     return FALSE; // an earlier failure short-circuits the check
    806 }
    807 
    808 // Compares two strings by delegating to the C function unorm_compare().
    809 inline int32_t Normalizer::compare(const UnicodeString &s1,
    810                                    const UnicodeString &s2,
    811                                    uint32_t options, UErrorCode &errorCode) {
    812     // No validation here: all argument checking is done in unorm_compare.
    813     const int32_t result = unorm_compare(s1.getBuffer(), s1.length(),
    814                                          s2.getBuffer(), s2.length(),
    815                                          options, &errorCode);
    816     return result;
    817 }
    818 
    819 U_NAMESPACE_END
    820 
    821 #endif /* #if !UCONFIG_NO_NORMALIZATION */
    822 
    823 #endif // NORMLZR_H
    824