Home | History | Annotate | Download | only in unicode
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2009-2013, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  normalizer2.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2009nov22
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #ifndef __NORMALIZER2_H__
     18 #define __NORMALIZER2_H__
     19 
     20 /**
     21  * \file
     22  * \brief C++ API: New API for Unicode Normalization.
     23  */
     24 
     25 #include "unicode/utypes.h"
     26 
     27 #if !UCONFIG_NO_NORMALIZATION
     28 
     29 #include "unicode/uniset.h"
     30 #include "unicode/unistr.h"
     31 #include "unicode/unorm2.h"
     32 
     33 U_NAMESPACE_BEGIN
     34 
     35 /**
     36  * Unicode normalization functionality for standard Unicode normalization or
     37  * for using custom mapping tables.
     38  * All instances of this class are unmodifiable/immutable.
     39  * Instances returned by getInstance() are singletons that must not be deleted by the caller.
     40  * The Normalizer2 class is not intended for public subclassing.
     41  *
     42  * The primary functions are to produce a normalized string and to detect whether
     43  * a string is already normalized.
     44  * The most commonly used normalization forms are those defined in
     45  * http://www.unicode.org/unicode/reports/tr15/
     46  * However, this API supports additional normalization forms for specialized purposes.
     47  * For example, NFKC_Casefold is provided via
     48  * getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode) and can be used in implementations of UTS #46.
     49  *
     50  * Not only are the standard compose and decompose modes supplied,
     51  * but additional modes are provided as documented in the UNormalization2Mode enum.
     52  *
     53  * Some of the functions in this class identify normalization boundaries.
     54  * At a normalization boundary, the portions of the string
     55  * before it and starting from it do not interact and can be handled independently.
     56  *
     57  * The spanQuickCheckYes() stops at a normalization boundary.
     58  * When the goal is a normalized string, then the text before the boundary
     59  * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
     60  *
     61  * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
     62  * a character is guaranteed to be at a normalization boundary,
     63  * regardless of context.
     64  * This is used for moving from one normalization boundary to the next
     65  * or preceding boundary, and for performing iterative normalization.
     66  *
     67  * Iterative normalization is useful when only a small portion of a
     68  * longer string needs to be processed.
     69  * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
     70  * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
     71  * (to process only the substring for which sort key bytes are computed).
     72  *
     73  * The set of normalization boundaries returned by these functions may not be
     74  * complete: There may be more boundaries that could be returned.
     75  * Different functions may return different boundaries.
     76  * @stable ICU 4.4
     77  */
     78 class U_COMMON_API Normalizer2 : public UObject {
     79 public:
     80     /**
     81      * Destructor.
     82      * @stable ICU 4.4
     83      */
     84     ~Normalizer2();
     85 
     86     /**
     87      * Returns a Normalizer2 instance for Unicode NFC normalization.
     88      * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
     89      * Returns an unmodifiable singleton instance. Do not delete it.
     90      * @param errorCode Standard ICU error code. Its input value must
     91      *                  pass the U_SUCCESS() test, or else the function returns
     92      *                  immediately. Check for U_FAILURE() on output or use with
     93      *                  function chaining. (See User Guide for details.)
     94      * @return the requested Normalizer2, if successful
     95      * @stable ICU 49
     96      */
     97     static const Normalizer2 *
     98     getNFCInstance(UErrorCode &errorCode);
     99 
    100     /**
    101      * Returns a Normalizer2 instance for Unicode NFD normalization.
    102      * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
             * (The name "nfc" is intentional: NFD is selected by combining
             * name="nfc" with UNORM2_DECOMPOSE, as described for getInstance().)
    103      * Returns an unmodifiable singleton instance. Do not delete it.
    104      * @param errorCode Standard ICU error code. Its input value must
    105      *                  pass the U_SUCCESS() test, or else the function returns
    106      *                  immediately. Check for U_FAILURE() on output or use with
    107      *                  function chaining. (See User Guide for details.)
    108      * @return the requested Normalizer2, if successful
    109      * @stable ICU 49
    110      */
    111     static const Normalizer2 *
    112     getNFDInstance(UErrorCode &errorCode);
    113 
    114     /**
    115      * Returns a Normalizer2 instance for Unicode NFKC normalization.
    116      * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
    117      * Returns an unmodifiable singleton instance. Do not delete it.
    118      * @param errorCode Standard ICU error code. Its input value must
    119      *                  pass the U_SUCCESS() test, or else the function returns
    120      *                  immediately. Check for U_FAILURE() on output or use with
    121      *                  function chaining. (See User Guide for details.)
    122      * @return the requested Normalizer2, if successful
    123      * @stable ICU 49
    124      */
    125     static const Normalizer2 *
    126     getNFKCInstance(UErrorCode &errorCode);
    127 
    128     /**
    129      * Returns a Normalizer2 instance for Unicode NFKD normalization.
    130      * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
             * (The name "nfkc" is intentional: NFKD is selected by combining
             * name="nfkc" with UNORM2_DECOMPOSE, as described for getInstance().)
    131      * Returns an unmodifiable singleton instance. Do not delete it.
    132      * @param errorCode Standard ICU error code. Its input value must
    133      *                  pass the U_SUCCESS() test, or else the function returns
    134      *                  immediately. Check for U_FAILURE() on output or use with
    135      *                  function chaining. (See User Guide for details.)
    136      * @return the requested Normalizer2, if successful
    137      * @stable ICU 49
    138      */
    139     static const Normalizer2 *
    140     getNFKDInstance(UErrorCode &errorCode);
    141 
    142     /**
    143      * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
    144      * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
    145      * Returns an unmodifiable singleton instance. Do not delete it.
    146      * @param errorCode Standard ICU error code. Its input value must
    147      *                  pass the U_SUCCESS() test, or else the function returns
    148      *                  immediately. Check for U_FAILURE() on output or use with
    149      *                  function chaining. (See User Guide for details.)
    150      * @return the requested Normalizer2, if successful
    151      * @stable ICU 49
    152      */
    153     static const Normalizer2 *
    154     getNFKCCasefoldInstance(UErrorCode &errorCode);
    155 
    156     /**
    157      * Returns a Normalizer2 instance which uses the specified data file
    158      * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
    159      * and which composes or decomposes text according to the specified mode.
    160      * Returns an unmodifiable singleton instance. Do not delete it.
    161      *
    162      * Use packageName=NULL for data files that are part of ICU's own data.
    163      * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
    164      * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
    165      * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
    166      *
    167      * @param packageName NULL for ICU built-in data, otherwise application data package name
    168      * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
    169      * @param mode normalization mode (compose or decompose etc.)
    170      * @param errorCode Standard ICU error code. Its input value must
    171      *                  pass the U_SUCCESS() test, or else the function returns
    172      *                  immediately. Check for U_FAILURE() on output or use with
    173      *                  function chaining. (See User Guide for details.)
    174      * @return the requested Normalizer2, if successful
    175      * @stable ICU 4.4
    176      */
    177     static const Normalizer2 *
    178     getInstance(const char *packageName,
    179                 const char *name,
    180                 UNormalization2Mode mode,
    181                 UErrorCode &errorCode);
    182 
    183     /**
    184      * Returns the normalized form of the source string.
             * Convenience overload: normalizes into a new local UnicodeString by
             * calling the three-argument normalize(src, dest, errorCode) and
             * returns that string by value.
    185      * @param src source string
    186      * @param errorCode Standard ICU error code. Its input value must
    187      *                  pass the U_SUCCESS() test, or else the function returns
    188      *                  immediately. Check for U_FAILURE() on output or use with
    189      *                  function chaining. (See User Guide for details.)
    190      * @return normalized src
    191      * @stable ICU 4.4
    192      */
    193     UnicodeString
    194     normalize(const UnicodeString &src, UErrorCode &errorCode) const {
    195         UnicodeString result;
    196         normalize(src, result, errorCode);
    197         return result;
    198     }
    199     /**
    200      * Writes the normalized form of the source string to the destination string
    201      * (replacing its contents) and returns the destination string.
    202      * The source and destination strings must be different objects.
    203      * @param src source string
    204      * @param dest destination string; its contents are replaced with normalized src
    205      * @param errorCode Standard ICU error code. Its input value must
    206      *                  pass the U_SUCCESS() test, or else the function returns
    207      *                  immediately. Check for U_FAILURE() on output or use with
    208      *                  function chaining. (See User Guide for details.)
    209      * @return dest
    210      * @stable ICU 4.4
    211      */
    212     virtual UnicodeString &
    213     normalize(const UnicodeString &src,
    214               UnicodeString &dest,
    215               UErrorCode &errorCode) const = 0;
    216     /**
    217      * Appends the normalized form of the second string to the first string
    218      * (merging them at the boundary) and returns the first string.
    219      * The result is normalized if the first string was normalized.
    220      * The first and second strings must be different objects.
    221      * @param first string, should be normalized
    222      * @param second string, will be normalized
    223      * @param errorCode Standard ICU error code. Its input value must
    224      *                  pass the U_SUCCESS() test, or else the function returns
    225      *                  immediately. Check for U_FAILURE() on output or use with
    226      *                  function chaining. (See User Guide for details.)
    227      * @return first
    228      * @stable ICU 4.4
    229      */
    230     virtual UnicodeString &
    231     normalizeSecondAndAppend(UnicodeString &first,
    232                              const UnicodeString &second,
    233                              UErrorCode &errorCode) const = 0;
    234     /**
    235      * Appends the second string to the first string
    236      * (merging them at the boundary) and returns the first string.
    237      * The result is normalized if both the strings were normalized.
    238      * The first and second strings must be different objects.
    239      * @param first string, should be normalized
    240      * @param second string, should be normalized
    241      * @param errorCode Standard ICU error code. Its input value must
    242      *                  pass the U_SUCCESS() test, or else the function returns
    243      *                  immediately. Check for U_FAILURE() on output or use with
    244      *                  function chaining. (See User Guide for details.)
    245      * @return first
    246      * @stable ICU 4.4
    247      */
    248     virtual UnicodeString &
    249     append(UnicodeString &first,
    250            const UnicodeString &second,
    251            UErrorCode &errorCode) const = 0;
    252 
    253     /**
    254      * Gets the decomposition mapping of c.
    255      * Roughly equivalent to normalizing the String form of c
    256      * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
    257      * returns FALSE and does not write a string
    258      * if c does not have a decomposition mapping in this instance's data.
    259      * This function is independent of the mode of the Normalizer2.
    260      * @param c code point
    261      * @param decomposition String object which will be set to c's
    262      *                      decomposition mapping, if there is one.
    263      * @return TRUE if c has a decomposition, otherwise FALSE
    264      * @stable ICU 4.6
    265      */
    266     virtual UBool
    267     getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
    268 
    269     /**
    270      * Gets the raw decomposition mapping of c.
    271      *
    272      * This is similar to the getDecomposition() method but returns the
    273      * raw decomposition mapping as specified in UnicodeData.txt or
    274      * (for custom data) in the mapping files processed by the gennorm2 tool.
    275      * By contrast, getDecomposition() returns the processed,
    276      * recursively-decomposed version of this mapping.
    277      *
    278      * When used on a standard NFKC Normalizer2 instance,
    279      * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
    280      *
    281      * When used on a standard NFC Normalizer2 instance,
    282      * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
    283      * in this case, the result contains either one or two code points (=1..4 UChars).
    284      *
    285      * This function is independent of the mode of the Normalizer2.
    286      * The default implementation returns FALSE.
    287      * @param c code point
    288      * @param decomposition String object which will be set to c's
    289      *                      raw decomposition mapping, if there is one.
    290      * @return TRUE if c has a decomposition, otherwise FALSE
    291      * @stable ICU 49
    292      */
    293     virtual UBool
    294     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
    295 
    296     /**
    297      * Performs pairwise composition of a & b and returns the composite if there is one.
    298      *
    299      * Returns a composite code point c only if c has a two-way mapping to a+b.
    300      * In standard Unicode normalization, this means that
    301      * c has a canonical decomposition to a+b
    302      * and c does not have the Full_Composition_Exclusion property.
    303      *
    304      * This function is independent of the mode of the Normalizer2.
    305      * The default implementation returns a negative value.
    306      * @param a A (normalization starter) code point.
    307      * @param b Another code point.
    308      * @return The non-negative composite code point if there is one; otherwise a negative value.
    309      * @stable ICU 49
    310      */
    311     virtual UChar32
    312     composePair(UChar32 a, UChar32 b) const;
    313 
    314     /**
    315      * Gets the combining class of c.
    316      * The default implementation returns 0
    317      * but all standard implementations return the Unicode Canonical_Combining_Class value.
    318      * @param c code point
    319      * @return c's combining class
    320      * @stable ICU 49
    321      */
    322     virtual uint8_t
    323     getCombiningClass(UChar32 c) const;
    324 
    325     /**
    326      * Tests if the string is normalized.
    327      * Internally, in cases where the quickCheck() method would return "maybe"
    328      * (which is only possible for the two COMPOSE modes) this method
    329      * resolves to "yes" or "no" to provide a definitive result,
    330      * at the cost of doing more work in those cases.
    331      * @param s input string
    332      * @param errorCode Standard ICU error code. Its input value must
    333      *                  pass the U_SUCCESS() test, or else the function returns
    334      *                  immediately. Check for U_FAILURE() on output or use with
    335      *                  function chaining. (See User Guide for details.)
    336      * @return TRUE if s is normalized
    337      * @stable ICU 4.4
    338      */
    339     virtual UBool
    340     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
    341 
    342     /**
    343      * Tests if the string is normalized.
    344      * For the two COMPOSE modes, the result could be "maybe" in cases that
    345      * would take a little more work to resolve definitively.
    346      * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
    347      * combination of quick check + normalization, to avoid
    348      * re-checking the "yes" prefix.
    349      * @param s input string
    350      * @param errorCode Standard ICU error code. Its input value must
    351      *                  pass the U_SUCCESS() test, or else the function returns
    352      *                  immediately. Check for U_FAILURE() on output or use with
    353      *                  function chaining. (See User Guide for details.)
    354      * @return UNormalizationCheckResult
    355      * @stable ICU 4.4
    356      */
    357     virtual UNormalizationCheckResult
    358     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
    359 
    360     /**
    361      * Returns the end of the normalized substring of the input string.
    362      * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
    363      * the substring <code>UnicodeString(s, 0, end)</code>
    364      * will pass the quick check with a "yes" result.
    365      *
    366      * The returned end index is usually one or more characters before the
    367      * "no" or "maybe" character: The end index is at a normalization boundary.
    368      * (See the class documentation for more about normalization boundaries.)
    369      *
    370      * When the goal is a normalized string and most input strings are expected
    371      * to be normalized already, then call this method,
    372      * and if it returns a prefix shorter than the input string,
    373      * copy that prefix and use normalizeSecondAndAppend() for the remainder.
    374      * @param s input string
    375      * @param errorCode Standard ICU error code. Its input value must
    376      *                  pass the U_SUCCESS() test, or else the function returns
    377      *                  immediately. Check for U_FAILURE() on output or use with
    378      *                  function chaining. (See User Guide for details.)
    379      * @return "yes" span end index
    380      * @stable ICU 4.4
    381      */
    382     virtual int32_t
    383     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
    384 
    385     /**
    386      * Tests if the character always has a normalization boundary before it,
    387      * regardless of context.
    388      * If true, then the character does not normalization-interact with
    389      * preceding characters.
    390      * In other words, a string containing this character can be normalized
    391      * by processing portions before this character and starting from this
    392      * character independently.
    393      * This is used for iterative normalization. See the class documentation for details.
    394      * @param c character to test
    395      * @return TRUE if c has a normalization boundary before it
    396      * @stable ICU 4.4
    397      */
    398     virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
    399 
    400     /**
    401      * Tests if the character always has a normalization boundary after it,
    402      * regardless of context.
    403      * If true, then the character does not normalization-interact with
    404      * following characters.
    405      * In other words, a string containing this character can be normalized
    406      * by processing portions up to this character and after this
    407      * character independently.
    408      * This is used for iterative normalization. See the class documentation for details.
    409      * Note that this operation may be significantly slower than hasBoundaryBefore().
    410      * @param c character to test
    411      * @return TRUE if c has a normalization boundary after it
    412      * @stable ICU 4.4
    413      */
    414     virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
    415 
    416     /**
    417      * Tests if the character is normalization-inert.
    418      * If true, then the character does not change, nor normalization-interact with
    419      * preceding or following characters.
    420      * In other words, a string containing this character can be normalized
    421      * by processing portions before this character and after this
    422      * character independently.
    423      * This is used for iterative normalization. See the class documentation for details.
    424      * Note that this operation may be significantly slower than hasBoundaryBefore().
    425      * @param c character to test
    426      * @return TRUE if c is normalization-inert
    427      * @stable ICU 4.4
    428      */
    429     virtual UBool isInert(UChar32 c) const = 0;
    430 };
    431 
    432 /**
    433  * Normalization filtered by a UnicodeSet.
    434  * Normalizes portions of the text contained in the filter set and leaves
    435  * portions not contained in the filter set unchanged.
    436  * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
    437  * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
    438  * This class implements all of (and only) the Normalizer2 API.
    439  * An instance of this class is unmodifiable/immutable but is constructed and
    440  * must be destructed by the owner.
    441  * @stable ICU 4.4
    442  */
    443 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
    444 public:
    445     /**
    446      * Constructs a filtered normalizer wrapping any Normalizer2 instance
    447      * and a filter set.
    448      * Both are aliased and must not be modified or deleted while this object
    449      * is used.
    450      * The filter set should be frozen; otherwise the performance will suffer greatly.
    451      * @param n2 wrapped Normalizer2 instance
    452      * @param filterSet UnicodeSet which determines the characters to be normalized
    453      * @stable ICU 4.4
    454      */
    455     FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
    456             norm2(n2), set(filterSet) {}
    457 
    458     /**
    459      * Destructor.
    460      * @stable ICU 4.4
    461      */
    462     ~FilteredNormalizer2();
    463 
    464     /**
    465      * Writes the normalized form of the source string to the destination string
    466      * (replacing its contents) and returns the destination string.
    467      * The source and destination strings must be different objects.
             *
             * Note: because this class declares its own normalize() overloads,
             * the inherited two-argument convenience overload
             * Normalizer2::normalize(src, errorCode) is hidden by C++ name lookup
             * when called on a FilteredNormalizer2 object; call it through a
             * Normalizer2 reference or pointer instead.
    468      * @param src source string
    469      * @param dest destination string; its contents are replaced with normalized src
    470      * @param errorCode Standard ICU error code. Its input value must
    471      *                  pass the U_SUCCESS() test, or else the function returns
    472      *                  immediately. Check for U_FAILURE() on output or use with
    473      *                  function chaining. (See User Guide for details.)
    474      * @return dest
    475      * @stable ICU 4.4
    476      */
    477     virtual UnicodeString &
    478     normalize(const UnicodeString &src,
    479               UnicodeString &dest,
    480               UErrorCode &errorCode) const;
    481     /**
    482      * Appends the normalized form of the second string to the first string
    483      * (merging them at the boundary) and returns the first string.
    484      * The result is normalized if the first string was normalized.
    485      * The first and second strings must be different objects.
    486      * @param first string, should be normalized
    487      * @param second string, will be normalized
    488      * @param errorCode Standard ICU error code. Its input value must
    489      *                  pass the U_SUCCESS() test, or else the function returns
    490      *                  immediately. Check for U_FAILURE() on output or use with
    491      *                  function chaining. (See User Guide for details.)
    492      * @return first
    493      * @stable ICU 4.4
    494      */
    495     virtual UnicodeString &
    496     normalizeSecondAndAppend(UnicodeString &first,
    497                              const UnicodeString &second,
    498                              UErrorCode &errorCode) const;
    499     /**
    500      * Appends the second string to the first string
    501      * (merging them at the boundary) and returns the first string.
    502      * The result is normalized if both the strings were normalized.
    503      * The first and second strings must be different objects.
    504      * @param first string, should be normalized
    505      * @param second string, should be normalized
    506      * @param errorCode Standard ICU error code. Its input value must
    507      *                  pass the U_SUCCESS() test, or else the function returns
    508      *                  immediately. Check for U_FAILURE() on output or use with
    509      *                  function chaining. (See User Guide for details.)
    510      * @return first
    511      * @stable ICU 4.4
    512      */
    513     virtual UnicodeString &
    514     append(UnicodeString &first,
    515            const UnicodeString &second,
    516            UErrorCode &errorCode) const;
    517 
    518     /**
    519      * Gets the decomposition mapping of c.
    520      * For details see the base class documentation.
    521      *
    522      * This function is independent of the mode of the Normalizer2.
    523      * @param c code point
    524      * @param decomposition String object which will be set to c's
    525      *                      decomposition mapping, if there is one.
    526      * @return TRUE if c has a decomposition, otherwise FALSE
    527      * @stable ICU 4.6
    528      */
    529     virtual UBool
    530     getDecomposition(UChar32 c, UnicodeString &decomposition) const;
    531 
    532     /**
    533      * Gets the raw decomposition mapping of c.
    534      * For details see the base class documentation.
    535      *
    536      * This function is independent of the mode of the Normalizer2.
    537      * @param c code point
    538      * @param decomposition String object which will be set to c's
    539      *                      raw decomposition mapping, if there is one.
    540      * @return TRUE if c has a decomposition, otherwise FALSE
    541      * @stable ICU 49
    542      */
    543     virtual UBool
    544     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
    545 
    546     /**
    547      * Performs pairwise composition of a & b and returns the composite if there is one.
    548      * For details see the base class documentation.
    549      *
    550      * This function is independent of the mode of the Normalizer2.
    551      * @param a A (normalization starter) code point.
    552      * @param b Another code point.
    553      * @return The non-negative composite code point if there is one; otherwise a negative value.
    554      * @stable ICU 49
    555      */
    556     virtual UChar32
    557     composePair(UChar32 a, UChar32 b) const;
    558 
    559     /**
    560      * Gets the combining class of c.
    561      * The default implementation returns 0
    562      * but all standard implementations return the Unicode Canonical_Combining_Class value.
    563      * @param c code point
    564      * @return c's combining class
    565      * @stable ICU 49
    566      */
    567     virtual uint8_t
    568     getCombiningClass(UChar32 c) const;
    569 
    570     /**
    571      * Tests if the string is normalized.
    572      * For details see the Normalizer2 base class documentation.
    573      * @param s input string
    574      * @param errorCode Standard ICU error code. Its input value must
    575      *                  pass the U_SUCCESS() test, or else the function returns
    576      *                  immediately. Check for U_FAILURE() on output or use with
    577      *                  function chaining. (See User Guide for details.)
    578      * @return TRUE if s is normalized
    579      * @stable ICU 4.4
    580      */
    581     virtual UBool
    582     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
    583     /**
    584      * Tests if the string is normalized.
    585      * For details see the Normalizer2 base class documentation.
    586      * @param s input string
    587      * @param errorCode Standard ICU error code. Its input value must
    588      *                  pass the U_SUCCESS() test, or else the function returns
    589      *                  immediately. Check for U_FAILURE() on output or use with
    590      *                  function chaining. (See User Guide for details.)
    591      * @return UNormalizationCheckResult
    592      * @stable ICU 4.4
    593      */
    594     virtual UNormalizationCheckResult
    595     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
    596     /**
    597      * Returns the end of the normalized substring of the input string.
    598      * For details see the Normalizer2 base class documentation.
    599      * @param s input string
    600      * @param errorCode Standard ICU error code. Its input value must
    601      *                  pass the U_SUCCESS() test, or else the function returns
    602      *                  immediately. Check for U_FAILURE() on output or use with
    603      *                  function chaining. (See User Guide for details.)
    604      * @return "yes" span end index
    605      * @stable ICU 4.4
    606      */
    607     virtual int32_t
    608     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
    609 
    610     /**
    611      * Tests if the character always has a normalization boundary before it,
    612      * regardless of context.
    613      * For details see the Normalizer2 base class documentation.
    614      * @param c character to test
    615      * @return TRUE if c has a normalization boundary before it
    616      * @stable ICU 4.4
    617      */
    618     virtual UBool hasBoundaryBefore(UChar32 c) const;
    619 
    620     /**
    621      * Tests if the character always has a normalization boundary after it,
    622      * regardless of context.
    623      * For details see the Normalizer2 base class documentation.
    624      * @param c character to test
    625      * @return TRUE if c has a normalization boundary after it
    626      * @stable ICU 4.4
    627      */
    628     virtual UBool hasBoundaryAfter(UChar32 c) const;
    629 
    630     /**
    631      * Tests if the character is normalization-inert.
    632      * For details see the Normalizer2 base class documentation.
    633      * @param c character to test
    634      * @return TRUE if c is normalization-inert
    635      * @stable ICU 4.4
    636      */
    637     virtual UBool isInert(UChar32 c) const;
    638 private:
            // Private overload of normalize() that additionally takes a
            // USetSpanCondition. Not part of the public API.
            // NOTE(review): the implementation is not visible in this header;
            // presumably spanCondition selects whether the first span is treated
            // as inside or outside the filter set -- confirm in the .cpp.
    639     UnicodeString &
    640     normalize(const UnicodeString &src,
    641               UnicodeString &dest,
    642               USetSpanCondition spanCondition,
    643               UErrorCode &errorCode) const;
    644 
            // Private overload of normalizeSecondAndAppend() that additionally
            // takes a doNormalize flag. Not part of the public API.
            // NOTE(review): presumably the shared worker behind the public
            // normalizeSecondAndAppend() and append(), with doNormalize choosing
            // between them -- implementation not shown here; confirm in the .cpp.
    645     UnicodeString &
    646     normalizeSecondAndAppend(UnicodeString &first,
    647                              const UnicodeString &second,
    648                              UBool doNormalize,
    649                              UErrorCode &errorCode) const;
    650 
    651     const Normalizer2 &norm2;  // wrapped normalizer; bound by the constructor, aliased (not copied)
    652     const UnicodeSet &set;     // filter set; bound by the constructor, aliased (not copied)
    653 };
    654 
    655 U_NAMESPACE_END
    656 
    657 #endif  // !UCONFIG_NO_NORMALIZATION
    658 #endif  // __NORMALIZER2_H__
    659