Home | History | Annotate | Download | only in unicode
      1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 2009-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  unorm2.h
     11 *   encoding:   US-ASCII
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 2009dec15
     16 *   created by: Markus W. Scherer
     17 */
     18 
     19 #ifndef __UNORM2_H__
     20 #define __UNORM2_H__
     21 
     22 /**
     23  * \file
     24  * \brief C API: New API for Unicode Normalization.
     25  *
     26  * Unicode normalization functionality for standard Unicode normalization or
     27  * for using custom mapping tables.
     28  * All instances of UNormalizer2 are unmodifiable/immutable.
     29  * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
     30  * For more details see the Normalizer2 C++ class.
     31  */
     32 
     33 #include "unicode/utypes.h"
     34 #include "unicode/localpointer.h"
     35 #include "unicode/uset.h"
     36 
     37 /**
     38  * Constants for normalization modes, as passed to unorm2_getInstance().
     39  * For details about standard Unicode normalization forms
     40  * and about the algorithms which are also used with custom mapping tables
     41  * see http://www.unicode.org/reports/tr15/
     42  * @stable ICU 4.4
     43  */
     44 typedef enum {
     45     /**
     46      * Decomposition followed by composition.
     47      * Same as standard NFC when using an "nfc" instance.
     48      * Same as standard NFKC when using an "nfkc" instance.
     49      * For details about standard Unicode normalization forms
     50      * see http://www.unicode.org/reports/tr15/
     51      * @stable ICU 4.4
     52      */
     53     UNORM2_COMPOSE,
     54     /**
     55      * Map, and reorder canonically.
     56      * Same as standard NFD when using an "nfc" instance.
     57      * Same as standard NFKD when using an "nfkc" instance.
     58      * For details about standard Unicode normalization forms
     59      * see http://www.unicode.org/reports/tr15/
     60      * @stable ICU 4.4
     61      */
     62     UNORM2_DECOMPOSE,
     63     /**
     64      * "Fast C or D" form.
     65      * If a string is in this form, then further decomposition <i>without reordering</i>
     66      * would yield the same form as DECOMPOSE.
     67      * Text in "Fast C or D" form can be processed efficiently with data tables
     68      * that are "canonically closed", that is, that provide equivalent data for
     69      * equivalent text, without having to be fully normalized.
     70      * Not a standard Unicode normalization form.
     71      * Not a unique form: Different FCD strings can be canonically equivalent.
     72      * For details see http://www.unicode.org/notes/tn5/#FCD
     73      * @stable ICU 4.4
     74      */
     75     UNORM2_FCD,
     76     /**
     77      * Compose only contiguously.
     78      * Also known as "FCC" or "Fast C Contiguous".
     79      * The result will often but not always be in NFC.
     80      * The result will conform to FCD which is useful for processing.
     81      * Not a standard Unicode normalization form.
     82      * For details see http://www.unicode.org/notes/tn5/#FCC
     83      * @stable ICU 4.4
     84      */
     85     UNORM2_COMPOSE_CONTIGUOUS
     86 } UNormalization2Mode;
     87 
     88 /**
     89  * Result values for normalization quick check functions such as unorm2_quickCheck().
     90  * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
     91  * @stable ICU 2.0
     92  */
     93 typedef enum UNormalizationCheckResult {
     94   /**
     95    * The input string is not in the normalization form.
     96    * @stable ICU 2.0
     97    */
     98   UNORM_NO,
     99   /**
    100    * The input string is in the normalization form.
    101    * @stable ICU 2.0
    102    */
    103   UNORM_YES,
    104   /**
    105    * The input string may or may not be in the normalization form.
    106    * This value is only returned for composition forms like NFC and FCC,
    107    * when a backward-combining character is found for which the surrounding text
    108    * would have to be analyzed further.
    109    * @stable ICU 2.0
    110    */
    111   UNORM_MAYBE
    112 } UNormalizationCheckResult;
    113 
    114 /**
    115  * Opaque C service object type for the new normalization API; obtained from the unorm2_get...Instance() functions or unorm2_openFiltered().
    116  * @stable ICU 4.4
    117  */
    118 struct UNormalizer2;
    119 typedef struct UNormalizer2 UNormalizer2;  /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
    120 
    121 #if !UCONFIG_NO_NORMALIZATION
    122 
    123 /**
    124  * Returns a UNormalizer2 instance for Unicode NFC normalization.
    125  * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
    126  * Returns an unmodifiable singleton instance. Do not delete it.
    127  * @param pErrorCode Standard ICU error code. Its input value must
    128  *                  pass the U_SUCCESS() test, or else the function returns
    129  *                  immediately. Check for U_FAILURE() on output or use with
    130  *                  function chaining. (See User Guide for details.)
    131  * @return the requested UNormalizer2, if successful
    132  * @stable ICU 49
    133  */
    134 U_STABLE const UNormalizer2 * U_EXPORT2
    135 unorm2_getNFCInstance(UErrorCode *pErrorCode);
    136 
    137 /**
    138  * Returns a UNormalizer2 instance for Unicode NFD normalization.
    139  * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
    140  * Returns an unmodifiable singleton instance. Do not delete it.
    141  * @param pErrorCode Standard ICU error code. Its input value must
    142  *                  pass the U_SUCCESS() test, or else the function returns
    143  *                  immediately. Check for U_FAILURE() on output or use with
    144  *                  function chaining. (See User Guide for details.)
    145  * @return the requested UNormalizer2, if successful
    146  * @stable ICU 49
    147  */
    148 U_STABLE const UNormalizer2 * U_EXPORT2
    149 unorm2_getNFDInstance(UErrorCode *pErrorCode);
    150 
    151 /**
    152  * Returns a UNormalizer2 instance for Unicode NFKC normalization.
    153  * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
    154  * Returns an unmodifiable singleton instance. Do not delete it.
    155  * @param pErrorCode Standard ICU error code. Its input value must
    156  *                  pass the U_SUCCESS() test, or else the function returns
    157  *                  immediately. Check for U_FAILURE() on output or use with
    158  *                  function chaining. (See User Guide for details.)
    159  * @return the requested UNormalizer2, if successful
    160  * @stable ICU 49
    161  */
    162 U_STABLE const UNormalizer2 * U_EXPORT2
    163 unorm2_getNFKCInstance(UErrorCode *pErrorCode);
    164 
    165 /**
    166  * Returns a UNormalizer2 instance for Unicode NFKD normalization.
    167  * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
    168  * Returns an unmodifiable singleton instance. Do not delete it.
    169  * @param pErrorCode Standard ICU error code. Its input value must
    170  *                  pass the U_SUCCESS() test, or else the function returns
    171  *                  immediately. Check for U_FAILURE() on output or use with
    172  *                  function chaining. (See User Guide for details.)
    173  * @return the requested UNormalizer2, if successful
    174  * @stable ICU 49
    175  */
    176 U_STABLE const UNormalizer2 * U_EXPORT2
    177 unorm2_getNFKDInstance(UErrorCode *pErrorCode);
    178 
    179 /**
    180  * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
    181  * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
    182  * Returns an unmodifiable singleton instance. Do not delete it.
    183  * @param pErrorCode Standard ICU error code. Its input value must
    184  *                  pass the U_SUCCESS() test, or else the function returns
    185  *                  immediately. Check for U_FAILURE() on output or use with
    186  *                  function chaining. (See User Guide for details.)
    187  * @return the requested UNormalizer2, if successful
    188  * @stable ICU 49
    189  */
    190 U_STABLE const UNormalizer2 * U_EXPORT2
    191 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
    192 
    193 /**
    194  * Returns a UNormalizer2 instance which uses the specified data file
    195  * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
    196  * and which composes or decomposes text according to the specified mode.
    197  * Returns an unmodifiable singleton instance. Do not delete it.
    198  *
    199  * Use packageName=NULL for data files that are part of ICU's own data.
    200  * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
    201  * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
    202  * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
    203  *
    204  * @param packageName NULL for ICU built-in data, otherwise application data package name
    205  * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
    206  * @param mode normalization mode, a UNormalization2Mode value (compose or decompose etc.)
    207  * @param pErrorCode Standard ICU error code. Its input value must
    208  *                  pass the U_SUCCESS() test, or else the function returns
    209  *                  immediately. Check for U_FAILURE() on output or use with
    210  *                  function chaining. (See User Guide for details.)
    211  * @return the requested UNormalizer2, if successful
    212  * @stable ICU 4.4
    213  */
    214 U_STABLE const UNormalizer2 * U_EXPORT2
    215 unorm2_getInstance(const char *packageName,
    216                    const char *name,
    217                    UNormalization2Mode mode,
    218                    UErrorCode *pErrorCode);
    219 
    220 /**
    221  * Constructs a filtered normalizer wrapping any UNormalizer2 instance
    222  * and a filter set.
    223  * Both are aliased and must not be modified or deleted while this object
    224  * is used.
    225  * The filter set should be frozen; otherwise the performance will suffer greatly.
    226  * @param norm2 wrapped UNormalizer2 instance
    227  * @param filterSet USet which determines the characters to be normalized
    228  * @param pErrorCode Standard ICU error code. Its input value must
    229  *                   pass the U_SUCCESS() test, or else the function returns
    230  *                   immediately. Check for U_FAILURE() on output or use with
    231  *                   function chaining. (See User Guide for details.)
    232  * @return the requested UNormalizer2, if successful; close it with unorm2_close()
    233  * @stable ICU 4.4
    234  */
    235 U_STABLE UNormalizer2 * U_EXPORT2
    236 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
    237 
    238 /**
    239  * Closes a UNormalizer2 instance from unorm2_openFiltered().
    240  * Do not close the singleton instances from unorm2_getInstance() or from the unorm2_getNFCInstance() family of getters!
    241  * @param norm2 UNormalizer2 instance to be closed
    242  * @stable ICU 4.4
    243  */
    244 U_STABLE void U_EXPORT2
    245 unorm2_close(UNormalizer2 *norm2);
    246 
    247 #if U_SHOW_CPLUSPLUS_API
    248 
    249 U_NAMESPACE_BEGIN
    250 
    251 /**
    252  * \class LocalUNormalizer2Pointer
    253  * C++-only "smart pointer" class, closes a UNormalizer2 via unorm2_close().
    254  * For most methods see the LocalPointerBase base class.
    255  *
    256  * @see LocalPointerBase
    257  * @see LocalPointer
    258  * @stable ICU 4.4
    259  */
    260 U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
    261 
    262 U_NAMESPACE_END
    263 
    264 #endif
    265 
    266 /**
    267  * Writes the normalized form of the source string to the destination string
    268  * (replacing its contents) and returns the length of the destination string.
    269  * The source and destination strings must be different buffers.
    270  * @param norm2 UNormalizer2 instance
    271  * @param src source string
    272  * @param length length of the source string, or -1 if NUL-terminated
    273  * @param dest destination string; its contents are replaced with normalized src
    274  * @param capacity number of UChars that can be written to dest
    275  * @param pErrorCode Standard ICU error code. Its input value must
    276  *                   pass the U_SUCCESS() test, or else the function returns
    277  *                   immediately. Check for U_FAILURE() on output or use with
    278  *                   function chaining. (See User Guide for details.)
    279  * @return the length of the destination string
    280  * @stable ICU 4.4
    281  */
    282 U_STABLE int32_t U_EXPORT2
    283 unorm2_normalize(const UNormalizer2 *norm2,
    284                  const UChar *src, int32_t length,
    285                  UChar *dest, int32_t capacity,
    286                  UErrorCode *pErrorCode);
    287 /**
    288  * Appends the normalized form of the second string to the first string
    289  * (merging them at the boundary) and returns the length of the first string.
    290  * The result is normalized if the first string was normalized.
    291  * The first and second strings must be different buffers.
    292  * @param norm2 UNormalizer2 instance
    293  * @param first string, should be normalized
    294  * @param firstLength length of the first string, or -1 if NUL-terminated
    295  * @param firstCapacity number of UChars that can be written to first
    296  * @param second string, will be normalized
    297  * @param secondLength length of the second string, or -1 if NUL-terminated
    298  * @param pErrorCode Standard ICU error code. Its input value must
    299  *                   pass the U_SUCCESS() test, or else the function returns
    300  *                   immediately. Check for U_FAILURE() on output or use with
    301  *                   function chaining. (See User Guide for details.)
    302  * @return the length of the first string after appending
    303  * @stable ICU 4.4
    304  */
    305 U_STABLE int32_t U_EXPORT2
    306 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
    307                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
    308                                 const UChar *second, int32_t secondLength,
    309                                 UErrorCode *pErrorCode);
    310 /**
    311  * Appends the second string to the first string
    312  * (merging them at the boundary) and returns the length of the first string.
    313  * The result is normalized if both the strings were normalized.
    314  * The first and second strings must be different buffers.
    315  * @param norm2 UNormalizer2 instance
    316  * @param first string, should be normalized
    317  * @param firstLength length of the first string, or -1 if NUL-terminated
    318  * @param firstCapacity number of UChars that can be written to first
    319  * @param second string, should be normalized
    320  * @param secondLength length of the second string, or -1 if NUL-terminated
    321  * @param pErrorCode Standard ICU error code. Its input value must
    322  *                   pass the U_SUCCESS() test, or else the function returns
    323  *                   immediately. Check for U_FAILURE() on output or use with
    324  *                   function chaining. (See User Guide for details.)
    325  * @return the length of the first string after appending
    326  * @stable ICU 4.4
    327  */
    328 U_STABLE int32_t U_EXPORT2
    329 unorm2_append(const UNormalizer2 *norm2,
    330               UChar *first, int32_t firstLength, int32_t firstCapacity,
    331               const UChar *second, int32_t secondLength,
    332               UErrorCode *pErrorCode);
    333 
    334 /**
    335  * Gets the decomposition mapping of c.
    336  * Roughly equivalent to normalizing the string form of c
    337  * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
    338  * returns a negative value and does not write a string
    339  * if c does not have a decomposition mapping in this instance's data.
    340  * This function is independent of the mode of the UNormalizer2.
    341  * @param norm2 UNormalizer2 instance
    342  * @param c code point
    343  * @param decomposition UChar buffer which will be set to c's
    344  *                      decomposition mapping, if there is one.
    345  * @param capacity number of UChars that can be written to decomposition
    346  * @param pErrorCode Standard ICU error code. Its input value must
    347  *                   pass the U_SUCCESS() test, or else the function returns
    348  *                   immediately. Check for U_FAILURE() on output or use with
    349  *                   function chaining. (See User Guide for details.)
    350  * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
    351  * @stable ICU 4.6
    352  */
    353 U_STABLE int32_t U_EXPORT2
    354 unorm2_getDecomposition(const UNormalizer2 *norm2,
    355                         UChar32 c, UChar *decomposition, int32_t capacity,
    356                         UErrorCode *pErrorCode);
    357 
    358 /**
    359  * Gets the raw decomposition mapping of c.
    360  *
    361  * This is similar to the unorm2_getDecomposition() function but returns the
    362  * raw decomposition mapping as specified in UnicodeData.txt or
    363  * (for custom data) in the mapping files processed by the gennorm2 tool.
    364  * By contrast, unorm2_getDecomposition() returns the processed,
    365  * recursively-decomposed version of this mapping.
    366  *
    367  * When used on a standard NFKC UNormalizer2 instance,
    368  * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
    369  *
    370  * When used on a standard NFC UNormalizer2 instance,
    371  * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
    372  * in this case, the result contains either one or two code points (=1..4 UChars).
    373  *
    374  * This function is independent of the mode of the UNormalizer2.
    375  * @param norm2 UNormalizer2 instance
    376  * @param c code point
    377  * @param decomposition UChar buffer which will be set to c's
    378  *                      raw decomposition mapping, if there is one.
    379  * @param capacity number of UChars that can be written to decomposition
    380  * @param pErrorCode Standard ICU error code. Its input value must
    381  *                   pass the U_SUCCESS() test, or else the function returns
    382  *                   immediately. Check for U_FAILURE() on output or use with
    383  *                   function chaining. (See User Guide for details.)
    384  * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
    385  * @stable ICU 49
    386  */
    387 U_STABLE int32_t U_EXPORT2
    388 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
    389                            UChar32 c, UChar *decomposition, int32_t capacity,
    390                            UErrorCode *pErrorCode);
    391 
    392 /**
    393  * Performs pairwise composition of a and b and returns the composite if there is one.
    394  *
    395  * Returns a composite code point c only if c has a two-way mapping to a+b.
    396  * In standard Unicode normalization, this means that
    397  * c has a canonical decomposition to a+b
    398  * and c does not have the Full_Composition_Exclusion property.
    399  *
    400  * This function is independent of the mode of the UNormalizer2.
    401  * @param norm2 UNormalizer2 instance
    402  * @param a A (normalization starter) code point.
    403  * @param b Another code point.
    404  * @return The non-negative composite code point if there is one; otherwise a negative value.
    405  * @stable ICU 49
    406  */
    407 U_STABLE UChar32 U_EXPORT2
    408 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
    409 
    410 /**
    411  * Gets the combining class of c.
    412  * The default implementation returns 0
    413  * but all standard implementations return the Unicode Canonical_Combining_Class value.
    414  * @param norm2 UNormalizer2 instance
    415  * @param c code point
    416  * @return c's combining class, in the range 0..255 of the uint8_t return type
    417  * @stable ICU 49
    418  */
    419 U_STABLE uint8_t U_EXPORT2
    420 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
    421 
    422 /**
    423  * Tests if the string is normalized.
    424  * Internally, in cases where unorm2_quickCheck() would return UNORM_MAYBE
    425  * (which is only possible for the two COMPOSE modes) this function
    426  * resolves to "yes" or "no" to provide a definitive result,
    427  * at the cost of doing more work in those cases.
    428  * @param norm2 UNormalizer2 instance
    429  * @param s input string
    430  * @param length length of the string, or -1 if NUL-terminated
    431  * @param pErrorCode Standard ICU error code. Its input value must
    432  *                   pass the U_SUCCESS() test, or else the function returns
    433  *                   immediately. Check for U_FAILURE() on output or use with
    434  *                   function chaining. (See User Guide for details.)
    435  * @return TRUE if s is normalized
    436  * @stable ICU 4.4
    437  */
    438 U_STABLE UBool U_EXPORT2
    439 unorm2_isNormalized(const UNormalizer2 *norm2,
    440                     const UChar *s, int32_t length,
    441                     UErrorCode *pErrorCode);
    442 
    443 /**
    444  * Tests if the string is normalized.
    445  * For the two COMPOSE modes, the result could be UNORM_MAYBE in cases that
    446  * would take a little more work to resolve definitively.
    447  * Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster
    448  * combination of quick check + normalization, to avoid
    449  * re-checking the "yes" prefix.
    450  * @param norm2 UNormalizer2 instance
    451  * @param s input string
    452  * @param length length of the string, or -1 if NUL-terminated
    453  * @param pErrorCode Standard ICU error code. Its input value must
    454  *                   pass the U_SUCCESS() test, or else the function returns
    455  *                   immediately. Check for U_FAILURE() on output or use with
    456  *                   function chaining. (See User Guide for details.)
    457  * @return a UNormalizationCheckResult: UNORM_YES, UNORM_NO, or UNORM_MAYBE
    458  * @stable ICU 4.4
    459  */
    460 U_STABLE UNormalizationCheckResult U_EXPORT2
    461 unorm2_quickCheck(const UNormalizer2 *norm2,
    462                   const UChar *s, int32_t length,
    463                   UErrorCode *pErrorCode);
    464 
    465 /**
    466  * Returns the end of the normalized substring of the input string.
    467  * In other words, with <code>end=unorm2_spanQuickCheckYes(norm2, s, length, pErrorCode);</code>
    468  * the substring consisting of the first <code>end</code> UChars of s
    469  * will pass the quick check with a "yes" result.
    470  *
    471  * The returned end index is usually one or more characters before the
    472  * "no" or "maybe" character: The end index is at a normalization boundary.
    473  * (See the Normalizer2 C++ class documentation for more about normalization boundaries.)
    474  *
    475  * When the goal is a normalized string and most input strings are expected
    476  * to be normalized already, then call this function,
    477  * and if it returns a prefix shorter than the input string,
    478  * copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
    479  * @param norm2 UNormalizer2 instance
    480  * @param s input string
    481  * @param length length of the string, or -1 if NUL-terminated
    482  * @param pErrorCode Standard ICU error code. Its input value must
    483  *                   pass the U_SUCCESS() test, or else the function returns
    484  *                   immediately. Check for U_FAILURE() on output or use with
    485  *                   function chaining. (See User Guide for details.)
    486  * @return "yes" span end index
    487  * @stable ICU 4.4
    488  */
    489 U_STABLE int32_t U_EXPORT2
    490 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
    491                          const UChar *s, int32_t length,
    492                          UErrorCode *pErrorCode);
    493 
    494 /**
    495  * Tests if the character always has a normalization boundary before it,
    496  * regardless of context.
    497  * For details see the documentation of the Normalizer2 C++ base class.
    498  * @param norm2 UNormalizer2 instance
    499  * @param c character to test
    500  * @return TRUE if c has a normalization boundary before it
    501  * @stable ICU 4.4
    502  */
    503 U_STABLE UBool U_EXPORT2
    504 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
    505 
    506 /**
    507  * Tests if the character always has a normalization boundary after it,
    508  * regardless of context.
    509  * For details see the documentation of the Normalizer2 C++ base class.
    510  * @param norm2 UNormalizer2 instance
    511  * @param c character to test
    512  * @return TRUE if c has a normalization boundary after it
    513  * @stable ICU 4.4
    514  */
    515 U_STABLE UBool U_EXPORT2
    516 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
    517 
    518 /**
    519  * Tests if the character is normalization-inert.
    520  * For details see the documentation of the Normalizer2 C++ base class.
    521  * @param norm2 UNormalizer2 instance
    522  * @param c character to test
    523  * @return TRUE if c is normalization-inert
    524  * @stable ICU 4.4
    525  */
    526 U_STABLE UBool U_EXPORT2
    527 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
    528 
    529 /**
    530  * Option bit for the options argument of unorm_compare():
    531  * Both input strings are assumed to fulfill FCD conditions.
    532  * @stable ICU 2.2
    533  */
    534 #define UNORM_INPUT_IS_FCD          0x20000
    535 
    536 /**
    537  * Option bit for the options argument of unorm_compare():
    538  * Perform case-insensitive comparison.
    539  * @stable ICU 2.2
    540  */
    541 #define U_COMPARE_IGNORE_CASE       0x10000
    542 
    543 #ifndef U_COMPARE_CODE_POINT_ORDER
    544 /* see also unistr.h and ustring.h; the #ifndef guard skips this definition if the macro is already defined */
    545 /**
    546  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
    547  * Compare strings in code point order instead of code unit order.
    548  * @stable ICU 2.2
    549  */
    550 #define U_COMPARE_CODE_POINT_ORDER  0x8000
    551 #endif
    552 
    553 /**
    554  * Compares two strings for canonical equivalence.
    555  * Further options include case-insensitive comparison and
    556  * code point order (as opposed to code unit order).
    557  *
    558  * Canonical equivalence between two strings is defined as their normalized
    559  * forms (NFD or NFC) being identical.
    560  * This function compares strings incrementally instead of normalizing
    561  * (and optionally case-folding) both strings entirely,
    562  * improving performance significantly.
    563  *
    564  * Bulk normalization is only necessary if the strings do not fulfill the FCD
    565  * conditions. Only in this case, and only if the strings are relatively long,
    566  * is memory allocated temporarily.
    567  * For FCD strings and short non-FCD strings there is no memory allocation.
    568  *
    569  * Semantically, this is equivalent to
    570  *   strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
    571  * where code point order and foldCase are all optional.
    572  *
    573  * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
    574  * the case folding must be performed first, then the normalization.
    575  *
    576  * @param s1 First source string.
    577  * @param length1 Length of first source string, or -1 if NUL-terminated.
    578  *
    579  * @param s2 Second source string.
    580  * @param length2 Length of second source string, or -1 if NUL-terminated.
    581  *
    582  * @param options A bit set of options:
    583  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    584  *     Case-sensitive comparison in code unit order, and the input strings
    585  *     are quick-checked for FCD.
    586  *
    587  *   - UNORM_INPUT_IS_FCD
    588  *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
    589  *     If not set, the function will quickCheck for FCD
    590  *     and normalize if necessary.
    591  *
    592  *   - U_COMPARE_CODE_POINT_ORDER
    593  *     Set to choose code point order instead of code unit order
    594  *     (see u_strCompare for details).
    595  *
    596  *   - U_COMPARE_IGNORE_CASE
    597  *     Set to compare strings case-insensitively using case folding,
    598  *     instead of case-sensitively.
    599  *     If set, then the following case folding options are used.
    600  *
    601  *   - Options as used with case-insensitive comparisons, currently:
    602  *
    603  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    604  *    (see u_strCaseCompare for details)
    605  *
    606  *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
    607  *
    608  * @param pErrorCode ICU error code in/out parameter.
    609  *                   Must fulfill U_SUCCESS before the function call.
    610  * @return <0 or 0 or >0 as usual for string comparisons; 0 means the strings are equivalent under the given options
    611  *
    612  * @see unorm_normalize
    613  * @see UNORM_FCD
    614  * @see u_strCompare
    615  * @see u_strCaseCompare
    616  *
    617  * @stable ICU 2.2
    618  */
    619 U_STABLE int32_t U_EXPORT2
    620 unorm_compare(const UChar *s1, int32_t length1,
    621               const UChar *s2, int32_t length2,
    622               uint32_t options,
    623               UErrorCode *pErrorCode);
    624 
    625 #endif  /* !UCONFIG_NO_NORMALIZATION */
    626 #endif  /* __UNORM2_H__ */
    627