Home | History | Annotate | Download | only in unicode
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1998-2004, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * File ustring.h
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   12/07/98    bertrand    Creation.
     13 ******************************************************************************
     14 */
     15 
     16 #ifndef USTRING_H
     17 #define USTRING_H
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/putil.h"
     21 #include "unicode/uiter.h"
     22 
     23 /** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
     24 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
     25 #   define UBRK_TYPEDEF_UBREAK_ITERATOR
     26     typedef void UBreakIterator;
     27 #endif
     28 
     29 /**
     30  * \file
     31  * \brief C API: Unicode string handling functions
     32  *
     33  * These C API functions provide general Unicode string handling.
     34  *
     35  * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
     36  * functions. (For example, they do not check for bad arguments like NULL string pointers.)
     37  * In some cases, only the thread-safe variant of such a function is implemented here
     38  * (see u_strtok_r()).
     39  *
     40  * Other functions provide more Unicode-specific functionality like locale-specific
     41  * upper/lower-casing and string comparison in code point order.
     42  *
     43  * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
     44  * UTF-16 encodes each Unicode code point with either one or two UChar code units.
     45  * (This is the default form of Unicode, and a forward-compatible extension of the original,
     46  * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
     47  * in 1996.)
     48  *
     49  * Some APIs accept a 32-bit UChar32 value for a single code point.
     50  *
     51  * ICU also handles 16-bit Unicode text with unpaired surrogates.
     52  * Such text is not well-formed UTF-16.
     53  * Code-point-related functions treat unpaired surrogates as surrogate code points,
     54  * i.e., as separate units.
     55  *
     56  * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
     57  * it is much more efficient even for random access because the code unit values
     58  * for single-unit characters vs. lead units vs. trail units are completely disjoint.
     59  * This means that it is easy to determine character (code point) boundaries from
     60  * random offsets in the string.
     61  *
     62  * Unicode (UTF-16) string processing is optimized for the single-unit case.
     63  * Although it is important to support supplementary characters
     64  * (which use pairs of lead/trail code units called "surrogates"),
     65  * their occurrence is rare. Almost all characters in modern use require only
     66  * a single UChar code unit (i.e., their code point values are <=0xffff).
     67  *
     68  * For more details see the User Guide Strings chapter (http://oss.software.ibm.com/icu/userguide/strings.html).
     69  * For a discussion of the handling of unpaired surrogates see also
     70  * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
     71  */
     72 
     73 /**
     74  * Determine the length of an array of UChar.
     75  *
     76  * @param s The array of UChars, NUL (U+0000) terminated.
     77  * @return The number of UChars in <code>s</code>, minus the terminator.
     78  * @stable ICU 2.0
     79  */
     80 U_STABLE int32_t U_EXPORT2
     81 u_strlen(const UChar *s);
     82 
     83 /**
     84  * Count Unicode code points in the length UChar code units of the string.
     85  * A code point may occupy either one or two UChar code units.
     86  * Counting code points involves reading all code units.
     87  *
     88  * This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
     89  *
     90  * @param s The input string.
     91  * @param length The number of UChar code units to be checked, or -1 to count all
     92  *               code points before the first NUL (U+0000).
     93  * @return The number of code points in the specified code units.
     94  * @stable ICU 2.0
     95  */
     96 U_STABLE int32_t U_EXPORT2
     97 u_countChar32(const UChar *s, int32_t length);
     98 
     99 /**
    100  * Check if the string contains more Unicode code points than a certain number.
    101  * This is more efficient than counting all code points in the entire string
    102  * and comparing that number with a threshold.
    103  * This function may not need to scan the string at all if the length is known
    104  * (not -1 for NUL-termination) and falls within a certain range, and
    105  * never needs to count more than 'number+1' code points.
    106  * Logically equivalent to (u_countChar32(s, length)>number).
    107  * A Unicode code point may occupy either one or two UChar code units.
    108  *
    109  * @param s The input string.
    110  * @param length The length of the string, or -1 if it is NUL-terminated.
    111  * @param number The number of code points in the string is compared against
    112  *               the 'number' parameter.
    113  * @return Boolean value for whether the string contains more Unicode code points
    114  *         than 'number'. Same as (u_countChar32(s, length)>number).
    115  * @stable ICU 2.4
    116  */
    117 U_STABLE UBool U_EXPORT2
    118 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
    119 
    120 /**
    121  * Concatenate two ustrings.  Appends a copy of <code>src</code>,
    122  * including the null terminator, to <code>dst</code>. The initial copied
    123  * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
    124  *
    125  * @param dst The destination string.
    126  * @param src The source string.
    127  * @return A pointer to <code>dst</code>.
    128  * @stable ICU 2.0
    129  */
    130 U_STABLE UChar* U_EXPORT2
    131 u_strcat(UChar     *dst,
    132     const UChar     *src);
    133 
    134 /**
    135  * Concatenate two ustrings.
    136  * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
    137  * Adds a terminating NUL.
    138  * If src is too long, then only <code>n-1</code> characters will be copied
    139  * before the terminating NUL.
    140  * If <code>n&lt;=0</code> then dst is not modified.
    141  *
    142  * @param dst The destination string.
    143  * @param src The source string.
    144  * @param n The maximum number of characters to append.
    145  * @return A pointer to <code>dst</code>.
    146  * @stable ICU 2.0
    147  */
    148 U_STABLE UChar* U_EXPORT2
    149 u_strncat(UChar     *dst,
    150      const UChar     *src,
    151      int32_t     n);
    152 
    153 /**
    154  * Find the first occurrence of a substring in a string.
    155  * The substring is found at code point boundaries.
    156  * That means that if the substring begins with
    157  * a trail surrogate or ends with a lead surrogate,
    158  * then it is found only if these surrogates stand alone in the text.
    159  * Otherwise, the substring edge units would be matched against
    160  * halves of surrogate pairs.
    161  *
    162  * @param s The string to search (NUL-terminated).
    163  * @param substring The substring to find (NUL-terminated).
    164  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
    165  *         or <code>s</code> itself if the <code>substring</code> is empty,
    166  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
    167  * @stable ICU 2.0
    168  *
    169  * @see u_strrstr
    170  * @see u_strFindFirst
    171  * @see u_strFindLast
    172  */
    173 U_STABLE UChar * U_EXPORT2
    174 u_strstr(const UChar *s, const UChar *substring);
    175 
    176 /**
    177  * Find the first occurrence of a substring in a string.
    178  * The substring is found at code point boundaries.
    179  * That means that if the substring begins with
    180  * a trail surrogate or ends with a lead surrogate,
    181  * then it is found only if these surrogates stand alone in the text.
    182  * Otherwise, the substring edge units would be matched against
    183  * halves of surrogate pairs.
    184  *
    185  * @param s The string to search.
    186  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
    187  * @param substring The substring to find (NUL-terminated if subLength is -1).
    188  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
    189  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
    190  *         or <code>s</code> itself if the <code>substring</code> is empty,
    191  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
    192  * @stable ICU 2.4
    193  *
    194  * @see u_strstr
    195  * @see u_strFindLast
    196  */
    197 U_STABLE UChar * U_EXPORT2
    198 u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
    199 
    200 /**
    201  * Find the first occurrence of a BMP code point in a string.
    202  * A surrogate code point is found only if its match in the text is not
    203  * part of a surrogate pair.
    204  * A NUL character is found at the string terminator.
    205  *
    206  * @param s The string to search (NUL-terminated).
    207  * @param c The BMP code point to find.
    208  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
    209  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    210  * @stable ICU 2.0
    211  *
    212  * @see u_strchr32
    213  * @see u_memchr
    214  * @see u_strstr
    215  * @see u_strFindFirst
    216  */
    217 U_STABLE UChar * U_EXPORT2
    218 u_strchr(const UChar *s, UChar c);
    219 
    220 /**
    221  * Find the first occurrence of a code point in a string.
    222  * A surrogate code point is found only if its match in the text is not
    223  * part of a surrogate pair.
    224  * A NUL character is found at the string terminator.
    225  *
    226  * @param s The string to search (NUL-terminated).
    227  * @param c The code point to find.
    228  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
    229  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    230  * @stable ICU 2.0
    231  *
    232  * @see u_strchr
    233  * @see u_memchr32
    234  * @see u_strstr
    235  * @see u_strFindFirst
    236  */
    237 U_STABLE UChar * U_EXPORT2
    238 u_strchr32(const UChar *s, UChar32 c);
    239 
    240 /**
    241  * Find the last occurrence of a substring in a string.
    242  * The substring is found at code point boundaries.
    243  * That means that if the substring begins with
    244  * a trail surrogate or ends with a lead surrogate,
    245  * then it is found only if these surrogates stand alone in the text.
    246  * Otherwise, the substring edge units would be matched against
    247  * halves of surrogate pairs.
    248  *
    249  * @param s The string to search (NUL-terminated).
    250  * @param substring The substring to find (NUL-terminated).
    251  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
    252  *         or <code>s</code> itself if the <code>substring</code> is empty,
    253  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
    254  * @stable ICU 2.4
    255  *
    256  * @see u_strstr
    257  * @see u_strFindFirst
    258  * @see u_strFindLast
    259  */
    260 U_STABLE UChar * U_EXPORT2
    261 u_strrstr(const UChar *s, const UChar *substring);
    262 
    263 /**
    264  * Find the last occurrence of a substring in a string.
    265  * The substring is found at code point boundaries.
    266  * That means that if the substring begins with
    267  * a trail surrogate or ends with a lead surrogate,
    268  * then it is found only if these surrogates stand alone in the text.
    269  * Otherwise, the substring edge units would be matched against
    270  * halves of surrogate pairs.
    271  *
    272  * @param s The string to search.
    273  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
    274  * @param substring The substring to find (NUL-terminated if subLength is -1).
    275  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
    276  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
    277  *         or <code>s</code> itself if the <code>substring</code> is empty,
    278  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
    279  * @stable ICU 2.4
    280  *
    281  * @see u_strrstr
    282  * @see u_strFindFirst
    283  */
    284 U_STABLE UChar * U_EXPORT2
    285 u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
    286 
    287 /**
    288  * Find the last occurrence of a BMP code point in a string.
    289  * A surrogate code point is found only if its match in the text is not
    290  * part of a surrogate pair.
    291  * A NUL character is found at the string terminator.
    292  *
    293  * @param s The string to search (NUL-terminated).
    294  * @param c The BMP code point to find.
    295  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
    296  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    297  * @stable ICU 2.4
    298  *
    299  * @see u_strrchr32
    300  * @see u_memrchr
    301  * @see u_strrstr
    302  * @see u_strFindLast
    303  */
    304 U_STABLE UChar * U_EXPORT2
    305 u_strrchr(const UChar *s, UChar c);
    306 
    307 /**
    308  * Find the last occurrence of a code point in a string.
    309  * A surrogate code point is found only if its match in the text is not
    310  * part of a surrogate pair.
    311  * A NUL character is found at the string terminator.
    312  *
    313  * @param s The string to search (NUL-terminated).
    314  * @param c The code point to find.
    315  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
    316  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    317  * @stable ICU 2.4
    318  *
    319  * @see u_strrchr
    320  * @see u_memrchr32
    321  * @see u_strrstr
    322  * @see u_strFindLast
    323  */
    324 U_STABLE UChar * U_EXPORT2
    325 u_strrchr32(const UChar *s, UChar32 c);
    326 
    327 /**
    328  * Locates the first occurrence in the string <code>string</code> of any of the characters
    329  * in the string <code>matchSet</code>.
    330  * Works just like C's strpbrk but with Unicode.
    331  *
    332  * @param string The string in which to search, NUL-terminated.
    333  * @param matchSet A NUL-terminated string defining a set of code points
    334  *                 for which to search in the text string.
    335  * @return A pointer to the first character in <code>string</code> that matches one of the
    336  *         characters in <code>matchSet</code>, or NULL if no such character is found.
    337  * @stable ICU 2.0
    338  */
    339 U_STABLE UChar * U_EXPORT2
    340 u_strpbrk(const UChar *string, const UChar *matchSet);
    341 
    342 /**
    343  * Returns the number of consecutive characters in <code>string</code>,
    344  * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
    345  * Works just like C's strcspn but with Unicode.
    346  *
    347  * @param string The string in which to search, NUL-terminated.
    348  * @param matchSet A NUL-terminated string defining a set of code points
    349  *                 for which to search in the text string.
    350  * @return The number of initial characters in <code>string</code> that do not
    351  *         occur in <code>matchSet</code>.
    352  * @see u_strspn
    353  * @stable ICU 2.0
    354  */
    355 U_STABLE int32_t U_EXPORT2
    356 u_strcspn(const UChar *string, const UChar *matchSet);
    357 
    358 /**
    359  * Returns the number of consecutive characters in <code>string</code>,
    360  * beginning with the first, that occur somewhere in <code>matchSet</code>.
    361  * Works just like C's strspn but with Unicode.
    362  *
    363  * @param string The string in which to search, NUL-terminated.
    364  * @param matchSet A NUL-terminated string defining a set of code points
    365  *                 for which to search in the text string.
    366  * @return The number of initial characters in <code>string</code> that do
    367  *         occur in <code>matchSet</code>.
    368  * @see u_strcspn
    369  * @stable ICU 2.0
    370  */
    371 U_STABLE int32_t U_EXPORT2
    372 u_strspn(const UChar *string, const UChar *matchSet);
    373 
    374 /**
    375  * The string tokenizer API allows an application to break a string into
    376  * tokens. Unlike strtok(), the tokenizer state (the current pointer within the
    377  * original string) is maintained in saveState. In the first call, the
    378  * argument src is a pointer to the string. In subsequent calls to
    379  * return successive tokens of that string, src must be specified as
    380  * NULL. The value saveState is set by this function to maintain the
    381  * function's position within the string, and on each subsequent call
    382  * you must give this argument the same variable. This function does
    383  * handle surrogate pairs. This function is similar to strtok_r() from
    384  * the POSIX Threads Extension (1003.1c-1995).
    385  *
    386  * @param src String containing token(s). This string will be modified.
    387  *            After the first call to u_strtok_r(), this argument must
    388  *            be NULL to get to the next token.
    389  * @param delim Set of delimiter characters (Unicode code points).
    390  * @param saveState The current pointer within the original string,
    391  *              which is set by this function. The saveState
    392  *              parameter should be the address of a local variable of type
    393  *              UChar *. (i.e. define "UChar *myLocalSaveState" and use
    394  *              &myLocalSaveState for this parameter).
    395  * @return A pointer to the next token found in src, or NULL
    396  *         when there are no more tokens.
    397  * @stable ICU 2.0
    398  */
    399 U_STABLE UChar * U_EXPORT2
    400 u_strtok_r(UChar    *src,
    401      const UChar    *delim,
    402            UChar   **saveState);
    403 
    404 /**
    405  * Compare two Unicode strings for bitwise equality (code unit order).
    406  *
    407  * @param s1 A string to compare.
    408  * @param s2 A string to compare.
    409  * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
    410  * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
    411  * value if <code>s1</code> is bitwise greater than <code>s2</code>.
    412  * @stable ICU 2.0
    413  */
    414 U_STABLE int32_t  U_EXPORT2
    415 u_strcmp(const UChar     *s1,
    416          const UChar     *s2);
    417 
    418 /**
    419  * Compare two Unicode strings in code point order.
    420  * See u_strCompare for details.
    421  *
    422  * @param s1 A string to compare.
    423  * @param s2 A string to compare.
    424  * @return a negative/zero/positive integer corresponding to whether
    425  * the first string is less than/equal to/greater than the second one
    426  * in code point order
    427  * @stable ICU 2.0
    428  */
    429 U_STABLE int32_t U_EXPORT2
    430 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
    431 
    432 /**
    433  * Compare two Unicode strings (binary order).
    434  *
    435  * The comparison can be done in code unit order or in code point order.
    436  * They differ only in UTF-16 when
    437  * comparing supplementary code points (U+10000..U+10ffff)
    438  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
    439  * In code unit order, high BMP code points sort after supplementary code points
    440  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
    441  *
    442  * This function works with strings of different explicitly specified lengths
    443  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
    444  * NUL-terminated strings are possible with length arguments of -1.
    445  *
    446  * @param s1 First source string.
    447  * @param length1 Length of first source string, or -1 if NUL-terminated.
    448  *
    449  * @param s2 Second source string.
    450  * @param length2 Length of second source string, or -1 if NUL-terminated.
    451  *
    452  * @param codePointOrder Choose between code unit order (FALSE)
    453  *                       and code point order (TRUE).
    454  *
    455  * @return <0 or 0 or >0 as usual for string comparisons
    456  *
    457  * @stable ICU 2.2
    458  */
    459 U_STABLE int32_t U_EXPORT2
    460 u_strCompare(const UChar *s1, int32_t length1,
    461              const UChar *s2, int32_t length2,
    462              UBool codePointOrder);
    463 
    464 /**
    465  * Compare two Unicode strings (binary order)
    466  * as presented by UCharIterator objects.
    467  * Works otherwise just like u_strCompare().
    468  *
    469  * Both iterators are reset to their start positions.
    470  * When the function returns, it is undefined where the iterators
    471  * have stopped.
    472  *
    473  * @param iter1 First source string iterator.
    474  * @param iter2 Second source string iterator.
    475  * @param codePointOrder Choose between code unit order (FALSE)
    476  *                       and code point order (TRUE).
    477  *
    478  * @return <0 or 0 or >0 as usual for string comparisons
    479  *
    480  * @see u_strCompare
    481  *
    482  * @stable ICU 2.6
    483  */
    484 U_STABLE int32_t U_EXPORT2
    485 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
    486 
    487 #ifndef U_COMPARE_CODE_POINT_ORDER
    488 /* see also unistr.h and unorm.h */
    489 /**
    490  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
    491  * Compare strings in code point order instead of code unit order.
    492  * @stable ICU 2.2
    493  */
    494 #define U_COMPARE_CODE_POINT_ORDER  0x8000
    495 #endif
    496 
    497 /**
    498  * Compare two strings case-insensitively using full case folding.
    499  * This is equivalent to
    500  *   u_strCompare(u_strFoldCase(s1, options),
    501  *                u_strFoldCase(s2, options),
    502  *                (options&U_COMPARE_CODE_POINT_ORDER)!=0).
    503  *
    504  * The comparison can be done in UTF-16 code unit order or in code point order.
    505  * They differ only when comparing supplementary code points (U+10000..U+10ffff)
    506  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
    507  * In code unit order, high BMP code points sort after supplementary code points
    508  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
    509  *
    510  * This function works with strings of different explicitly specified lengths
    511  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
    512  * NUL-terminated strings are possible with length arguments of -1.
    513  *
    514  * @param s1 First source string.
    515  * @param length1 Length of first source string, or -1 if NUL-terminated.
    516  *
    517  * @param s2 Second source string.
    518  * @param length2 Length of second source string, or -1 if NUL-terminated.
    519  *
    520  * @param options A bit set of options:
    521  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    522  *     Comparison in code unit order with default case folding.
    523  *
    524  *   - U_COMPARE_CODE_POINT_ORDER
    525  *     Set to choose code point order instead of code unit order
    526  *     (see u_strCompare for details).
    527  *
    528  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    529  *
    530  * @param pErrorCode Must be a valid pointer to an error code value,
    531  *                  which must not indicate a failure before the function call.
    532  *
    533  * @return <0 or 0 or >0 as usual for string comparisons
    534  *
    535  * @stable ICU 2.2
    536  */
    537 U_STABLE int32_t U_EXPORT2
    538 u_strCaseCompare(const UChar *s1, int32_t length1,
    539                  const UChar *s2, int32_t length2,
    540                  uint32_t options,
    541                  UErrorCode *pErrorCode);
    542 
    543 /**
    544  * Compare two ustrings for bitwise equality.
    545  * Compares at most <code>n</code> characters.
    546  *
    547  * @param ucs1 A string to compare.
    548  * @param ucs2 A string to compare.
    549  * @param n The maximum number of characters to compare.
    550  * @return 0 if <code>ucs1</code> and <code>ucs2</code> are bitwise equal; a negative
    551  * value if <code>ucs1</code> is bitwise less than <code>ucs2</code>; a positive
    552  * value if <code>ucs1</code> is bitwise greater than <code>ucs2</code>.
    553  * @stable ICU 2.0
    554  */
    555 U_STABLE int32_t U_EXPORT2
    556 u_strncmp(const UChar     *ucs1,
    557      const UChar     *ucs2,
    558      int32_t     n);
    559 
    560 /**
    561  * Compare two Unicode strings in code point order.
    562  * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
    563  * For details, see u_strCompare().
    564  *
    565  * @param s1 A string to compare.
    566  * @param s2 A string to compare.
    567  * @param n The maximum number of characters to compare.
    568  * @return a negative/zero/positive integer corresponding to whether
    569  * the first string is less than/equal to/greater than the second one
    570  * in code point order
    571  * @stable ICU 2.0
    572  */
    573 U_STABLE int32_t U_EXPORT2
    574 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
    575 
    576 /**
    577  * Compare two strings case-insensitively using full case folding.
    578  * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
    579  *
    580  * @param s1 A string to compare.
    581  * @param s2 A string to compare.
    582  * @param options A bit set of options:
    583  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    584  *     Comparison in code unit order with default case folding.
    585  *
    586  *   - U_COMPARE_CODE_POINT_ORDER
    587  *     Set to choose code point order instead of code unit order
    588  *     (see u_strCompare for details).
    589  *
    590  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    591  *
    592  * @return A negative, zero, or positive integer indicating the comparison result.
    593  * @stable ICU 2.0
    594  */
    595 U_STABLE int32_t U_EXPORT2
    596 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
    597 
    598 /**
    599  * Compare two strings case-insensitively using full case folding.
    600  * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
    601  * u_strFoldCase(s2, at most n, options)).
    602  *
    603  * @param s1 A string to compare.
    604  * @param s2 A string to compare.
    605  * @param n The maximum number of characters in each string to case-fold and then compare.
    606  * @param options A bit set of options:
    607  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    608  *     Comparison in code unit order with default case folding.
    609  *
    610  *   - U_COMPARE_CODE_POINT_ORDER
    611  *     Set to choose code point order instead of code unit order
    612  *     (see u_strCompare for details).
    613  *
    614  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    615  *
    616  * @return A negative, zero, or positive integer indicating the comparison result.
    617  * @stable ICU 2.0
    618  */
    619 U_STABLE int32_t U_EXPORT2
    620 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
    621 
    622 /**
    623  * Compare two strings case-insensitively using full case folding.
    624  * This is equivalent to u_strcmp(u_strFoldCase(s1, length, options),
    625  * u_strFoldCase(s2, length, options)).
    626  *
    627  * @param s1 A string to compare.
    628  * @param s2 A string to compare.
    629  * @param length The number of characters in each string to case-fold and then compare.
    630  * @param options A bit set of options:
    631  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    632  *     Comparison in code unit order with default case folding.
    633  *
    634  *   - U_COMPARE_CODE_POINT_ORDER
    635  *     Set to choose code point order instead of code unit order
    636  *     (see u_strCompare for details).
    637  *
    638  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    639  *
    640  * @return A negative, zero, or positive integer indicating the comparison result.
    641  * @stable ICU 2.0
    642  */
    643 U_STABLE int32_t U_EXPORT2
    644 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
    645 
    646 /**
    647  * Copy a ustring. Adds a null terminator.
    648  *
    649  * @param dst The destination string.
    650  * @param src The source string.
    651  * @return A pointer to <code>dst</code>.
    652  * @stable ICU 2.0
    653  */
    654 U_STABLE UChar* U_EXPORT2
    655 u_strcpy(UChar     *dst,
    656     const UChar     *src);
    657 
    658 /**
    659  * Copy a ustring.
    660  * Copies at most <code>n</code> characters.  The result will be null terminated
    661  * if the length of <code>src</code> is less than <code>n</code>.
    662  *
    663  * @param dst The destination string.
    664  * @param src The source string.
    665  * @param n The maximum number of characters to copy.
    666  * @return A pointer to <code>dst</code>.
    667  * @stable ICU 2.0
    668  */
    669 U_STABLE UChar* U_EXPORT2
    670 u_strncpy(UChar     *dst,
    671      const UChar     *src,
    672      int32_t     n);
    673 
    674 #if !UCONFIG_NO_CONVERSION
    675 
    676 /**
    677  * Copy a byte string encoded in the default codepage to a ustring.
    678  * Adds a null terminator.
    679  * Performs a host byte to UChar conversion
    680  *
    681  * @param dst The destination string.
    682  * @param src The source string.
    683  * @return A pointer to <code>dst</code>.
    684  * @stable ICU 2.0
    685  */
    686 U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
    687                const char *src );
    688 
    689 /**
    690  * Copy a byte string encoded in the default codepage to a ustring.
    691  * Copies at most <code>n</code> characters.  The result will be null terminated
    692  * if the length of <code>src</code> is less than <code>n</code>.
    693  * Performs a host byte to UChar conversion
    694  *
    695  * @param dst The destination string.
    696  * @param src The source string.
    697  * @param n The maximum number of characters to copy.
    698  * @return A pointer to <code>dst</code>.
    699  * @stable ICU 2.0
    700  */
    701 U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
    702             const char *src,
    703             int32_t n);
    704 
    705 /**
    706  * Copy ustring to a byte string encoded in the default codepage.
    707  * Adds a null terminator.
    708  * Performs a UChar to host byte conversion.
    709  *
    710  * @param dst The destination string.
    711  * @param src The source string.
    712  * @return A pointer to <code>dst</code>.
    713  * @stable ICU 2.0
    714  */
    715 U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
    716             const UChar *src );
    717 
    718 /**
    719  * Copy ustring to a byte string encoded in the default codepage.
    720  * Copies at most <code>n</code> characters.  The result will be null terminated
    721  * if the length of <code>src</code> is less than <code>n</code>.
    722  * Performs a UChar to host byte conversion.
    723  *
    724  * @param dst The destination string.
    725  * @param src The source string.
    726  * @param n The maximum number of characters to copy.
    727  * @return A pointer to <code>dst</code>.
    728  * @stable ICU 2.0
    729  */
    730 U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
    731             const UChar *src,
    732             int32_t n );
    733 
    734 #endif
    735 
    736 /**
    737  * Synonym for memcpy(), but with UChars only.
    738  * @param dest The destination string
    739  * @param src The source string
    740  * @param count The number of characters to copy
    741  * @return A pointer to <code>dest</code>
    742  * @stable ICU 2.0
    743  */
    744 U_STABLE UChar* U_EXPORT2
    745 u_memcpy(UChar *dest, const UChar *src, int32_t count);
    746 
    747 /**
    748  * Synonym for memmove(), but with UChars only.
    749  * @param dest The destination string
    750  * @param src The source string
    751  * @param count The number of characters to move
    752  * @return A pointer to <code>dest</code>
    753  * @stable ICU 2.0
    754  */
    755 U_STABLE UChar* U_EXPORT2
    756 u_memmove(UChar *dest, const UChar *src, int32_t count);
    757 
    758 /**
    759  * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
    760  *
    761  * @param dest The destination string.
    762  * @param c The character value to store in each of the <code>count</code> positions.
    763  * @param count The number of characters to set.
    764  * @return A pointer to <code>dest</code>.
    765  * @stable ICU 2.0
    766  */
    767 U_STABLE UChar* U_EXPORT2
    768 u_memset(UChar *dest, UChar c, int32_t count);
    769 
    770 /**
    771  * Compare the first <code>count</code> UChars of each buffer.
    772  *
    773  * @param buf1 The first string to compare.
    774  * @param buf2 The second string to compare.
    775  * @param count The maximum number of UChars to compare.
    776  * @return When buf1 < buf2, a negative number is returned.
    777  *      When buf1 == buf2, 0 is returned.
    778  *      When buf1 > buf2, a positive number is returned.
    779  * @stable ICU 2.0
    780  */
    781 U_STABLE int32_t U_EXPORT2
    782 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
    783 
    784 /**
    785  * Compare two Unicode strings in code point order.
    786  * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
    787  * For details, see u_strCompare().
    788  *
    789  * @param s1 A string to compare.
    790  * @param s2 A string to compare.
    791  * @param count The maximum number of characters to compare.
    792  * @return a negative/zero/positive integer corresponding to whether
    793  * the first string is less than/equal to/greater than the second one
    794  * in code point order
    795  * @stable ICU 2.0
    796  */
    797 U_STABLE int32_t U_EXPORT2
    798 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
    799 
    800 /**
    801  * Find the first occurrence of a BMP code point in a string.
    802  * A surrogate code point is found only if its match in the text is not
    803  * part of a surrogate pair.
    804  * A NUL character is found at the string terminator.
    805  *
    806  * @param s The string to search (contains <code>count</code> UChars).
    807  * @param c The BMP code point to find.
    808  * @param count The length of the string.
    809  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
    810  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    811  * @stable ICU 2.0
    812  *
    813  * @see u_strchr
    814  * @see u_memchr32
    815  * @see u_strFindFirst
    816  */
    817 U_STABLE UChar* U_EXPORT2
    818 u_memchr(const UChar *s, UChar c, int32_t count);
    819 
    820 /**
    821  * Find the first occurrence of a code point in a string.
    822  * A surrogate code point is found only if its match in the text is not
    823  * part of a surrogate pair.
    824  * A NUL character is found at the string terminator.
    825  *
    826  * @param s The string to search (contains <code>count</code> UChars).
    827  * @param c The code point to find.
    828  * @param count The length of the string.
    829  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
    830  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    831  * @stable ICU 2.0
    832  *
    833  * @see u_strchr32
    834  * @see u_memchr
    835  * @see u_strFindFirst
    836  */
    837 U_STABLE UChar* U_EXPORT2
    838 u_memchr32(const UChar *s, UChar32 c, int32_t count);
    839 
    840 /**
    841  * Find the last occurrence of a BMP code point in a string.
    842  * A surrogate code point is found only if its match in the text is not
    843  * part of a surrogate pair.
    844  * A NUL character is found at the string terminator.
    845  *
    846  * @param s The string to search (contains <code>count</code> UChars).
    847  * @param c The BMP code point to find.
    848  * @param count The length of the string.
    849  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
    850  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    851  * @stable ICU 2.4
    852  *
    853  * @see u_strrchr
    854  * @see u_memrchr32
    855  * @see u_strFindLast
    856  */
    857 U_STABLE UChar* U_EXPORT2
    858 u_memrchr(const UChar *s, UChar c, int32_t count);
    859 
    860 /**
    861  * Find the last occurrence of a code point in a string.
    862  * A surrogate code point is found only if its match in the text is not
    863  * part of a surrogate pair.
    864  * A NUL character is found at the string terminator.
    865  *
    866  * @param s The string to search (contains <code>count</code> UChars).
    867  * @param c The code point to find.
    868  * @param count The length of the string.
    869  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
    870  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    871  * @stable ICU 2.4
    872  *
    873  * @see u_strrchr32
    874  * @see u_memrchr
    875  * @see u_strFindLast
    876  */
    877 U_STABLE UChar* U_EXPORT2
    878 u_memrchr32(const UChar *s, UChar32 c, int32_t count);
    879 
    880 /**
    881  * Unicode String literals in C.
    882  * We need one macro to declare a variable for the string
    883  * and to statically preinitialize it if possible,
    884  * and a second macro to dynamically initialize such a string variable if necessary.
    885  *
    886  * The macros are defined for maximum performance.
    887  * They work only for strings that contain "invariant characters", i.e.,
    888  * only Latin letters, digits, and some punctuation.
    889  * See utypes.h for details.
    890  *
    891  * A pair of macros for a single string must be used with the same
    892  * parameters.
    893  * The string parameter must be a C string literal.
    894  * The length of the string, not including the terminating
    895  * <code>NUL</code>, must be specified as a constant.
    896  * The U_STRING_DECL macro should be invoked exactly once for one
    897  * such string variable before it is used.
    898  *
    899  * Usage:
    900  * <pre>
    901  * &#32;   U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
    902  * &#32;   U_STRING_DECL(ustringVar2, "jumps 5%", 8);
    903  * &#32;   static UBool didInit=FALSE;
    904  * &#32;
    905  * &#32;   int32_t function() {
    906  * &#32;       if(!didInit) {
    907  * &#32;           U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
    908  * &#32;           U_STRING_INIT(ustringVar2, "jumps 5%", 8);
    909  * &#32;           didInit=TRUE;
    910  * &#32;       }
    911  * &#32;       return u_strcmp(ustringVar1, ustringVar2);
    912  * &#32;   }
    913  * </pre>
    914  * @stable ICU 2.0
    915  */
    916 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
    917 #   define U_STRING_DECL(var, cs, length) static const wchar_t var[(length)+1]={ L ## cs }
    918     /**@stable ICU 2.0 */
    919 #   define U_STRING_INIT(var, cs, length)
    920 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
    921 #   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
    922     /**@stable ICU 2.0 */
    923 #   define U_STRING_INIT(var, cs, length)
    924 #else
    925 #   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
    926     /**@stable ICU 2.0 */
    927 #   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1)
    928 #endif
    929 
    930 /**
    931  * Unescape a string of characters and write the resulting
    932  * Unicode characters to the destination buffer.  The following escape
    933  * sequences are recognized:
    934  *
    935  * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
    936  * \\Uhhhhhhhh   8 hex digits
    937  * \\xhh         1-2 hex digits
    938  * \\x{h...}     1-8 hex digits
    939  * \\ooo         1-3 octal digits; o in [0-7]
    940  * \\cX          control-X; X is masked with 0x1F
    941  *
    942  * as well as the standard ANSI C escapes:
    943  *
    944  * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
    945  * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
    946  * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
    947  *
    948  * Anything else following a backslash is generically escaped.  For
    949  * example, "[a\\-z]" returns "[a-z]".
    950  *
    951  * If an escape sequence is ill-formed, this function returns an empty
    952  * string.  An example of an ill-formed sequence is "\\u" followed by
    953  * fewer than 4 hex digits.
    954  *
    955  * The above characters are recognized in the compiler's codepage,
    956  * that is, they are coded as 'u', '\\', etc.  Characters that are
    957  * not parts of escape sequences are converted using u_charsToUChars().
    958  *
    959  * This function is similar to UnicodeString::unescape() but not
    960  * identical to it.  The latter takes a source UnicodeString, so it
    961  * does escape recognition but no conversion.
    962  *
    963  * @param src a zero-terminated string of invariant characters
    964  * @param dest pointer to buffer to receive converted and unescaped
    965  * text and, if there is room, a zero terminator.  May be NULL for
    966  * preflighting, in which case no UChars will be written, but the
    967  * return value will still be valid.  On error, an empty string is
    968  * stored here (if possible).
    969  * @param destCapacity the number of UChars that may be written at
    970  * dest.  Ignored if dest == NULL.
    971  * @return the length of the unescaped string.
    972  * @see u_unescapeAt
    973  * @see UnicodeString#unescape()
    974  * @see UnicodeString#unescapeAt()
    975  * @stable ICU 2.0
    976  */
    977 U_STABLE int32_t U_EXPORT2
    978 u_unescape(const char *src,
    979            UChar *dest, int32_t destCapacity);
    980 
    981 U_CDECL_BEGIN
    982 /**
    983  * Callback function for u_unescapeAt() that returns a character of
    984  * the source text given an offset and a context pointer.  The context
    985  * pointer will be whatever is passed into u_unescapeAt().
    986  *
    987  * @param offset the offset (passed by value) of the source text character to return.
    988  * @param context an opaque pointer passed directly into u_unescapeAt()
    989  * @return the UChar of the source text that is located at
    990  * offset
    991  * @see u_unescapeAt
    992  * @stable ICU 2.0
    993  */
    994 typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
    995 U_CDECL_END
    996 
    997 /**
    998  * Unescape a single sequence. The character at offset-1 is assumed
    999  * (without checking) to be a backslash.  This function takes a callback
   1000  * pointer to a function that returns the UChar at a given offset.  By
   1001  * varying this callback, ICU functions are able to unescape char*
   1002  * strings, UnicodeString objects, and UFILE pointers.
   1003  *
   1004  * If offset is out of range, or if the escape sequence is ill-formed,
   1005  * (UChar32)0xFFFFFFFF is returned.  See documentation of u_unescape()
   1006  * for a list of recognized sequences.
   1007  *
   1008  * @param charAt callback function that returns a UChar of the source
   1009  * text given an offset and a context pointer.
   1010  * @param offset pointer to the offset that will be passed to charAt.
   1011  * The offset value will be updated upon return to point after the
   1012  * last parsed character of the escape sequence.  On error the offset
   1013  * is unchanged.
   1014  * @param length the number of characters in the source text.  The
   1015  * last character of the source text is considered to be at offset
   1016  * length-1.
   1017  * @param context an opaque pointer passed directly into charAt.
   1018  * @return the character represented by the escape sequence at
   1019  * offset, or (UChar32)0xFFFFFFFF on error.
   1020  * @see u_unescape()
   1021  * @see UnicodeString#unescape()
   1022  * @see UnicodeString#unescapeAt()
   1023  * @stable ICU 2.0
   1024  */
   1025 U_STABLE UChar32 U_EXPORT2
   1026 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
   1027              int32_t *offset,
   1028              int32_t length,
   1029              void *context);
   1030 
   1031 /**
   1032  * Uppercase the characters in a string.
   1033  * Casing is locale-dependent and context-sensitive.
   1034  * The result may be longer or shorter than the original.
   1035  * The source string and the destination buffer are allowed to overlap.
   1036  *
   1037  * @param dest      A buffer for the result string. The result will be zero-terminated if
   1038  *                  the buffer is large enough.
   1039  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   1040  *                  dest may be NULL and the function will only return the length of the result
   1041  *                  without writing any of the result string.
   1042  * @param src       The original string
   1043  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
   1044  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
   1045  * @param pErrorCode Must be a valid pointer to an error code value,
   1046  *                  which must not indicate a failure before the function call.
   1047  * @return The length of the result string. It may be greater than destCapacity. In that case,
   1048  *         only some of the result was written to the destination buffer.
   1049  * @stable ICU 2.0
   1050  */
   1051 U_STABLE int32_t U_EXPORT2
   1052 u_strToUpper(UChar *dest, int32_t destCapacity,
   1053              const UChar *src, int32_t srcLength,
   1054              const char *locale,
   1055              UErrorCode *pErrorCode);
   1056 
   1057 /**
   1058  * Lowercase the characters in a string.
   1059  * Casing is locale-dependent and context-sensitive.
   1060  * The result may be longer or shorter than the original.
   1061  * The source string and the destination buffer are allowed to overlap.
   1062  *
   1063  * @param dest      A buffer for the result string. The result will be zero-terminated if
   1064  *                  the buffer is large enough.
   1065  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   1066  *                  dest may be NULL and the function will only return the length of the result
   1067  *                  without writing any of the result string.
   1068  * @param src       The original string
   1069  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
   1070  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
   1071  * @param pErrorCode Must be a valid pointer to an error code value,
   1072  *                  which must not indicate a failure before the function call.
   1073  * @return The length of the result string. It may be greater than destCapacity. In that case,
   1074  *         only some of the result was written to the destination buffer.
   1075  * @stable ICU 2.0
   1076  */
   1077 U_STABLE int32_t U_EXPORT2
   1078 u_strToLower(UChar *dest, int32_t destCapacity,
   1079              const UChar *src, int32_t srcLength,
   1080              const char *locale,
   1081              UErrorCode *pErrorCode);
   1082 
   1083 #if !UCONFIG_NO_BREAK_ITERATION
   1084 
   1085 /**
   1086  * Titlecase a string.
   1087  * Casing is locale-dependent and context-sensitive.
   1088  * Titlecasing uses a break iterator to find the first characters of words
   1089  * that are to be titlecased. It titlecases those characters and lowercases
   1090  * all others.
   1091  *
   1092  * The titlecase break iterator can be provided to customize for arbitrary
   1093  * styles, using rules and dictionaries beyond the standard iterators.
   1094  * It may be more efficient to always provide an iterator to avoid
   1095  * opening and closing one for each string.
   1096  * The standard titlecase iterator for the root locale implements the
   1097  * algorithm of Unicode TR 21.
   1098  *
   1099  * This function uses only the first() and next() methods of the
   1100  * provided break iterator.
   1101  *
   1102  * The result may be longer or shorter than the original.
   1103  * The source string and the destination buffer are allowed to overlap.
   1104  *
   1105  * @param dest      A buffer for the result string. The result will be zero-terminated if
   1106  *                  the buffer is large enough.
   1107  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   1108  *                  dest may be NULL and the function will only return the length of the result
   1109  *                  without writing any of the result string.
   1110  * @param src       The original string
   1111  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
   1112  * @param titleIter A break iterator to find the first characters of words
   1113  *                  that are to be titlecased.
   1114  *                  If none is provided (NULL), then a standard titlecase
   1115  *                  break iterator is opened.
   1116  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
   1117  * @param pErrorCode Must be a valid pointer to an error code value,
   1118  *                  which must not indicate a failure before the function call.
   1119  * @return The length of the result string. It may be greater than destCapacity. In that case,
   1120  *         only some of the result was written to the destination buffer.
   1121  * @stable ICU 2.1
   1122  */
   1123 U_STABLE int32_t U_EXPORT2
   1124 u_strToTitle(UChar *dest, int32_t destCapacity,
   1125              const UChar *src, int32_t srcLength,
   1126              UBreakIterator *titleIter,
   1127              const char *locale,
   1128              UErrorCode *pErrorCode);
   1129 
   1130 #endif
   1131 
   1132 /**
   1133  * Case-fold the characters in a string.
   1134  * Case-folding is locale-independent and not context-sensitive,
   1135  * but there is an option for whether to include or exclude mappings for dotted I
   1136  * and dotless i that are marked with 'I' in CaseFolding.txt.
   1137  * The result may be longer or shorter than the original.
   1138  * The source string and the destination buffer are allowed to overlap.
   1139  *
   1140  * @param dest      A buffer for the result string. The result will be zero-terminated if
   1141  *                  the buffer is large enough.
   1142  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   1143  *                  dest may be NULL and the function will only return the length of the result
   1144  *                  without writing any of the result string.
   1145  * @param src       The original string
   1146  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
   1147  * @param options   Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
   1148  * @param pErrorCode Must be a valid pointer to an error code value,
   1149  *                  which must not indicate a failure before the function call.
   1150  * @return The length of the result string. It may be greater than destCapacity. In that case,
   1151  *         only some of the result was written to the destination buffer.
   1152  * @stable ICU 2.0
   1153  */
   1154 U_STABLE int32_t U_EXPORT2
   1155 u_strFoldCase(UChar *dest, int32_t destCapacity,
   1156               const UChar *src, int32_t srcLength,
   1157               uint32_t options,
   1158               UErrorCode *pErrorCode);
   1159 
   1160 /**
   1161  * Converts a sequence of UChars to wchar_t units.
   1162  *
   1163  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1164  *                      the buffer is large enough.
   1165  * @param destCapacity  The size of the buffer (number of wchar_t's). If it is 0, then
   1166  *                      dest may be NULL and the function will only return the length of the
   1167  *                      result without writing any of the result string (pre-flighting).
   1168  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1169  *                      pDestLength!=NULL then *pDestLength is always set to the
   1170  *                      number of output units corresponding to the transformation of
   1171  *                      all the input units, even in case of a buffer overflow.
   1172  * @param src           The original source string
   1173  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1174  * @param pErrorCode    Must be a valid pointer to an error code value,
   1175  *                      which must not indicate a failure before the function call.
   1176  * @return The pointer to destination buffer.
   1177  * @stable ICU 2.0
   1178  */
   1179 U_STABLE wchar_t* U_EXPORT2
   1180 u_strToWCS(wchar_t *dest,
   1181            int32_t destCapacity,
   1182            int32_t *pDestLength,
   1183            const UChar *src,
   1184            int32_t srcLength,
   1185            UErrorCode *pErrorCode);
   1186 /**
   1187  * Converts a sequence of wchar_t units to UChars.
   1188  *
   1189  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1190  *                      the buffer is large enough.
   1191  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1192  *                      dest may be NULL and the function will only return the length of the
   1193  *                      result without writing any of the result string (pre-flighting).
   1194  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1195  *                      pDestLength!=NULL then *pDestLength is always set to the
   1196  *                      number of output units corresponding to the transformation of
   1197  *                      all the input units, even in case of a buffer overflow.
   1198  * @param src           The original source string
   1199  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1200  * @param pErrorCode    Must be a valid pointer to an error code value,
   1201  *                      which must not indicate a failure before the function call.
   1202  * @return The pointer to destination buffer.
   1203  * @stable ICU 2.0
   1204  */
   1205 U_STABLE UChar* U_EXPORT2
   1206 u_strFromWCS(UChar   *dest,
   1207              int32_t destCapacity,
   1208              int32_t *pDestLength,
   1209              const wchar_t *src,
   1210              int32_t srcLength,
   1211              UErrorCode *pErrorCode);
   1212 /**
   1213  * Converts a sequence of UChars (UTF-16) to UTF-8 bytes.
   1214  *
   1215  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1216  *                      the buffer is large enough.
   1217  * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
   1218  *                      dest may be NULL and the function will only return the length of the
   1219  *                      result without writing any of the result string (pre-flighting).
   1220  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1221  *                      pDestLength!=NULL then *pDestLength is always set to the
   1222  *                      number of output units corresponding to the transformation of
   1223  *                      all the input units, even in case of a buffer overflow.
   1224  * @param src           The original source string
   1225  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1226  * @param pErrorCode    Must be a valid pointer to an error code value,
   1227  *                      which must not indicate a failure before the function call.
   1228  * @return The pointer to destination buffer.
   1229  * @stable ICU 2.0
   1230  */
   1231 U_STABLE char* U_EXPORT2
   1232 u_strToUTF8(char *dest,
   1233             int32_t destCapacity,
   1234             int32_t *pDestLength,
   1235             const UChar *src,
   1236             int32_t srcLength,
   1237             UErrorCode *pErrorCode);
   1238 
   1239 /**
   1240  * Converts a sequence of UTF-8 bytes to UChars (UTF-16).
   1241  *
   1242  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1243  *                      the buffer is large enough.
   1244  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1245  *                      dest may be NULL and the function will only return the length of the
   1246  *                      result without writing any of the result string (pre-flighting).
   1247  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1248  *                      pDestLength!=NULL then *pDestLength is always set to the
   1249  *                      number of output units corresponding to the transformation of
   1250  *                      all the input units, even in case of a buffer overflow.
   1251  * @param src           The original source string
   1252  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1253  * @param pErrorCode    Must be a valid pointer to an error code value,
   1254  *                      which must not indicate a failure before the function call.
   1255  * @return The pointer to destination buffer.
   1256  * @stable ICU 2.0
   1257  */
   1258 U_STABLE UChar* U_EXPORT2
   1259 u_strFromUTF8(UChar *dest,
   1260               int32_t destCapacity,
   1261               int32_t *pDestLength,
   1262               const char *src,
   1263               int32_t srcLength,
   1264               UErrorCode *pErrorCode);
   1265 
   1266 /**
   1267  * Converts a sequence of UChars (UTF-16) to UTF-32 units.
   1268  *
   1269  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1270  *                      the buffer is large enough.
   1271  * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
   1272  *                      dest may be NULL and the function will only return the length of the
   1273  *                      result without writing any of the result string (pre-flighting).
   1274  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1275  *                      pDestLength!=NULL then *pDestLength is always set to the
   1276  *                      number of output units corresponding to the transformation of
   1277  *                      all the input units, even in case of a buffer overflow.
   1278  * @param src           The original source string
   1279  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1280  * @param pErrorCode    Must be a valid pointer to an error code value,
   1281  *                      which must not indicate a failure before the function call.
   1282  * @return The pointer to destination buffer.
   1283  * @stable ICU 2.0
   1284  */
   1285 U_STABLE UChar32* U_EXPORT2
   1286 u_strToUTF32(UChar32 *dest,
   1287              int32_t  destCapacity,
   1288              int32_t  *pDestLength,
   1289              const UChar *src,
   1290              int32_t  srcLength,
   1291              UErrorCode *pErrorCode);
   1292 
   1293 /**
   1294  * Converts a sequence of UTF-32 units to UChars (UTF-16).
   1295  *
   1296  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1297  *                      the buffer is large enough.
   1298  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1299  *                      dest may be NULL and the function will only return the length of the
   1300  *                      result without writing any of the result string (pre-flighting).
   1301  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1302  *                      pDestLength!=NULL then *pDestLength is always set to the
   1303  *                      number of output units corresponding to the transformation of
   1304  *                      all the input units, even in case of a buffer overflow.
   1305  * @param src           The original source string
   1306  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1307  * @param pErrorCode    Must be a valid pointer to an error code value,
   1308  *                      which must not indicate a failure before the function call.
   1309  * @return The pointer to destination buffer.
   1310  * @stable ICU 2.0
   1311  */
   1312 U_STABLE UChar* U_EXPORT2
   1313 u_strFromUTF32(UChar   *dest,
   1314                int32_t destCapacity,
   1315                int32_t *pDestLength,
   1316                const UChar32 *src,
   1317                int32_t srcLength,
   1318                UErrorCode *pErrorCode);
   1319 
   1320 #endif
   1321