Home | History | Annotate | Download | only in unicode
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1998-2012, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * File ustring.h
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   12/07/98    bertrand    Creation.
     13 ******************************************************************************
     14 */
     15 
     16 #ifndef USTRING_H
     17 #define USTRING_H
     18 
     19 #include "unicode/utypes.h"
     20 #include "unicode/putil.h"
     21 #include "unicode/uiter.h"
     22 
     23 /**
     24  * \def UBRK_TYPEDEF_UBREAK_ITERATOR
     25  * @internal
     26  */
     27 
     28 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
     29 #   define UBRK_TYPEDEF_UBREAK_ITERATOR
     30 /** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
     31     typedef struct UBreakIterator UBreakIterator;
     32 #endif
     33 
     34 /**
     35  * \file
     36  * \brief C API: Unicode string handling functions
     37  *
     38  * These C API functions provide general Unicode string handling.
     39  *
     40  * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
     41  * functions. (For example, they do not check for bad arguments like NULL string pointers.)
     42  * In some cases, only the thread-safe variant of such a function is implemented here
     43  * (see u_strtok_r()).
     44  *
     45  * Other functions provide more Unicode-specific functionality like locale-specific
     46  * upper/lower-casing and string comparison in code point order.
     47  *
     48  * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
     49  * UTF-16 encodes each Unicode code point with either one or two UChar code units.
     50  * (This is the default form of Unicode, and a forward-compatible extension of the original,
     51  * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
     52  * in 1996.)
     53  *
     54  * Some APIs accept a 32-bit UChar32 value for a single code point.
     55  *
     56  * ICU also handles 16-bit Unicode text with unpaired surrogates.
     57  * Such text is not well-formed UTF-16.
     58  * Code-point-related functions treat unpaired surrogates as surrogate code points,
     59  * i.e., as separate units.
     60  *
     61  * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
     62  * it is much more efficient even for random access because the code unit values
     63  * for single-unit characters vs. lead units vs. trail units are completely disjoint.
     64  * This means that it is easy to determine character (code point) boundaries from
     65  * random offsets in the string.
     66  *
     67  * Unicode (UTF-16) string processing is optimized for the single-unit case.
     68  * Although it is important to support supplementary characters
     69  * (which use pairs of lead/trail code units called "surrogates"),
     70  * their occurrence is rare. Almost all characters in modern use require only
     71  * a single UChar code unit (i.e., their code point values are <=0xffff).
     72  *
     73  * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html).
     74  * For a discussion of the handling of unpaired surrogates see also
     75  * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
     76  */
     77 
     78 /**
     79  * \defgroup ustring_ustrlen String Length
     80  * \ingroup ustring_strlen
     81  */
     82 /*@{*/
     83 /**
     84  * Determine the length of an array of UChar.
     85  *
     86  * @param s The array of UChars, NUL (U+0000) terminated.
     87  * @return The number of UChars in <code>s</code>, minus the terminator.
     88  * @stable ICU 2.0
     89  */
     90 U_STABLE int32_t U_EXPORT2
     91 u_strlen(const UChar *s);
     92 /*@}*/
     93 
     94 /**
     95  * Count Unicode code points in the length UChar code units of the string.
     96  * A code point may occupy either one or two UChar code units.
     97  * Counting code points involves reading all code units.
     98  *
     99  * This function is basically the inverse of the U16_FWD_N() macro (see utf.h).
    100  *
    101  * @param s The input string.
    102  * @param length The number of UChar code units to be checked, or -1 to count all
    103  *               code points before the first NUL (U+0000).
    104  * @return The number of code points in the specified code units.
    105  * @stable ICU 2.0
    106  */
    107 U_STABLE int32_t U_EXPORT2
    108 u_countChar32(const UChar *s, int32_t length);
    109 
    110 /**
    111  * Check if the string contains more Unicode code points than a certain number.
    112  * This is more efficient than counting all code points in the entire string
    113  * and comparing that number with a threshold.
    114  * This function may not need to scan the string at all if the length is known
    115  * (not -1 for NUL-termination) and falls within a certain range, and
    116  * never needs to count more than 'number+1' code points.
    117  * Logically equivalent to (u_countChar32(s, length)>number).
    118  * A Unicode code point may occupy either one or two UChar code units.
    119  *
    120  * @param s The input string.
    121  * @param length The length of the string, or -1 if it is NUL-terminated.
    122  * @param number The number of code points in the string is compared against
    123  *               the 'number' parameter.
    124  * @return Boolean value for whether the string contains more Unicode code points
    125  *         than 'number'. Same as (u_countChar32(s, length)>number).
    126  * @stable ICU 2.4
    127  */
    128 U_STABLE UBool U_EXPORT2
    129 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
    130 
    131 /**
    132  * Concatenate two ustrings.  Appends a copy of <code>src</code>,
    133  * including the null terminator, to <code>dst</code>. The initial copied
    134  * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
    135  *
    136  * @param dst The destination string.
    137  * @param src The source string.
    138  * @return A pointer to <code>dst</code>.
    139  * @stable ICU 2.0
    140  */
    141 U_STABLE UChar* U_EXPORT2
    142 u_strcat(UChar     *dst,
    143     const UChar     *src);
    144 
    145 /**
    146  * Concatenate two ustrings.
    147  * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
    148  * Adds a terminating NUL.
    149  * If src is too long, then only <code>n-1</code> characters will be copied
    150  * before the terminating NUL.
    151  * If <code>n&lt;=0</code> then dst is not modified.
    152  *
    153  * @param dst The destination string.
    154  * @param src The source string (can be NULL/invalid if n<=0).
    155  * @param n The maximum number of characters to append; no-op if <=0.
    156  * @return A pointer to <code>dst</code>.
    157  * @stable ICU 2.0
    158  */
    159 U_STABLE UChar* U_EXPORT2
    160 u_strncat(UChar     *dst,
    161      const UChar     *src,
    162      int32_t     n);
    163 
    164 /**
    165  * Find the first occurrence of a substring in a string.
    166  * The substring is found at code point boundaries.
    167  * That means that if the substring begins with
    168  * a trail surrogate or ends with a lead surrogate,
    169  * then it is found only if these surrogates stand alone in the text.
    170  * Otherwise, the substring edge units would be matched against
    171  * halves of surrogate pairs.
    172  *
    173  * @param s The string to search (NUL-terminated).
    174  * @param substring The substring to find (NUL-terminated).
    175  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
    176  *         or <code>s</code> itself if the <code>substring</code> is empty,
    177  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
    178  * @stable ICU 2.0
    179  *
    180  * @see u_strrstr
    181  * @see u_strFindFirst
    182  * @see u_strFindLast
    183  */
    184 U_STABLE UChar * U_EXPORT2
    185 u_strstr(const UChar *s, const UChar *substring);
    186 
    187 /**
    188  * Find the first occurrence of a substring in a string.
    189  * The substring is found at code point boundaries.
    190  * That means that if the substring begins with
    191  * a trail surrogate or ends with a lead surrogate,
    192  * then it is found only if these surrogates stand alone in the text.
    193  * Otherwise, the substring edge units would be matched against
    194  * halves of surrogate pairs.
    195  *
    196  * @param s The string to search.
    197  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
    198  * @param substring The substring to find (NUL-terminated if subLength is -1).
    199  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
    200  * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
    201  *         or <code>s</code> itself if the <code>substring</code> is empty,
    202  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
    203  * @stable ICU 2.4
    204  *
    205  * @see u_strstr
    206  * @see u_strFindLast
    207  */
    208 U_STABLE UChar * U_EXPORT2
    209 u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
    210 
    211 /**
    212  * Find the first occurrence of a BMP code point in a string.
    213  * A surrogate code point is found only if its match in the text is not
    214  * part of a surrogate pair.
    215  * A NUL character is found at the string terminator.
    216  *
    217  * @param s The string to search (NUL-terminated).
    218  * @param c The BMP code point to find.
    219  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
    220  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    221  * @stable ICU 2.0
    222  *
    223  * @see u_strchr32
    224  * @see u_memchr
    225  * @see u_strstr
    226  * @see u_strFindFirst
    227  */
    228 U_STABLE UChar * U_EXPORT2
    229 u_strchr(const UChar *s, UChar c);
    230 
    231 /**
    232  * Find the first occurrence of a code point in a string.
    233  * A surrogate code point is found only if its match in the text is not
    234  * part of a surrogate pair.
    235  * A NUL character is found at the string terminator.
    236  *
    237  * @param s The string to search (NUL-terminated).
    238  * @param c The code point to find.
    239  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
    240  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    241  * @stable ICU 2.0
    242  *
    243  * @see u_strchr
    244  * @see u_memchr32
    245  * @see u_strstr
    246  * @see u_strFindFirst
    247  */
    248 U_STABLE UChar * U_EXPORT2
    249 u_strchr32(const UChar *s, UChar32 c);
    250 
    251 /**
    252  * Find the last occurrence of a substring in a string.
    253  * The substring is found at code point boundaries.
    254  * That means that if the substring begins with
    255  * a trail surrogate or ends with a lead surrogate,
    256  * then it is found only if these surrogates stand alone in the text.
    257  * Otherwise, the substring edge units would be matched against
    258  * halves of surrogate pairs.
    259  *
    260  * @param s The string to search (NUL-terminated).
    261  * @param substring The substring to find (NUL-terminated).
    262  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
    263  *         or <code>s</code> itself if the <code>substring</code> is empty,
    264  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
    265  * @stable ICU 2.4
    266  *
    267  * @see u_strstr
    268  * @see u_strFindFirst
    269  * @see u_strFindLast
    270  */
    271 U_STABLE UChar * U_EXPORT2
    272 u_strrstr(const UChar *s, const UChar *substring);
    273 
    274 /**
    275  * Find the last occurrence of a substring in a string.
    276  * The substring is found at code point boundaries.
    277  * That means that if the substring begins with
    278  * a trail surrogate or ends with a lead surrogate,
    279  * then it is found only if these surrogates stand alone in the text.
    280  * Otherwise, the substring edge units would be matched against
    281  * halves of surrogate pairs.
    282  *
    283  * @param s The string to search.
    284  * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
    285  * @param substring The substring to find (NUL-terminated if subLength is -1).
    286  * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
    287  * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
    288  *         or <code>s</code> itself if the <code>substring</code> is empty,
    289  *         or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
    290  * @stable ICU 2.4
    291  *
    292  * @see u_strrstr
    293  * @see u_strFindFirst
    294  */
    295 U_STABLE UChar * U_EXPORT2
    296 u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
    297 
    298 /**
    299  * Find the last occurrence of a BMP code point in a string.
    300  * A surrogate code point is found only if its match in the text is not
    301  * part of a surrogate pair.
    302  * A NUL character is found at the string terminator.
    303  *
    304  * @param s The string to search (NUL-terminated).
    305  * @param c The BMP code point to find.
    306  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
    307  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    308  * @stable ICU 2.4
    309  *
    310  * @see u_strrchr32
    311  * @see u_memrchr
    312  * @see u_strrstr
    313  * @see u_strFindLast
    314  */
    315 U_STABLE UChar * U_EXPORT2
    316 u_strrchr(const UChar *s, UChar c);
    317 
    318 /**
    319  * Find the last occurrence of a code point in a string.
    320  * A surrogate code point is found only if its match in the text is not
    321  * part of a surrogate pair.
    322  * A NUL character is found at the string terminator.
    323  *
    324  * @param s The string to search (NUL-terminated).
    325  * @param c The code point to find.
    326  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
    327  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    328  * @stable ICU 2.4
    329  *
    330  * @see u_strrchr
    331  * @see u_memrchr32
    332  * @see u_strrstr
    333  * @see u_strFindLast
    334  */
    335 U_STABLE UChar * U_EXPORT2
    336 u_strrchr32(const UChar *s, UChar32 c);
    337 
    338 /**
    339  * Locates the first occurrence in the string <code>string</code> of any of the characters
    340  * in the string <code>matchSet</code>.
    341  * Works just like C's strpbrk but with Unicode.
    342  *
    343  * @param string The string in which to search, NUL-terminated.
    344  * @param matchSet A NUL-terminated string defining a set of code points
    345  *                 for which to search in the text string.
    346  * @return A pointer to the character in <code>string</code> that matches one of the
    347  *         characters in <code>matchSet</code>, or NULL if no such character is found.
    348  * @stable ICU 2.0
    349  */
    350 U_STABLE UChar * U_EXPORT2
    351 u_strpbrk(const UChar *string, const UChar *matchSet);
    352 
    353 /**
    354  * Returns the number of consecutive characters in <code>string</code>,
    355  * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
    356  * Works just like C's strcspn but with Unicode.
    357  *
    358  * @param string The string in which to search, NUL-terminated.
    359  * @param matchSet A NUL-terminated string defining a set of code points
    360  *                 for which to search in the text string.
    361  * @return The number of initial characters in <code>string</code> that do not
    362  *         occur in <code>matchSet</code>.
    363  * @see u_strspn
    364  * @stable ICU 2.0
    365  */
    366 U_STABLE int32_t U_EXPORT2
    367 u_strcspn(const UChar *string, const UChar *matchSet);
    368 
    369 /**
    370  * Returns the number of consecutive characters in <code>string</code>,
    371  * beginning with the first, that occur somewhere in <code>matchSet</code>.
    372  * Works just like C's strspn but with Unicode.
    373  *
    374  * @param string The string in which to search, NUL-terminated.
    375  * @param matchSet A NUL-terminated string defining a set of code points
    376  *                 for which to search in the text string.
    377  * @return The number of initial characters in <code>string</code> that do
    378  *         occur in <code>matchSet</code>.
    379  * @see u_strcspn
    380  * @stable ICU 2.0
    381  */
    382 U_STABLE int32_t U_EXPORT2
    383 u_strspn(const UChar *string, const UChar *matchSet);
    384 
    385 /**
    386  * The string tokenizer API allows an application to break a string into
    387  * tokens. Unlike strtok(), the tokenizer state (the current pointer within
    388  * the original string) is maintained in saveState. In the first call, the
    389  * argument src is a pointer to the string. In subsequent calls to
    390  * return successive tokens of that string, src must be specified as
    391  * NULL. The value saveState is set by this function to maintain the
    392  * function's position within the string, and on each subsequent call
    393  * you must give this argument the same variable. This function does
    394  * handle surrogate pairs. This function is similar to strtok_r() from
    395  * the POSIX Threads Extension (1003.1c-1995).
    396  *
    397  * @param src String containing token(s). This string will be modified.
    398  *            After the first call to u_strtok_r(), this argument must
    399  *            be NULL to get to the next token.
    400  * @param delim Set of delimiter characters (Unicode code points).
    401  * @param saveState The current pointer within the original string,
    402  *              which is set by this function. The saveState
    403  *              parameter should be the address of a local variable of type
    404  *              UChar *. (i.e. define "UChar *myLocalSaveState" and use
    405  *              &myLocalSaveState for this parameter).
    406  * @return A pointer to the next token found in src, or NULL
    407  *         when there are no more tokens.
    408  * @stable ICU 2.0
    409  */
    410 U_STABLE UChar * U_EXPORT2
    411 u_strtok_r(UChar    *src,
    412      const UChar    *delim,
    413            UChar   **saveState);
    414 
    415 /**
    416  * Compare two Unicode strings for bitwise equality (code unit order).
    417  *
    418  * @param s1 A string to compare.
    419  * @param s2 A string to compare.
    420  * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
    421  * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
    422  * value if <code>s1</code> is bitwise greater than <code>s2</code>.
    423  * @stable ICU 2.0
    424  */
    425 U_STABLE int32_t  U_EXPORT2
    426 u_strcmp(const UChar     *s1,
    427          const UChar     *s2);
    428 
    429 /**
    430  * Compare two Unicode strings in code point order.
    431  * See u_strCompare for details.
    432  *
    433  * @param s1 A string to compare.
    434  * @param s2 A string to compare.
    435  * @return a negative/zero/positive integer corresponding to whether
    436  * the first string is less than/equal to/greater than the second one
    437  * in code point order
    438  * @stable ICU 2.0
    439  */
    440 U_STABLE int32_t U_EXPORT2
    441 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
    442 
    443 /**
    444  * Compare two Unicode strings (binary order).
    445  *
    446  * The comparison can be done in code unit order or in code point order.
    447  * They differ only in UTF-16 when
    448  * comparing supplementary code points (U+10000..U+10ffff)
    449  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
    450  * In code unit order, high BMP code points sort after supplementary code points
    451  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
    452  *
    453  * This function works with strings of different explicitly specified lengths
    454  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
    455  * NUL-terminated strings are possible with length arguments of -1.
    456  *
    457  * @param s1 First source string.
    458  * @param length1 Length of first source string, or -1 if NUL-terminated.
    459  *
    460  * @param s2 Second source string.
    461  * @param length2 Length of second source string, or -1 if NUL-terminated.
    462  *
    463  * @param codePointOrder Choose between code unit order (FALSE)
    464  *                       and code point order (TRUE).
    465  *
    466  * @return <0 or 0 or >0 as usual for string comparisons
    467  *
    468  * @stable ICU 2.2
    469  */
    470 U_STABLE int32_t U_EXPORT2
    471 u_strCompare(const UChar *s1, int32_t length1,
    472              const UChar *s2, int32_t length2,
    473              UBool codePointOrder);
    474 
    475 /**
    476  * Compare two Unicode strings (binary order)
    477  * as presented by UCharIterator objects.
    478  * Works otherwise just like u_strCompare().
    479  *
    480  * Both iterators are reset to their start positions.
    481  * When the function returns, it is undefined where the iterators
    482  * have stopped.
    483  *
    484  * @param iter1 First source string iterator.
    485  * @param iter2 Second source string iterator.
    486  * @param codePointOrder Choose between code unit order (FALSE)
    487  *                       and code point order (TRUE).
    488  *
    489  * @return <0 or 0 or >0 as usual for string comparisons
    490  *
    491  * @see u_strCompare
    492  *
    493  * @stable ICU 2.6
    494  */
    495 U_STABLE int32_t U_EXPORT2
    496 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
    497 
    498 #ifndef U_COMPARE_CODE_POINT_ORDER
    499 /* see also unistr.h and unorm.h */
    500 /**
    501  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
    502  * Compare strings in code point order instead of code unit order.
    503  * @stable ICU 2.2
    504  */
    505 #define U_COMPARE_CODE_POINT_ORDER  0x8000
    506 #endif
    507 
    508 /**
    509  * Compare two strings case-insensitively using full case folding.
    510  * This is equivalent to
    511  *   u_strCompare(u_strFoldCase(s1, options),
    512  *                u_strFoldCase(s2, options),
    513  *                (options&U_COMPARE_CODE_POINT_ORDER)!=0).
    514  *
    515  * The comparison can be done in UTF-16 code unit order or in code point order.
    516  * They differ only when comparing supplementary code points (U+10000..U+10ffff)
    517  * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
    518  * In code unit order, high BMP code points sort after supplementary code points
    519  * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
    520  *
    521  * This function works with strings of different explicitly specified lengths
    522  * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
    523  * NUL-terminated strings are possible with length arguments of -1.
    524  *
    525  * @param s1 First source string.
    526  * @param length1 Length of first source string, or -1 if NUL-terminated.
    527  *
    528  * @param s2 Second source string.
    529  * @param length2 Length of second source string, or -1 if NUL-terminated.
    530  *
    531  * @param options A bit set of options:
    532  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    533  *     Comparison in code unit order with default case folding.
    534  *
    535  *   - U_COMPARE_CODE_POINT_ORDER
    536  *     Set to choose code point order instead of code unit order
    537  *     (see u_strCompare for details).
    538  *
    539  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    540  *
    541  * @param pErrorCode Must be a valid pointer to an error code value,
    542  *                  which must not indicate a failure before the function call.
    543  *
    544  * @return <0 or 0 or >0 as usual for string comparisons
    545  *
    546  * @stable ICU 2.2
    547  */
    548 U_STABLE int32_t U_EXPORT2
    549 u_strCaseCompare(const UChar *s1, int32_t length1,
    550                  const UChar *s2, int32_t length2,
    551                  uint32_t options,
    552                  UErrorCode *pErrorCode);
    553 
    554 /**
    555  * Compare two ustrings for bitwise equality.
    556  * Compares at most <code>n</code> characters.
    557  *
    558  * @param ucs1 A string to compare (can be NULL/invalid if n<=0).
    559  * @param ucs2 A string to compare (can be NULL/invalid if n<=0).
    560  * @param n The maximum number of characters to compare; always returns 0 if n<=0.
    561  * @return 0 if <code>ucs1</code> and <code>ucs2</code> are bitwise equal; a negative
    562  * value if <code>ucs1</code> is bitwise less than <code>ucs2</code>; a positive
    563  * value if <code>ucs1</code> is bitwise greater than <code>ucs2</code>.
    564  * @stable ICU 2.0
    565  */
    566 U_STABLE int32_t U_EXPORT2
    567 u_strncmp(const UChar     *ucs1,
    568      const UChar     *ucs2,
    569      int32_t     n);
    570 
    571 /**
    572  * Compare two Unicode strings in code point order.
    573  * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
    574  * For details, see u_strCompare().
    575  *
    576  * @param s1 A string to compare.
    577  * @param s2 A string to compare.
    578  * @param n The maximum number of characters to compare.
    579  * @return a negative/zero/positive integer corresponding to whether
    580  * the first string is less than/equal to/greater than the second one
    581  * in code point order
    582  * @stable ICU 2.0
    583  */
    584 U_STABLE int32_t U_EXPORT2
    585 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
    586 
    587 /**
    588  * Compare two strings case-insensitively using full case folding.
    589  * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
    590  *
    591  * @param s1 A string to compare.
    592  * @param s2 A string to compare.
    593  * @param options A bit set of options:
    594  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    595  *     Comparison in code unit order with default case folding.
    596  *
    597  *   - U_COMPARE_CODE_POINT_ORDER
    598  *     Set to choose code point order instead of code unit order
    599  *     (see u_strCompare for details).
    600  *
    601  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    602  *
    603  * @return A negative, zero, or positive integer indicating the comparison result.
    604  * @stable ICU 2.0
    605  */
    606 U_STABLE int32_t U_EXPORT2
    607 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
    608 
    609 /**
    610  * Compare two strings case-insensitively using full case folding.
    611  * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
    612  * u_strFoldCase(s2, at most n, options)).
    613  *
    614  * @param s1 A string to compare.
    615  * @param s2 A string to compare.
    616  * @param n The maximum number of characters in each string to case-fold and then compare.
    617  * @param options A bit set of options:
    618  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    619  *     Comparison in code unit order with default case folding.
    620  *
    621  *   - U_COMPARE_CODE_POINT_ORDER
    622  *     Set to choose code point order instead of code unit order
    623  *     (see u_strCompare for details).
    624  *
    625  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    626  *
    627  * @return A negative, zero, or positive integer indicating the comparison result.
    628  * @stable ICU 2.0
    629  */
    630 U_STABLE int32_t U_EXPORT2
    631 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
    632 
    633 /**
    634  * Compare two strings case-insensitively using full case folding.
    635  * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
    636  * u_strFoldCase(s2, n, options)).
    637  *
    638  * @param s1 A string to compare.
    639  * @param s2 A string to compare.
    640  * @param length The number of characters in each string to case-fold and then compare.
    641  * @param options A bit set of options:
    642  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    643  *     Comparison in code unit order with default case folding.
    644  *
    645  *   - U_COMPARE_CODE_POINT_ORDER
    646  *     Set to choose code point order instead of code unit order
    647  *     (see u_strCompare for details).
    648  *
    649  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    650  *
    651  * @return A negative, zero, or positive integer indicating the comparison result.
    652  * @stable ICU 2.0
    653  */
    654 U_STABLE int32_t U_EXPORT2
    655 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
    656 
    657 /**
    658  * Copy a ustring. Adds a null terminator.
    659  *
    660  * @param dst The destination string.
    661  * @param src The source string.
    662  * @return A pointer to <code>dst</code>.
    663  * @stable ICU 2.0
    664  */
    665 U_STABLE UChar* U_EXPORT2
    666 u_strcpy(UChar     *dst,
    667     const UChar     *src);
    668 
    669 /**
    670  * Copy a ustring.
    671  * Copies at most <code>n</code> characters.  The result will be null terminated
    672  * if the length of <code>src</code> is less than <code>n</code>.
    673  *
    674  * @param dst The destination string.
    675  * @param src The source string (can be NULL/invalid if n<=0).
    676  * @param n The maximum number of characters to copy; no-op if <=0.
    677  * @return A pointer to <code>dst</code>.
    678  * @stable ICU 2.0
    679  */
    680 U_STABLE UChar* U_EXPORT2
    681 u_strncpy(UChar     *dst,
    682      const UChar     *src,
    683      int32_t     n);
    684 
    685 #if !UCONFIG_NO_CONVERSION
    686 
    687 /**
    688  * Copy a byte string encoded in the default codepage to a ustring.
    689  * Adds a null terminator.
    690  * Performs a host byte to UChar conversion
    691  *
    692  * @param dst The destination string.
    693  * @param src The source string.
    694  * @return A pointer to <code>dst</code>.
    695  * @stable ICU 2.0
    696  */
    697 U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
    698                const char *src );
    699 
    700 /**
    701  * Copy a byte string encoded in the default codepage to a ustring.
    702  * Copies at most <code>n</code> characters.  The result will be null terminated
    703  * if the length of <code>src</code> is less than <code>n</code>.
    704  * Performs a host byte to UChar conversion.
    705  *
    706  * @param dst The destination string.
    707  * @param src The source string.
    708  * @param n The maximum number of characters to copy.
    709  * @return A pointer to <code>dst</code>.
    710  * @stable ICU 2.0
    711  */
    712 U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
    713             const char *src,
    714             int32_t n);
    715 
    716 /**
    717  * Copy a ustring to a byte string encoded in the default codepage.
    718  * Adds a null terminator.
    719  * Performs a UChar to host byte conversion.
    720  *
    721  * @param dst The destination string.
    722  * @param src The source string.
    723  * @return A pointer to <code>dst</code>.
    724  * @stable ICU 2.0
    725  */
    726 U_STABLE char* U_EXPORT2 u_austrcpy(char *dst,
    727             const UChar *src );
    728 
    729 /**
    730  * Copy a ustring to a byte string encoded in the default codepage.
    731  * Copies at most <code>n</code> characters.  The result will be null terminated
    732  * if the length of <code>src</code> is less than <code>n</code>.
    733  * Performs a UChar to host byte conversion.
    734  *
    735  * @param dst The destination string.
    736  * @param src The source string.
    737  * @param n The maximum number of characters to copy.
    738  * @return A pointer to <code>dst</code>.
    739  * @stable ICU 2.0
    740  */
    741 U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
    742             const UChar *src,
    743             int32_t n );
    744 
    745 #endif
    746 
    747 /**
    748  * Synonym for memcpy(), but with UChars only.
    749  * @param dest The destination string.
    750  * @param src The source string (can be NULL/invalid if count<=0).
    751  * @param count The number of UChars to copy; no-op if <=0.
    752  * @return A pointer to <code>dest</code>.
    753  * @stable ICU 2.0
    754  */
    755 U_STABLE UChar* U_EXPORT2
    756 u_memcpy(UChar *dest, const UChar *src, int32_t count);
    757 
    758 /**
    759  * Synonym for memmove(), but with UChars only.
    760  * @param dest The destination string.
    761  * @param src The source string (can be NULL/invalid if count<=0).
    762  * @param count The number of UChars to move; no-op if <=0.
    763  * @return A pointer to <code>dest</code>.
    764  * @stable ICU 2.0
    765  */
    766 U_STABLE UChar* U_EXPORT2
    767 u_memmove(UChar *dest, const UChar *src, int32_t count);
    768 
    769 /**
    770  * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
    771  *
    772  * @param dest The destination string.
    773  * @param c The character to initialize the string with.
    774  * @param count The maximum number of characters to set.
    775  * @return A pointer to <code>dest</code>.
    776  * @stable ICU 2.0
    777  */
    778 U_STABLE UChar* U_EXPORT2
    779 u_memset(UChar *dest, UChar c, int32_t count);
    780 
    781 /**
    782  * Compare the first <code>count</code> UChars of each buffer.
    783  *
    784  * @param buf1 The first buffer to compare.
    785  * @param buf2 The second buffer to compare.
    786  * @param count The maximum number of UChars to compare.
    787  * @return When buf1 < buf2, a negative number is returned.
    788  *      When buf1 == buf2, 0 is returned.
    789  *      When buf1 > buf2, a positive number is returned.
    790  * @stable ICU 2.0
    791  */
    792 U_STABLE int32_t U_EXPORT2
    793 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
    794 
    795 /**
    796  * Compare two Unicode strings in code point order.
    797  * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
    798  * For details, see u_strCompare().
    799  *
    800  * @param s1 A string to compare.
    801  * @param s2 A string to compare.
    802  * @param count The maximum number of characters to compare.
    803  * @return A negative/zero/positive integer corresponding to whether
    804  * the first string is less than/equal to/greater than the second one
    805  * in code point order.
    806  * @stable ICU 2.0
    807  */
    808 U_STABLE int32_t U_EXPORT2
    809 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
    810 
    811 /**
    812  * Find the first occurrence of a BMP code point in a string.
    813  * A surrogate code point is found only if its match in the text is not
    814  * part of a surrogate pair.
    815  * A NUL character is found at the string terminator.
    816  *
    817  * @param s The string to search (contains <code>count</code> UChars).
    818  * @param c The BMP code point to find.
    819  * @param count The length of the string, in UChars.
    820  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
    821  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    822  * @stable ICU 2.0
    823  *
    824  * @see u_strchr
    825  * @see u_memchr32
    826  * @see u_strFindFirst
    827  */
    828 U_STABLE UChar* U_EXPORT2
    829 u_memchr(const UChar *s, UChar c, int32_t count);
    830 
    831 /**
    832  * Find the first occurrence of a code point in a string.
    833  * A surrogate code point is found only if its match in the text is not
    834  * part of a surrogate pair.
    835  * A NUL character is found at the string terminator.
    836  *
    837  * @param s The string to search (contains <code>count</code> UChars).
    838  * @param c The code point to find.
    839  * @param count The length of the string, in UChars.
    840  * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
    841  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    842  * @stable ICU 2.0
    843  *
    844  * @see u_strchr32
    845  * @see u_memchr
    846  * @see u_strFindFirst
    847  */
    848 U_STABLE UChar* U_EXPORT2
    849 u_memchr32(const UChar *s, UChar32 c, int32_t count);
    850 
    851 /**
    852  * Find the last occurrence of a BMP code point in a string.
    853  * A surrogate code point is found only if its match in the text is not
    854  * part of a surrogate pair.
    855  * A NUL character is found at the string terminator.
    856  *
    857  * @param s The string to search (contains <code>count</code> UChars).
    858  * @param c The BMP code point to find.
    859  * @param count The length of the string, in UChars.
    860  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
    861  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    862  * @stable ICU 2.4
    863  *
    864  * @see u_strrchr
    865  * @see u_memrchr32
    866  * @see u_strFindLast
    867  */
    868 U_STABLE UChar* U_EXPORT2
    869 u_memrchr(const UChar *s, UChar c, int32_t count);
    870 
    871 /**
    872  * Find the last occurrence of a code point in a string.
    873  * A surrogate code point is found only if its match in the text is not
    874  * part of a surrogate pair.
    875  * A NUL character is found at the string terminator.
    876  *
    877  * @param s The string to search (contains <code>count</code> UChars).
    878  * @param c The code point to find.
    879  * @param count The length of the string, in UChars.
    880  * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
    881  *         or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
    882  * @stable ICU 2.4
    883  *
    884  * @see u_strrchr32
    885  * @see u_memrchr
    886  * @see u_strFindLast
    887  */
    888 U_STABLE UChar* U_EXPORT2
    889 u_memrchr32(const UChar *s, UChar32 c, int32_t count);
    890 
    891 /**
    892  * Unicode String literals in C.
    893  * We need one macro to declare a variable for the string
    894  * and to statically preinitialize it if possible,
    895  * and a second macro to dynamically initialize such a string variable if necessary.
    896  *
    897  * The macros are defined for maximum performance.
    898  * They work only for strings that contain "invariant characters", i.e.,
    899  * only Latin letters, digits, and some punctuation.
    900  * See utypes.h for details.
    901  *
    902  * A pair of macros for a single string must be used with the same
    903  * parameters.
    904  * The string parameter must be a C string literal.
    905  * The length of the string, not including the terminating
    906  * <code>NUL</code>, must be specified as a constant.
    907  * The U_STRING_DECL macro should be invoked exactly once for one
    908  * such string variable before it is used.
    909  *
    910  * Usage:
    911  * <pre>
    912  *    U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
    913  *    U_STRING_DECL(ustringVar2, "jumps 5%", 8);
    914  *    static UBool didInit=FALSE;
    915  *
    916  *    int32_t function() {
    917  *        if(!didInit) {
    918  *            U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
    919  *            U_STRING_INIT(ustringVar2, "jumps 5%", 8);
    920  *            didInit=TRUE;
    921  *        }
    922  *        return u_strcmp(ustringVar1, ustringVar2);
    923  *    }
    924  * </pre>
    925  *
    926  * Note that the macros will NOT consistently work if their argument is another <code>#define</code>.
    927  *  The following will not work on all platforms, don't use it.
    928  *
    929  * <pre>
    930  *     #define GLUCK "Mr. Gluck"
    931  *     U_STRING_DECL(var, GLUCK, 9)
    932  *     U_STRING_INIT(var, GLUCK, 9)
    933  * </pre>
    934  *
    935  * Instead, use the string literal "Mr. Gluck"  as the argument to both macro
    936  * calls.
    937  *
    938  * U_STRING_INIT expands to nothing when U_STRING_DECL can initialize statically.
    939  * @stable ICU 2.0
    940  */
    941 #if defined(U_DECLARE_UTF16)
    942 #   define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
    943     /**@stable ICU 2.0 */
    944 #   define U_STRING_INIT(var, cs, length)
    945 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
    946 #   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
    947     /**@stable ICU 2.0 */
    948 #   define U_STRING_INIT(var, cs, length)
    949 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
    950 #   define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
    951     /**@stable ICU 2.0 */
    952 #   define U_STRING_INIT(var, cs, length)
    953 #else
    954 #   define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
    955     /**@stable ICU 2.0 */
    956 #   define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1)
    957 #endif
    958 
    959 /**
    960  * Unescape a string of characters and write the resulting
    961  * Unicode characters to the destination buffer.  The following escape
    962  * sequences are recognized:
    963  *
    964  * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
    965  * \\Uhhhhhhhh   8 hex digits
    966  * \\xhh         1-2 hex digits
    967  * \\x{h...}     1-8 hex digits
    968  * \\ooo         1-3 octal digits; o in [0-7]
    969  * \\cX          control-X; X is masked with 0x1F
    970  *
    971  * as well as the standard ANSI C escapes:
    972  *
    973  * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
    974  * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
    975  * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
    976  *
    977  * Anything else following a backslash is generically escaped.  For
    978  * example, "[a\\-z]" returns "[a-z]".
    979  *
    980  * If an escape sequence is ill-formed, this function returns an empty
    981  * string.  An example of an ill-formed sequence is "\\u" followed by
    982  * fewer than 4 hex digits.
    983  *
    984  * The above characters are recognized in the compiler's codepage,
    985  * that is, they are coded as 'u', '\\', etc.  Characters that are
    986  * not parts of escape sequences are converted using u_charsToUChars().
    987  *
    988  * This function is similar to UnicodeString::unescape() but not
    989  * identical to it.  The latter takes a source UnicodeString, so it
    990  * does escape recognition but no conversion.
    991  *
    992  * @param src a zero-terminated string of invariant characters
    993  * @param dest pointer to buffer to receive converted and unescaped
    994  * text and, if there is room, a zero terminator.  May be NULL for
    995  * preflighting, in which case no UChars will be written, but the
    996  * return value will still be valid.  On error, an empty string is
    997  * stored here (if possible).
    998  * @param destCapacity the number of UChars that may be written at
    999  * dest.  Ignored if dest == NULL.
   1000  * @return the length of the unescaped string.
   1001  * @see u_unescapeAt
   1002  * @see UnicodeString#unescape()
   1003  * @see UnicodeString#unescapeAt()
   1004  * @stable ICU 2.0
   1005  */
   1006 U_STABLE int32_t U_EXPORT2
   1007 u_unescape(const char *src,
   1008            UChar *dest, int32_t destCapacity);
   1009 
   1010 U_CDECL_BEGIN
   1011 /**
   1012  * Callback function for u_unescapeAt() that returns a character of
   1013  * the source text given an offset and a context pointer.  The context
   1014  * pointer will be whatever is passed into u_unescapeAt().
   1015  *
   1016  * @param offset the offset into the source text of the character to return
   1017  * @param context an opaque pointer passed directly into u_unescapeAt()
   1018  * @return the character (UChar) of the source text at
   1019  * offset
   1020  * @see u_unescapeAt
   1021  * @stable ICU 2.0
   1022  */
   1023 typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
   1024 U_CDECL_END
   1025 
   1026 /**
   1027  * Unescape a single sequence. The character at offset-1 is assumed
   1028  * (without checking) to be a backslash.  This function takes a callback
   1029  * pointer to a function that returns the UChar at a given offset.  By
   1030  * varying this callback, ICU functions are able to unescape char*
   1031  * strings, UnicodeString objects, and UFILE pointers.
   1032  *
   1033  * If offset is out of range, or if the escape sequence is ill-formed,
   1034  * (UChar32)0xFFFFFFFF is returned.  See documentation of u_unescape()
   1035  * for a list of recognized sequences.
   1036  *
   1037  * @param charAt callback function that returns a UChar of the source
   1038  * text given an offset and a context pointer.
   1039  * @param offset pointer to the offset that will be passed to charAt.
   1040  * The offset value will be updated upon return to point after the
   1041  * last parsed character of the escape sequence.  On error the offset
   1042  * is unchanged.
   1043  * @param length the number of characters in the source text.  The
   1044  * last character of the source text is considered to be at offset
   1045  * length-1.
   1046  * @param context an opaque pointer passed directly into charAt.
   1047  * @return the character represented by the escape sequence at
   1048  * offset, or (UChar32)0xFFFFFFFF on error.
   1049  * @see u_unescape()
   1050  * @see UnicodeString#unescape()
   1051  * @see UnicodeString#unescapeAt()
   1052  * @stable ICU 2.0
   1053  */
   1054 U_STABLE UChar32 U_EXPORT2
   1055 u_unescapeAt(UNESCAPE_CHAR_AT charAt,
   1056              int32_t *offset,
   1057              int32_t length,
   1058              void *context);
   1059 
   1060 /**
   1061  * Uppercase the characters in a string.
   1062  * Casing is locale-dependent and context-sensitive.
   1063  * The result may be longer or shorter than the original.
   1064  * The source string and the destination buffer are allowed to overlap.
   1065  *
   1066  * @param dest      A buffer for the result string. The result will be zero-terminated if
   1067  *                  the buffer is large enough.
   1068  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   1069  *                  dest may be NULL and the function will only return the length of the result
   1070  *                  without writing any of the result string.
   1071  * @param src       The original string.
   1072  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
   1073  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
   1074  * @param pErrorCode Must be a valid pointer to an error code value,
   1075  *                  which must not indicate a failure before the function call.
   1076  * @return The length of the result string. It may be greater than destCapacity. In that case,
   1077  *         only some of the result was written to the destination buffer.
   1078  * @stable ICU 2.0
   1079  */
   1080 U_STABLE int32_t U_EXPORT2
   1081 u_strToUpper(UChar *dest, int32_t destCapacity,
   1082              const UChar *src, int32_t srcLength,
   1083              const char *locale,
   1084              UErrorCode *pErrorCode);
   1085 
   1086 /**
   1087  * Lowercase the characters in a string.
   1088  * Casing is locale-dependent and context-sensitive.
   1089  * The result may be longer or shorter than the original.
   1090  * The source string and the destination buffer are allowed to overlap.
   1091  *
   1092  * @param dest      A buffer for the result string. The result will be zero-terminated if
   1093  *                  the buffer is large enough.
   1094  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   1095  *                  dest may be NULL and the function will only return the length of the result
   1096  *                  without writing any of the result string.
   1097  * @param src       The original string.
   1098  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
   1099  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
   1100  * @param pErrorCode Must be a valid pointer to an error code value,
   1101  *                  which must not indicate a failure before the function call.
   1102  * @return The length of the result string. It may be greater than destCapacity. In that case,
   1103  *         only some of the result was written to the destination buffer.
   1104  * @stable ICU 2.0
   1105  */
   1106 U_STABLE int32_t U_EXPORT2
   1107 u_strToLower(UChar *dest, int32_t destCapacity,
   1108              const UChar *src, int32_t srcLength,
   1109              const char *locale,
   1110              UErrorCode *pErrorCode);
   1111 
   1112 #if !UCONFIG_NO_BREAK_ITERATION
   1113 
   1114 /**
   1115  * Titlecase a string.
   1116  * Casing is locale-dependent and context-sensitive.
   1117  * Titlecasing uses a break iterator to find the first characters of words
   1118  * that are to be titlecased. It titlecases those characters and lowercases
   1119  * all others.
   1120  *
   1121  * The titlecase break iterator can be provided to customize for arbitrary
   1122  * styles, using rules and dictionaries beyond the standard iterators.
   1123  * It may be more efficient to always provide an iterator to avoid
   1124  * opening and closing one for each string.
   1125  * The standard titlecase iterator for the root locale implements the
   1126  * algorithm of Unicode TR 21.
   1127  *
   1128  * This function uses only the setText(), first() and next() methods of the
   1129  * provided break iterator.
   1130  *
   1131  * The result may be longer or shorter than the original.
   1132  * The source string and the destination buffer are allowed to overlap.
   1133  *
   1134  * @param dest      A buffer for the result string. The result will be zero-terminated if
   1135  *                  the buffer is large enough.
   1136  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   1137  *                  dest may be NULL and the function will only return the length of the result
   1138  *                  without writing any of the result string.
   1139  * @param src       The original string.
   1140  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
   1141  * @param titleIter A break iterator to find the first characters of words
   1142  *                  that are to be titlecased.
   1143  *                  If none is provided (NULL), then a standard titlecase
   1144  *                  break iterator is opened.
   1145  * @param locale    The locale to consider, or "" for the root locale or NULL for the default locale.
   1146  * @param pErrorCode Must be a valid pointer to an error code value,
   1147  *                  which must not indicate a failure before the function call.
   1148  * @return The length of the result string. It may be greater than destCapacity. In that case,
   1149  *         only some of the result was written to the destination buffer.
   1150  * @stable ICU 2.1
   1151  */
   1152 U_STABLE int32_t U_EXPORT2
   1153 u_strToTitle(UChar *dest, int32_t destCapacity,
   1154              const UChar *src, int32_t srcLength,
   1155              UBreakIterator *titleIter,
   1156              const char *locale,
   1157              UErrorCode *pErrorCode);
   1158 
   1159 #endif
   1160 
   1161 /**
   1162  * Case-folds the characters in a string.
   1163  *
   1164  * Case-folding is locale-independent and not context-sensitive,
   1165  * but there is an option for whether to include or exclude mappings for dotted I
   1166  * and dotless i that are marked with 'T' in CaseFolding.txt.
   1167  *
   1168  * The result may be longer or shorter than the original.
   1169  * The source string and the destination buffer are allowed to overlap.
   1170  *
   1171  * @param dest      A buffer for the result string. The result will be zero-terminated if
   1172  *                  the buffer is large enough.
   1173  * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
   1174  *                  dest may be NULL and the function will only return the length of the result
   1175  *                  without writing any of the result string.
   1176  * @param src       The original string.
   1177  * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
   1178  * @param options   Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I.
   1179  * @param pErrorCode Must be a valid pointer to an error code value,
   1180  *                  which must not indicate a failure before the function call.
   1181  * @return The length of the result string. It may be greater than destCapacity. In that case,
   1182  *         only some of the result was written to the destination buffer.
   1183  * @stable ICU 2.0
   1184  */
   1185 U_STABLE int32_t U_EXPORT2
   1186 u_strFoldCase(UChar *dest, int32_t destCapacity,
   1187               const UChar *src, int32_t srcLength,
   1188               uint32_t options,
   1189               UErrorCode *pErrorCode);
   1190 
   1191 #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
   1192 /**
   1193  * Convert a UTF-16 string to a wchar_t string.
   1194  * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
   1195  * this function simply calls the fast, dedicated function for that.
   1196  * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
   1197  *
   1198  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1199  *                      the buffer is large enough.
   1200  * @param destCapacity  The size of the buffer (number of wchar_t's). If it is 0, then
   1201  *                      dest may be NULL and the function will only return the length of the
   1202  *                      result without writing any of the result string (pre-flighting).
   1203  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1204  *                      pDestLength!=NULL then *pDestLength is always set to the
   1205  *                      number of output units corresponding to the transformation of
   1206  *                      all the input units, even in case of a buffer overflow.
   1207  * @param src           The original source string.
   1208  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1209  * @param pErrorCode    Must be a valid pointer to an error code value,
   1210  *                      which must not indicate a failure before the function call.
   1211  * @return The pointer to the destination buffer.
   1212  * @stable ICU 2.0
   1213  */
   1214 U_STABLE wchar_t* U_EXPORT2
   1215 u_strToWCS(wchar_t *dest,
   1216            int32_t destCapacity,
   1217            int32_t *pDestLength,
   1218            const UChar *src,
   1219            int32_t srcLength,
   1220            UErrorCode *pErrorCode);
   1221 /**
   1222  * Convert a wchar_t string to UTF-16.
   1223  * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
   1224  * this function simply calls the fast, dedicated function for that.
   1225  * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
   1226  *
   1227  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1228  *                      the buffer is large enough.
   1229  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1230  *                      dest may be NULL and the function will only return the length of the
   1231  *                      result without writing any of the result string (pre-flighting).
   1232  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1233  *                      pDestLength!=NULL then *pDestLength is always set to the
   1234  *                      number of output units corresponding to the transformation of
   1235  *                      all the input units, even in case of a buffer overflow.
   1236  * @param src           The original source string.
   1237  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1238  * @param pErrorCode    Must be a valid pointer to an error code value,
   1239  *                      which must not indicate a failure before the function call.
   1240  * @return The pointer to the destination buffer.
   1241  * @stable ICU 2.0
   1242  */
   1243 U_STABLE UChar* U_EXPORT2
   1244 u_strFromWCS(UChar   *dest,
   1245              int32_t destCapacity,
   1246              int32_t *pDestLength,
   1247              const wchar_t *src,
   1248              int32_t srcLength,
   1249              UErrorCode *pErrorCode);
   1250 #endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
   1251 
   1252 /**
   1253  * Convert a UTF-16 string to UTF-8.
   1254  * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
   1255  *
   1256  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1257  *                      the buffer is large enough.
   1258  * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
   1259  *                      dest may be NULL and the function will only return the length of the
   1260  *                      result without writing any of the result string (pre-flighting).
   1261  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1262  *                      pDestLength!=NULL then *pDestLength is always set to the
   1263  *                      number of output units corresponding to the transformation of
   1264  *                      all the input units, even in case of a buffer overflow.
   1265  * @param src           The original source string.
   1266  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1267  * @param pErrorCode    Must be a valid pointer to an error code value,
   1268  *                      which must not indicate a failure before the function call.
   1269  * @return The pointer to the destination buffer.
   1270  * @stable ICU 2.0
   1271  * @see u_strToUTF8WithSub
   1272  * @see u_strFromUTF8
   1273  */
   1274 U_STABLE char* U_EXPORT2
   1275 u_strToUTF8(char *dest,
   1276             int32_t destCapacity,
   1277             int32_t *pDestLength,
   1278             const UChar *src,
   1279             int32_t srcLength,
   1280             UErrorCode *pErrorCode);
   1281 
   1282 /**
   1283  * Convert a UTF-8 string to UTF-16.
   1284  * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
   1285  *
   1286  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1287  *                      the buffer is large enough.
   1288  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1289  *                      dest may be NULL and the function will only return the length of the
   1290  *                      result without writing any of the result string (pre-flighting).
   1291  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1292  *                      pDestLength!=NULL then *pDestLength is always set to the
   1293  *                      number of output units corresponding to the transformation of
   1294  *                      all the input units, even in case of a buffer overflow.
   1295  * @param src           The original source string.
   1296  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1297  * @param pErrorCode    Must be a valid pointer to an error code value,
   1298  *                      which must not indicate a failure before the function call.
   1299  * @return The pointer to the destination buffer.
   1300  * @stable ICU 2.0
   1301  * @see u_strFromUTF8WithSub
   1302  * @see u_strFromUTF8Lenient
   1303  */
   1304 U_STABLE UChar* U_EXPORT2
   1305 u_strFromUTF8(UChar *dest,
   1306               int32_t destCapacity,
   1307               int32_t *pDestLength,
   1308               const char *src,
   1309               int32_t srcLength,
   1310               UErrorCode *pErrorCode);
   1311 
   1312 /**
   1313  * Convert a UTF-16 string to UTF-8.
   1314  * If the input string is not well-formed and subchar is U_SENTINEL, then the U_INVALID_CHAR_FOUND error code is set.
   1315  *
   1316  * Same as u_strToUTF8() except for the additional subchar which is output for
   1317  * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
   1318  * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
   1319  *
   1320  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1321  *                      the buffer is large enough.
   1322  * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
   1323  *                      dest may be NULL and the function will only return the length of the
   1324  *                      result without writing any of the result string (pre-flighting).
   1325  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1326  *                      pDestLength!=NULL then *pDestLength is always set to the
   1327  *                      number of output units corresponding to the transformation of
   1328  *                      all the input units, even in case of a buffer overflow.
   1329  * @param src           The original source string (UTF-16).
   1330  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1331  * @param subchar       The substitution character to use in place of an illegal input sequence,
   1332  *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
   1333  *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
   1334  *                      except for surrogate code points (U+D800..U+DFFF).
   1335  *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
   1336  * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
   1337  *                      Set to 0 if no substitutions occur or subchar<0.
   1338  *                      pNumSubstitutions can be NULL.
   1339  * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
   1340  *                      pass the U_SUCCESS() test, or else the function returns
   1341  *                      immediately. Check for U_FAILURE() on output or use with
   1342  *                      function chaining. (See User Guide for details.)
   1343  * @return The pointer to the destination buffer.
   1344  * @see u_strToUTF8
   1345  * @see u_strFromUTF8WithSub
   1346  * @stable ICU 3.6
   1347  */
   1348 U_STABLE char* U_EXPORT2
   1349 u_strToUTF8WithSub(char *dest,
   1350             int32_t destCapacity,
   1351             int32_t *pDestLength,
   1352             const UChar *src,
   1353             int32_t srcLength,
   1354             UChar32 subchar, int32_t *pNumSubstitutions,
   1355             UErrorCode *pErrorCode);
   1356 
   1357 /**
   1358  * Convert a UTF-8 string to UTF-16.
   1359  * If the input string is not well-formed and subchar is U_SENTINEL, then the U_INVALID_CHAR_FOUND error code is set.
   1360  *
   1361  * Same as u_strFromUTF8() except for the additional subchar which is output for
   1362  * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
   1363  * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
   1364  *
   1365  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1366  *                      the buffer is large enough.
   1367  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1368  *                      dest may be NULL and the function will only return the length of the
   1369  *                      result without writing any of the result string (pre-flighting).
   1370  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1371  *                      pDestLength!=NULL then *pDestLength is always set to the
   1372  *                      number of output units corresponding to the transformation of
   1373  *                      all the input units, even in case of a buffer overflow.
   1374  * @param src           The original source string (UTF-8).
   1375  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1376  * @param subchar       The substitution character to use in place of an illegal input sequence,
   1377  *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
   1378  *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
   1379  *                      except for surrogate code points (U+D800..U+DFFF).
   1380  *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
   1381  * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
   1382  *                      Set to 0 if no substitutions occur or subchar<0.
   1383  *                      pNumSubstitutions can be NULL.
   1384  * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
   1385  *                      pass the U_SUCCESS() test, or else the function returns
   1386  *                      immediately. Check for U_FAILURE() on output or use with
   1387  *                      function chaining. (See User Guide for details.)
   1388  * @return The pointer to the destination buffer.
   1389  * @see u_strFromUTF8
   1390  * @see u_strFromUTF8Lenient
   1391  * @see u_strToUTF8WithSub
   1392  * @stable ICU 3.6
   1393  */
   1394 U_STABLE UChar* U_EXPORT2
   1395 u_strFromUTF8WithSub(UChar *dest,
   1396               int32_t destCapacity,
   1397               int32_t *pDestLength,
   1398               const char *src,
   1399               int32_t srcLength,
   1400               UChar32 subchar, int32_t *pNumSubstitutions,
   1401               UErrorCode *pErrorCode);
   1402 
   1403 /**
   1404  * Convert a UTF-8 string to UTF-16.
   1405  *
   1406  * Same as u_strFromUTF8() except that this function is designed to be very fast,
   1407  * which it achieves by being lenient about malformed UTF-8 sequences.
   1408  * This function is intended for use in environments where UTF-8 text is
   1409  * expected to be well-formed.
   1410  *
   1411  * Its semantics are:
   1412  * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
   1413  * - The function will not read beyond the input string, nor write beyond
   1414  *   the destCapacity.
   1415  * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
   1416  *   be well-formed UTF-16.
   1417  *   The function will resynchronize to valid code point boundaries
   1418  *   within a small number of code points after an illegal sequence.
   1419  * - Non-shortest forms are not detected and will result in "spoofing" output.
   1420  *
   1421  * For further performance improvement, if srcLength is given (>=0),
   1422  * then destCapacity>=srcLength is required.
   1423  *
   1424  * There is no inverse u_strToUTF8Lenient() function because there is practically
   1425  * no performance gain from not checking that a UTF-16 string is well-formed.
   1426  *
   1427  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1428  *                      the buffer is large enough.
   1429  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1430  *                      dest may be NULL and the function will only return the length of the
   1431  *                      result without writing any of the result string (pre-flighting).
   1432  *                      Unlike for other ICU functions, if srcLength>=0 then
   1433  *                      destCapacity>=srcLength is required.
   1434  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1435  *                      pDestLength!=NULL then *pDestLength is always set to the
   1436  *                      number of output units corresponding to the transformation of
   1437  *                      all the input units, even in case of a buffer overflow.
   1438  *                      Unlike for other ICU functions, if srcLength>=0 but
   1439  *                      destCapacity<srcLength, then *pDestLength will be set to srcLength
   1440  *                      (and U_BUFFER_OVERFLOW_ERROR will be set)
   1441  *                      regardless of the actual result length.
   1442  * @param src           The original source string (UTF-8, expected to be well-formed).
   1443  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1444  * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
   1445  *                      pass the U_SUCCESS() test, or else the function returns
   1446  *                      immediately. Check for U_FAILURE() on output or use with
   1447  *                      function chaining. (See User Guide for details.)
   1448  * @return The pointer to the destination buffer.
   1449  * @see u_strFromUTF8
   1450  * @see u_strFromUTF8WithSub
   1451  * @see u_strToUTF8WithSub
   1452  * @stable ICU 3.6
   1453  */
   1454 U_STABLE UChar * U_EXPORT2
   1455 u_strFromUTF8Lenient(UChar *dest,
   1456                      int32_t destCapacity,
   1457                      int32_t *pDestLength,
   1458                      const char *src,
   1459                      int32_t srcLength,
   1460                      UErrorCode *pErrorCode);
   1461 
   1462 /**
   1463  * Convert a UTF-16 string to UTF-32.
   1464  * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
   1465  *
   1466  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1467  *                      the buffer is large enough.
   1468  * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
   1469  *                      dest may be NULL and the function will only return the length of the
   1470  *                      result without writing any of the result string (pre-flighting).
   1471  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1472  *                      pDestLength!=NULL then *pDestLength is always set to the
   1473  *                      number of output units corresponding to the transformation of
   1474  *                      all the input units, even in case of a buffer overflow.
   1475  * @param src           The original source string (UTF-16).
   1476  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1477  * @param pErrorCode    Must be a valid pointer to an error code value,
   1478  *                      which must not indicate a failure before the function call.
   1479  * @return The pointer to the destination buffer.
   1480  * @see u_strToUTF32WithSub
   1481  * @see u_strFromUTF32
   1482  * @stable ICU 2.0
   1483  */
   1484 U_STABLE UChar32* U_EXPORT2
   1485 u_strToUTF32(UChar32 *dest,
   1486              int32_t  destCapacity,
   1487              int32_t  *pDestLength,
   1488              const UChar *src,
   1489              int32_t  srcLength,
   1490              UErrorCode *pErrorCode);
   1491 
   1492 /**
   1493  * Convert a UTF-32 string to UTF-16.
   1494  * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
   1495  *
   1496  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1497  *                      the buffer is large enough.
   1498  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1499  *                      dest may be NULL and the function will only return the length of the
   1500  *                      result without writing any of the result string (pre-flighting).
   1501  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1502  *                      pDestLength!=NULL then *pDestLength is always set to the
   1503  *                      number of output units corresponding to the transformation of
   1504  *                      all the input units, even in case of a buffer overflow.
   1505  * @param src           The original source string (UTF-32).
   1506  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1507  * @param pErrorCode    Must be a valid pointer to an error code value,
   1508  *                      which must not indicate a failure before the function call.
   1509  * @return The pointer to the destination buffer.
   1510  * @see u_strFromUTF32WithSub
   1511  * @see u_strToUTF32
   1512  * @stable ICU 2.0
   1513  */
   1514 U_STABLE UChar* U_EXPORT2
   1515 u_strFromUTF32(UChar   *dest,
   1516                int32_t destCapacity,
   1517                int32_t *pDestLength,
   1518                const UChar32 *src,
   1519                int32_t srcLength,
   1520                UErrorCode *pErrorCode);
   1521 
   1522 /**
   1523  * Convert a UTF-16 string to UTF-32.
   1524  * If the input string is not well-formed and subchar is U_SENTINEL, then the U_INVALID_CHAR_FOUND error code is set.
   1525  *
   1526  * Same as u_strToUTF32() except for the additional subchar which is output for
   1527  * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
   1528  * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().
   1529  *
   1530  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1531  *                      the buffer is large enough.
   1532  * @param destCapacity  The size of the buffer (number of UChar32s). If it is 0, then
   1533  *                      dest may be NULL and the function will only return the length of the
   1534  *                      result without writing any of the result string (pre-flighting).
   1535  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1536  *                      pDestLength!=NULL then *pDestLength is always set to the
   1537  *                      number of output units corresponding to the transformation of
   1538  *                      all the input units, even in case of a buffer overflow.
   1539  * @param src           The original source string (UTF-16).
   1540  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1541  * @param subchar       The substitution character to use in place of an illegal input sequence,
   1542  *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
   1543  *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
   1544  *                      except for surrogate code points (U+D800..U+DFFF).
   1545  *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
   1546  * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
   1547  *                      Set to 0 if no substitutions occur or subchar<0.
   1548  *                      pNumSubstitutions can be NULL.
   1549  * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
   1550  *                      pass the U_SUCCESS() test, or else the function returns
   1551  *                      immediately. Check for U_FAILURE() on output or use with
   1552  *                      function chaining. (See User Guide for details.)
   1553  * @return The pointer to the destination buffer.
   1554  * @see u_strToUTF32
   1555  * @see u_strFromUTF32WithSub
   1556  * @stable ICU 4.2
   1557  */
   1558 U_STABLE UChar32* U_EXPORT2
   1559 u_strToUTF32WithSub(UChar32 *dest,
   1560              int32_t destCapacity,
   1561              int32_t *pDestLength,
   1562              const UChar *src,
   1563              int32_t srcLength,
   1564              UChar32 subchar, int32_t *pNumSubstitutions,
   1565              UErrorCode *pErrorCode);
   1566 
   1567 /**
   1568  * Convert a UTF-32 string to UTF-16.
   1569  * If the input string is not well-formed and subchar is U_SENTINEL, then the U_INVALID_CHAR_FOUND error code is set.
   1570  *
   1571  * Same as u_strFromUTF32() except for the additional subchar which is output for
   1572  * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
   1573  * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().
   1574  *
   1575  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1576  *                      the buffer is large enough.
   1577  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1578  *                      dest may be NULL and the function will only return the length of the
   1579  *                      result without writing any of the result string (pre-flighting).
   1580  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1581  *                      pDestLength!=NULL then *pDestLength is always set to the
   1582  *                      number of output units corresponding to the transformation of
   1583  *                      all the input units, even in case of a buffer overflow.
   1584  * @param src           The original source string (UTF-32).
   1585  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1586  * @param subchar       The substitution character to use in place of an illegal input sequence,
   1587  *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
   1588  *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
   1589  *                      except for surrogate code points (U+D800..U+DFFF).
   1590  *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
   1591  * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
   1592  *                      Set to 0 if no substitutions occur or subchar<0.
   1593  *                      pNumSubstitutions can be NULL.
   1594  * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
   1595  *                      pass the U_SUCCESS() test, or else the function returns
   1596  *                      immediately. Check for U_FAILURE() on output or use with
   1597  *                      function chaining. (See User Guide for details.)
   1598  * @return The pointer to the destination buffer.
   1599  * @see u_strFromUTF32
   1600  * @see u_strToUTF32WithSub
   1601  * @stable ICU 4.2
   1602  */
   1603 U_STABLE UChar* U_EXPORT2
   1604 u_strFromUTF32WithSub(UChar *dest,
   1605                int32_t destCapacity,
   1606                int32_t *pDestLength,
   1607                const UChar32 *src,
   1608                int32_t srcLength,
   1609                UChar32 subchar, int32_t *pNumSubstitutions,
   1610                UErrorCode *pErrorCode);
   1611 
   1612 /**
   1613  * Convert a 16-bit Unicode string to Java Modified UTF-8.
   1614  * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8
   1615  *
   1616  * This function behaves according to the documentation for Java DataOutput.writeUTF()
   1617  * except that it does not encode the output length in the destination buffer
   1618  * and does not have an output length restriction.
   1619  * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String)
   1620  *
   1621  * The input string need not be well-formed UTF-16.
   1622  * (Therefore there is no subchar parameter.)
   1623  *
   1624  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1625  *                      the buffer is large enough.
   1626  * @param destCapacity  The size of the buffer (number of chars). If it is 0, then
   1627  *                      dest may be NULL and the function will only return the length of the
   1628  *                      result without writing any of the result string (pre-flighting).
   1629  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1630  *                      pDestLength!=NULL then *pDestLength is always set to the
   1631  *                      number of output units corresponding to the transformation of
   1632  *                      all the input units, even in case of a buffer overflow.
   1633  * @param src           The original source string (16-bit Unicode; need not be well-formed UTF-16).
   1634  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1635  * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
   1636  *                      pass the U_SUCCESS() test, or else the function returns
   1637  *                      immediately. Check for U_FAILURE() on output or use with
   1638  *                      function chaining. (See User Guide for details.)
   1639  * @return The pointer to the destination buffer.
   1640  * @stable ICU 4.4
   1641  * @see u_strToUTF8WithSub
   1642  * @see u_strFromJavaModifiedUTF8WithSub
   1643  */
   1644 U_STABLE char* U_EXPORT2
   1645 u_strToJavaModifiedUTF8(
   1646         char *dest,
   1647         int32_t destCapacity,
   1648         int32_t *pDestLength,
   1649         const UChar *src,
   1650         int32_t srcLength,
   1651         UErrorCode *pErrorCode);
   1652 
   1653 /**
   1654  * Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
   1655  * If the input string is not well-formed and subchar is U_SENTINEL, then the U_INVALID_CHAR_FOUND error code is set.
   1656  *
   1657  * This function behaves according to the documentation for Java DataInput.readUTF()
   1658  * except that it takes a length parameter rather than
   1659  * interpreting the first two input bytes as the length.
   1660  * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF()
   1661  *
   1662  * The output string is not guaranteed to be well-formed UTF-16.
   1663  *
   1664  * @param dest          A buffer for the result string. The result will be zero-terminated if
   1665  *                      the buffer is large enough.
   1666  * @param destCapacity  The size of the buffer (number of UChars). If it is 0, then
   1667  *                      dest may be NULL and the function will only return the length of the
   1668  *                      result without writing any of the result string (pre-flighting).
   1669  * @param pDestLength   A pointer to receive the number of units written to the destination. If
   1670  *                      pDestLength!=NULL then *pDestLength is always set to the
   1671  *                      number of output units corresponding to the transformation of
   1672  *                      all the input units, even in case of a buffer overflow.
   1673  * @param src           The original source string (Java Modified UTF-8).
   1674  * @param srcLength     The length of the original string. If -1, then src must be zero-terminated.
   1675  * @param subchar       The substitution character to use in place of an illegal input sequence,
   1676  *                      or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
   1677  *                      A substitution character can be any valid Unicode code point (up to U+10FFFF)
   1678  *                      except for surrogate code points (U+D800..U+DFFF).
   1679  *                      The recommended value is U+FFFD "REPLACEMENT CHARACTER".
   1680  * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
   1681  *                      Set to 0 if no substitutions occur or subchar<0.
   1682  *                      pNumSubstitutions can be NULL.
   1683  * @param pErrorCode    Pointer to a standard ICU error code. Its input value must
   1684  *                      pass the U_SUCCESS() test, or else the function returns
   1685  *                      immediately. Check for U_FAILURE() on output or use with
   1686  *                      function chaining. (See User Guide for details.)
   1687  * @return The pointer to the destination buffer.
   1688  * @see u_strFromUTF8WithSub
   1689  * @see u_strFromUTF8Lenient
   1690  * @see u_strToJavaModifiedUTF8
   1691  * @stable ICU 4.4
   1692  */
   1693 U_STABLE UChar* U_EXPORT2
   1694 u_strFromJavaModifiedUTF8WithSub(
   1695         UChar *dest,
   1696         int32_t destCapacity,
   1697         int32_t *pDestLength,
   1698         const char *src,
   1699         int32_t srcLength,
   1700         UChar32 subchar, int32_t *pNumSubstitutions,
   1701         UErrorCode *pErrorCode);
   1702 
   1703 #endif
   1704