1 /* 2 ********************************************************************** 3 * Copyright (C) 1998-2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * File ustring.h 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 12/07/98 bertrand Creation. 13 ****************************************************************************** 14 */ 15 16 #ifndef USTRING_H 17 #define USTRING_H 18 19 #include "unicode/utypes.h" 20 #include "unicode/putil.h" 21 #include "unicode/uiter.h" 22 23 /** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/ 24 #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR 25 # define UBRK_TYPEDEF_UBREAK_ITERATOR 26 typedef void UBreakIterator; 27 #endif 28 29 /** 30 * \file 31 * \brief C API: Unicode string handling functions 32 * 33 * These C API functions provide general Unicode string handling. 34 * 35 * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h> 36 * functions. (For example, they do not check for bad arguments like NULL string pointers.) 37 * In some cases, only the thread-safe variant of such a function is implemented here 38 * (see u_strtok_r()). 39 * 40 * Other functions provide more Unicode-specific functionality like locale-specific 41 * upper/lower-casing and string comparison in code point order. 42 * 43 * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units. 44 * UTF-16 encodes each Unicode code point with either one or two UChar code units. 45 * (This is the default form of Unicode, and a forward-compatible extension of the original, 46 * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0 47 * in 1996.) 48 * 49 * Some APIs accept a 32-bit UChar32 value for a single code point. 50 * 51 * ICU also handles 16-bit Unicode text with unpaired surrogates. 52 * Such text is not well-formed UTF-16. 53 * Code-point-related functions treat unpaired surrogates as surrogate code points, 54 * i.e., as separate units. 55 * 56 * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings), 57 * it is much more efficient even for random access because the code unit values 58 * for single-unit characters vs. lead units vs. trail units are completely disjoint. 59 * This means that it is easy to determine character (code point) boundaries from 60 * random offsets in the string. 61 * 62 * Unicode (UTF-16) string processing is optimized for the single-unit case. 63 * Although it is important to support supplementary characters 64 * (which use pairs of lead/trail code units called "surrogates"), 65 * their occurrence is rare. Almost all characters in modern use require only 66 * a single UChar code unit (i.e., their code point values are <=0xffff). 67 * 68 * For more details see the User Guide Strings chapter (http://oss.software.ibm.com/icu/userguide/strings.html). 69 * For a discussion of the handling of unpaired surrogates see also 70 * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18. 71 */ 72 73 /** 74 * Determine the length of an array of UChar. 75 * 76 * @param s The array of UChars, NULL (U+0000) terminated. 77 * @return The number of UChars in <code>chars</code>, minus the terminator. 78 * @stable ICU 2.0 79 */ 80 U_STABLE int32_t U_EXPORT2 81 u_strlen(const UChar *s); 82 83 /** 84 * Count Unicode code points in the length UChar code units of the string. 85 * A code point may occupy either one or two UChar code units. 86 * Counting code points involves reading all code units. 87 * 88 * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h). 89 * 90 * @param s The input string. 91 * @param length The number of UChar code units to be checked, or -1 to count all 92 * code points before the first NUL (U+0000). 93 * @return The number of code points in the specified code units. 94 * @stable ICU 2.0 95 */ 96 U_STABLE int32_t U_EXPORT2 97 u_countChar32(const UChar *s, int32_t length); 98 99 /** 100 * Check if the string contains more Unicode code points than a certain number. 101 * This is more efficient than counting all code points in the entire string 102 * and comparing that number with a threshold. 103 * This function may not need to scan the string at all if the length is known 104 * (not -1 for NUL-termination) and falls within a certain range, and 105 * never needs to count more than 'number+1' code points. 106 * Logically equivalent to (u_countChar32(s, length)>number). 107 * A Unicode code point may occupy either one or two UChar code units. 108 * 109 * @param s The input string. 110 * @param length The length of the string, or -1 if it is NUL-terminated. 111 * @param number The number of code points in the string is compared against 112 * the 'number' parameter. 113 * @return Boolean value for whether the string contains more Unicode code points 114 * than 'number'. Same as (u_countChar32(s, length)>number). 115 * @stable ICU 2.4 116 */ 117 U_STABLE UBool U_EXPORT2 118 u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number); 119 120 /** 121 * Concatenate two ustrings. Appends a copy of <code>src</code>, 122 * including the null terminator, to <code>dst</code>. The initial copied 123 * character from <code>src</code> overwrites the null terminator in <code>dst</code>. 124 * 125 * @param dst The destination string. 126 * @param src The source string. 127 * @return A pointer to <code>dst</code>. 128 * @stable ICU 2.0 129 */ 130 U_STABLE UChar* U_EXPORT2 131 u_strcat(UChar *dst, 132 const UChar *src); 133 134 /** 135 * Concatenate two ustrings. 136 * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>. 137 * Adds a terminating NUL. 138 * If src is too long, then only <code>n-1</code> characters will be copied 139 * before the terminating NUL. 140 * If <code>n<=0</code> then dst is not modified. 141 * 142 * @param dst The destination string. 143 * @param src The source string. 144 * @param n The maximum number of characters to compare. 145 * @return A pointer to <code>dst</code>. 146 * @stable ICU 2.0 147 */ 148 U_STABLE UChar* U_EXPORT2 149 u_strncat(UChar *dst, 150 const UChar *src, 151 int32_t n); 152 153 /** 154 * Find the first occurrence of a substring in a string. 155 * The substring is found at code point boundaries. 156 * That means that if the substring begins with 157 * a trail surrogate or ends with a lead surrogate, 158 * then it is found only if these surrogates stand alone in the text. 159 * Otherwise, the substring edge units would be matched against 160 * halves of surrogate pairs. 161 * 162 * @param s The string to search (NUL-terminated). 163 * @param substring The substring to find (NUL-terminated). 164 * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, 165 * or <code>s</code> itself if the <code>substring</code> is empty, 166 * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. 167 * @stable ICU 2.0 168 * 169 * @see u_strrstr 170 * @see u_strFindFirst 171 * @see u_strFindLast 172 */ 173 U_STABLE UChar * U_EXPORT2 174 u_strstr(const UChar *s, const UChar *substring); 175 176 /** 177 * Find the first occurrence of a substring in a string. 178 * The substring is found at code point boundaries. 179 * That means that if the substring begins with 180 * a trail surrogate or ends with a lead surrogate, 181 * then it is found only if these surrogates stand alone in the text. 182 * Otherwise, the substring edge units would be matched against 183 * halves of surrogate pairs. 184 * 185 * @param s The string to search. 186 * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. 187 * @param substring The substring to find (NUL-terminated). 188 * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. 189 * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, 190 * or <code>s</code> itself if the <code>substring</code> is empty, 191 * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. 192 * @stable ICU 2.4 193 * 194 * @see u_strstr 195 * @see u_strFindLast 196 */ 197 U_STABLE UChar * U_EXPORT2 198 u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); 199 200 /** 201 * Find the first occurrence of a BMP code point in a string. 202 * A surrogate code point is found only if its match in the text is not 203 * part of a surrogate pair. 204 * A NUL character is found at the string terminator. 205 * 206 * @param s The string to search (NUL-terminated). 207 * @param c The BMP code point to find. 208 * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> 209 * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 210 * @stable ICU 2.0 211 * 212 * @see u_strchr32 213 * @see u_memchr 214 * @see u_strstr 215 * @see u_strFindFirst 216 */ 217 U_STABLE UChar * U_EXPORT2 218 u_strchr(const UChar *s, UChar c); 219 220 /** 221 * Find the first occurrence of a code point in a string. 222 * A surrogate code point is found only if its match in the text is not 223 * part of a surrogate pair. 224 * A NUL character is found at the string terminator. 225 * 226 * @param s The string to search (NUL-terminated). 227 * @param c The code point to find. 228 * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> 229 * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 230 * @stable ICU 2.0 231 * 232 * @see u_strchr 233 * @see u_memchr32 234 * @see u_strstr 235 * @see u_strFindFirst 236 */ 237 U_STABLE UChar * U_EXPORT2 238 u_strchr32(const UChar *s, UChar32 c); 239 240 /** 241 * Find the last occurrence of a substring in a string. 242 * The substring is found at code point boundaries. 243 * That means that if the substring begins with 244 * a trail surrogate or ends with a lead surrogate, 245 * then it is found only if these surrogates stand alone in the text. 246 * Otherwise, the substring edge units would be matched against 247 * halves of surrogate pairs. 248 * 249 * @param s The string to search (NUL-terminated). 250 * @param substring The substring to find (NUL-terminated). 251 * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, 252 * or <code>s</code> itself if the <code>substring</code> is empty, 253 * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. 254 * @stable ICU 2.4 255 * 256 * @see u_strstr 257 * @see u_strFindFirst 258 * @see u_strFindLast 259 */ 260 U_STABLE UChar * U_EXPORT2 261 u_strrstr(const UChar *s, const UChar *substring); 262 263 /** 264 * Find the last occurrence of a substring in a string. 265 * The substring is found at code point boundaries. 266 * That means that if the substring begins with 267 * a trail surrogate or ends with a lead surrogate, 268 * then it is found only if these surrogates stand alone in the text. 269 * Otherwise, the substring edge units would be matched against 270 * halves of surrogate pairs. 271 * 272 * @param s The string to search. 273 * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. 274 * @param substring The substring to find (NUL-terminated). 275 * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. 276 * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, 277 * or <code>s</code> itself if the <code>substring</code> is empty, 278 * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. 279 * @stable ICU 2.4 280 * 281 * @see u_strstr 282 * @see u_strFindLast 283 */ 284 U_STABLE UChar * U_EXPORT2 285 u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); 286 287 /** 288 * Find the last occurrence of a BMP code point in a string. 289 * A surrogate code point is found only if its match in the text is not 290 * part of a surrogate pair. 291 * A NUL character is found at the string terminator. 292 * 293 * @param s The string to search (NUL-terminated). 294 * @param c The BMP code point to find. 295 * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> 296 * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 297 * @stable ICU 2.4 298 * 299 * @see u_strrchr32 300 * @see u_memrchr 301 * @see u_strrstr 302 * @see u_strFindLast 303 */ 304 U_STABLE UChar * U_EXPORT2 305 u_strrchr(const UChar *s, UChar c); 306 307 /** 308 * Find the last occurrence of a code point in a string. 309 * A surrogate code point is found only if its match in the text is not 310 * part of a surrogate pair. 311 * A NUL character is found at the string terminator. 312 * 313 * @param s The string to search (NUL-terminated). 314 * @param c The code point to find. 315 * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> 316 * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 317 * @stable ICU 2.4 318 * 319 * @see u_strrchr 320 * @see u_memchr32 321 * @see u_strrstr 322 * @see u_strFindLast 323 */ 324 U_STABLE UChar * U_EXPORT2 325 u_strrchr32(const UChar *s, UChar32 c); 326 327 /** 328 * Locates the first occurrence in the string <code>string</code> of any of the characters 329 * in the string <code>matchSet</code>. 330 * Works just like C's strpbrk but with Unicode. 331 * 332 * @param string The string in which to search, NUL-terminated. 333 * @param matchSet A NUL-terminated string defining a set of code points 334 * for which to search in the text string. 335 * @return A pointer to the character in <code>string</code> that matches one of the 336 * characters in <code>matchSet</code>, or NULL if no such character is found. 337 * @stable ICU 2.0 338 */ 339 U_STABLE UChar * U_EXPORT2 340 u_strpbrk(const UChar *string, const UChar *matchSet); 341 342 /** 343 * Returns the number of consecutive characters in <code>string</code>, 344 * beginning with the first, that do not occur somewhere in <code>matchSet</code>. 345 * Works just like C's strcspn but with Unicode. 346 * 347 * @param string The string in which to search, NUL-terminated. 348 * @param matchSet A NUL-terminated string defining a set of code points 349 * for which to search in the text string. 350 * @return The number of initial characters in <code>string</code> that do not 351 * occur in <code>matchSet</code>. 352 * @see u_strspn 353 * @stable ICU 2.0 354 */ 355 U_STABLE int32_t U_EXPORT2 356 u_strcspn(const UChar *string, const UChar *matchSet); 357 358 /** 359 * Returns the number of consecutive characters in <code>string</code>, 360 * beginning with the first, that occur somewhere in <code>matchSet</code>. 361 * Works just like C's strspn but with Unicode. 362 * 363 * @param string The string in which to search, NUL-terminated. 364 * @param matchSet A NUL-terminated string defining a set of code points 365 * for which to search in the text string. 366 * @return The number of initial characters in <code>string</code> that do 367 * occur in <code>matchSet</code>. 368 * @see u_strcspn 369 * @stable ICU 2.0 370 */ 371 U_STABLE int32_t U_EXPORT2 372 u_strspn(const UChar *string, const UChar *matchSet); 373 374 /** 375 * The string tokenizer API allows an application to break a string into 376 * tokens. Unlike strtok(), the saveState (the current pointer within the 377 * original string) is maintained in saveState. In the first call, the 378 * argument src is a pointer to the string. In subsequent calls to 379 * return successive tokens of that string, src must be specified as 380 * NULL. The value saveState is set by this function to maintain the 381 * function's position within the string, and on each subsequent call 382 * you must give this argument the same variable. This function does 383 * handle surrogate pairs. This function is similar to the strtok_r() 384 * the POSIX Threads Extension (1003.1c-1995) version. 385 * 386 * @param src String containing token(s). This string will be modified. 387 * After the first call to u_strtok_r(), this argument must 388 * be NULL to get to the next token. 389 * @param delim Set of delimiter characters (Unicode code points). 390 * @param saveState The current pointer within the original string, 391 * which is set by this function. The saveState 392 * parameter should the address of a local variable of type 393 * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use 394 * &myLocalSaveState for this parameter). 395 * @return A pointer to the next token found in src, or NULL 396 * when there are no more tokens. 397 * @stable ICU 2.0 398 */ 399 U_STABLE UChar * U_EXPORT2 400 u_strtok_r(UChar *src, 401 const UChar *delim, 402 UChar **saveState); 403 404 /** 405 * Compare two Unicode strings for bitwise equality (code unit order). 406 * 407 * @param s1 A string to compare. 408 * @param s2 A string to compare. 409 * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative 410 * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive 411 * value if <code>s1</code> is bitwise greater than <code>s2</code>. 412 * @stable ICU 2.0 413 */ 414 U_STABLE int32_t U_EXPORT2 415 u_strcmp(const UChar *s1, 416 const UChar *s2); 417 418 /** 419 * Compare two Unicode strings in code point order. 420 * See u_strCompare for details. 421 * 422 * @param s1 A string to compare. 423 * @param s2 A string to compare. 424 * @return a negative/zero/positive integer corresponding to whether 425 * the first string is less than/equal to/greater than the second one 426 * in code point order 427 * @stable ICU 2.0 428 */ 429 U_STABLE int32_t U_EXPORT2 430 u_strcmpCodePointOrder(const UChar *s1, const UChar *s2); 431 432 /** 433 * Compare two Unicode strings (binary order). 434 * 435 * The comparison can be done in code unit order or in code point order. 436 * They differ only in UTF-16 when 437 * comparing supplementary code points (U+10000..U+10ffff) 438 * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). 439 * In code unit order, high BMP code points sort after supplementary code points 440 * because they are stored as pairs of surrogates which are at U+d800..U+dfff. 441 * 442 * This functions works with strings of different explicitly specified lengths 443 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. 444 * NUL-terminated strings are possible with length arguments of -1. 445 * 446 * @param s1 First source string. 447 * @param length1 Length of first source string, or -1 if NUL-terminated. 448 * 449 * @param s2 Second source string. 450 * @param length2 Length of second source string, or -1 if NUL-terminated. 451 * 452 * @param codePointOrder Choose between code unit order (FALSE) 453 * and code point order (TRUE). 454 * 455 * @return <0 or 0 or >0 as usual for string comparisons 456 * 457 * @stable ICU 2.2 458 */ 459 U_STABLE int32_t U_EXPORT2 460 u_strCompare(const UChar *s1, int32_t length1, 461 const UChar *s2, int32_t length2, 462 UBool codePointOrder); 463 464 /** 465 * Compare two Unicode strings (binary order) 466 * as presented by UCharIterator objects. 467 * Works otherwise just like u_strCompare(). 468 * 469 * Both iterators are reset to their start positions. 470 * When the function returns, it is undefined where the iterators 471 * have stopped. 472 * 473 * @param iter1 First source string iterator. 474 * @param iter2 Second source string iterator. 475 * @param codePointOrder Choose between code unit order (FALSE) 476 * and code point order (TRUE). 477 * 478 * @return <0 or 0 or >0 as usual for string comparisons 479 * 480 * @see u_strCompare 481 * 482 * @stable ICU 2.6 483 */ 484 U_STABLE int32_t U_EXPORT2 485 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder); 486 487 #ifndef U_COMPARE_CODE_POINT_ORDER 488 /* see also unistr.h and unorm.h */ 489 /** 490 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 491 * Compare strings in code point order instead of code unit order. 492 * @stable ICU 2.2 493 */ 494 #define U_COMPARE_CODE_POINT_ORDER 0x8000 495 #endif 496 497 /** 498 * Compare two strings case-insensitively using full case folding. 499 * This is equivalent to 500 * u_strCompare(u_strFoldCase(s1, options), 501 * u_strFoldCase(s2, options), 502 * (options&U_COMPARE_CODE_POINT_ORDER)!=0). 503 * 504 * The comparison can be done in UTF-16 code unit order or in code point order. 505 * They differ only when comparing supplementary code points (U+10000..U+10ffff) 506 * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). 507 * In code unit order, high BMP code points sort after supplementary code points 508 * because they are stored as pairs of surrogates which are at U+d800..U+dfff. 509 * 510 * This functions works with strings of different explicitly specified lengths 511 * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. 512 * NUL-terminated strings are possible with length arguments of -1. 513 * 514 * @param s1 First source string. 515 * @param length1 Length of first source string, or -1 if NUL-terminated. 516 * 517 * @param s2 Second source string. 518 * @param length2 Length of second source string, or -1 if NUL-terminated. 519 * 520 * @param options A bit set of options: 521 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 522 * Comparison in code unit order with default case folding. 523 * 524 * - U_COMPARE_CODE_POINT_ORDER 525 * Set to choose code point order instead of code unit order 526 * (see u_strCompare for details). 527 * 528 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 529 * 530 * @param pErrorCode Must be a valid pointer to an error code value, 531 * which must not indicate a failure before the function call. 532 * 533 * @return <0 or 0 or >0 as usual for string comparisons 534 * 535 * @stable ICU 2.2 536 */ 537 U_STABLE int32_t U_EXPORT2 538 u_strCaseCompare(const UChar *s1, int32_t length1, 539 const UChar *s2, int32_t length2, 540 uint32_t options, 541 UErrorCode *pErrorCode); 542 543 /** 544 * Compare two ustrings for bitwise equality. 545 * Compares at most <code>n</code> characters. 546 * 547 * @param ucs1 A string to compare. 548 * @param ucs2 A string to compare. 549 * @param n The maximum number of characters to compare. 550 * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative 551 * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive 552 * value if <code>s1</code> is bitwise greater than <code>s2</code>. 553 * @stable ICU 2.0 554 */ 555 U_STABLE int32_t U_EXPORT2 556 u_strncmp(const UChar *ucs1, 557 const UChar *ucs2, 558 int32_t n); 559 560 /** 561 * Compare two Unicode strings in code point order. 562 * This is different in UTF-16 from u_strncmp() if supplementary characters are present. 563 * For details, see u_strCompare(). 564 * 565 * @param s1 A string to compare. 566 * @param s2 A string to compare. 567 * @param n The maximum number of characters to compare. 568 * @return a negative/zero/positive integer corresponding to whether 569 * the first string is less than/equal to/greater than the second one 570 * in code point order 571 * @stable ICU 2.0 572 */ 573 U_STABLE int32_t U_EXPORT2 574 u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n); 575 576 /** 577 * Compare two strings case-insensitively using full case folding. 578 * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)). 579 * 580 * @param s1 A string to compare. 581 * @param s2 A string to compare. 582 * @param options A bit set of options: 583 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 584 * Comparison in code unit order with default case folding. 585 * 586 * - U_COMPARE_CODE_POINT_ORDER 587 * Set to choose code point order instead of code unit order 588 * (see u_strCompare for details). 589 * 590 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 591 * 592 * @return A negative, zero, or positive integer indicating the comparison result. 593 * @stable ICU 2.0 594 */ 595 U_STABLE int32_t U_EXPORT2 596 u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options); 597 598 /** 599 * Compare two strings case-insensitively using full case folding. 600 * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), 601 * u_strFoldCase(s2, at most n, options)). 602 * 603 * @param s1 A string to compare. 604 * @param s2 A string to compare. 605 * @param n The maximum number of characters each string to case-fold and then compare. 606 * @param options A bit set of options: 607 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 608 * Comparison in code unit order with default case folding. 609 * 610 * - U_COMPARE_CODE_POINT_ORDER 611 * Set to choose code point order instead of code unit order 612 * (see u_strCompare for details). 613 * 614 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 615 * 616 * @return A negative, zero, or positive integer indicating the comparison result. 617 * @stable ICU 2.0 618 */ 619 U_STABLE int32_t U_EXPORT2 620 u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options); 621 622 /** 623 * Compare two strings case-insensitively using full case folding. 624 * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options), 625 * u_strFoldCase(s2, n, options)). 626 * 627 * @param s1 A string to compare. 628 * @param s2 A string to compare. 629 * @param length The number of characters in each string to case-fold and then compare. 630 * @param options A bit set of options: 631 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 632 * Comparison in code unit order with default case folding. 633 * 634 * - U_COMPARE_CODE_POINT_ORDER 635 * Set to choose code point order instead of code unit order 636 * (see u_strCompare for details). 637 * 638 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 639 * 640 * @return A negative, zero, or positive integer indicating the comparison result. 641 * @stable ICU 2.0 642 */ 643 U_STABLE int32_t U_EXPORT2 644 u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options); 645 646 /** 647 * Copy a ustring. Adds a null terminator. 648 * 649 * @param dst The destination string. 650 * @param src The source string. 651 * @return A pointer to <code>dst</code>. 652 * @stable ICU 2.0 653 */ 654 U_STABLE UChar* U_EXPORT2 655 u_strcpy(UChar *dst, 656 const UChar *src); 657 658 /** 659 * Copy a ustring. 660 * Copies at most <code>n</code> characters. The result will be null terminated 661 * if the length of <code>src</code> is less than <code>n</code>. 662 * 663 * @param dst The destination string. 664 * @param src The source string. 665 * @param n The maximum number of characters to copy. 666 * @return A pointer to <code>dst</code>. 667 * @stable ICU 2.0 668 */ 669 U_STABLE UChar* U_EXPORT2 670 u_strncpy(UChar *dst, 671 const UChar *src, 672 int32_t n); 673 674 #if !UCONFIG_NO_CONVERSION 675 676 /** 677 * Copy a byte string encoded in the default codepage to a ustring. 678 * Adds a null terminator. 679 * Performs a host byte to UChar conversion 680 * 681 * @param dst The destination string. 682 * @param src The source string. 683 * @return A pointer to <code>dst</code>. 684 * @stable ICU 2.0 685 */ 686 U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst, 687 const char *src ); 688 689 /** 690 * Copy a byte string encoded in the default codepage to a ustring. 691 * Copies at most <code>n</code> characters. The result will be null terminated 692 * if the length of <code>src</code> is less than <code>n</code>. 693 * Performs a host byte to UChar conversion 694 * 695 * @param dst The destination string. 696 * @param src The source string. 697 * @param n The maximum number of characters to copy. 698 * @return A pointer to <code>dst</code>. 699 * @stable ICU 2.0 700 */ 701 U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst, 702 const char *src, 703 int32_t n); 704 705 /** 706 * Copy ustring to a byte string encoded in the default codepage. 707 * Adds a null terminator. 708 * Performs a UChar to host byte conversion 709 * 710 * @param dst The destination string. 711 * @param src The source string. 712 * @return A pointer to <code>dst</code>. 713 * @stable ICU 2.0 714 */ 715 U_STABLE char* U_EXPORT2 u_austrcpy(char *dst, 716 const UChar *src ); 717 718 /** 719 * Copy ustring to a byte string encoded in the default codepage. 720 * Copies at most <code>n</code> characters. The result will be null terminated 721 * if the length of <code>src</code> is less than <code>n</code>. 722 * Performs a UChar to host byte conversion 723 * 724 * @param dst The destination string. 725 * @param src The source string. 726 * @param n The maximum number of characters to copy. 727 * @return A pointer to <code>dst</code>. 728 * @stable ICU 2.0 729 */ 730 U_STABLE char* U_EXPORT2 u_austrncpy(char *dst, 731 const UChar *src, 732 int32_t n ); 733 734 #endif 735 736 /** 737 * Synonym for memcpy(), but with UChars only. 738 * @param dest The destination string 739 * @param src The source string 740 * @param count The number of characters to copy 741 * @return A pointer to <code>dest</code> 742 * @stable ICU 2.0 743 */ 744 U_STABLE UChar* U_EXPORT2 745 u_memcpy(UChar *dest, const UChar *src, int32_t count); 746 747 /** 748 * Synonym for memmove(), but with UChars only. 749 * @param dest The destination string 750 * @param src The source string 751 * @param count The number of characters to move 752 * @return A pointer to <code>dest</code> 753 * @stable ICU 2.0 754 */ 755 U_STABLE UChar* U_EXPORT2 756 u_memmove(UChar *dest, const UChar *src, int32_t count); 757 758 /** 759 * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>. 760 * 761 * @param dest The destination string. 762 * @param c The character to initialize the string. 763 * @param count The maximum number of characters to set. 764 * @return A pointer to <code>dest</code>. 765 * @stable ICU 2.0 766 */ 767 U_STABLE UChar* U_EXPORT2 768 u_memset(UChar *dest, UChar c, int32_t count); 769 770 /** 771 * Compare the first <code>count</code> UChars of each buffer. 772 * 773 * @param buf1 The first string to compare. 774 * @param buf2 The second string to compare. 775 * @param count The maximum number of UChars to compare. 776 * @return When buf1 < buf2, a negative number is returned. 777 * When buf1 == buf2, 0 is returned. 778 * When buf1 > buf2, a positive number is returned. 779 * @stable ICU 2.0 780 */ 781 U_STABLE int32_t U_EXPORT2 782 u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count); 783 784 /** 785 * Compare two Unicode strings in code point order. 786 * This is different in UTF-16 from u_memcmp() if supplementary characters are present. 787 * For details, see u_strCompare(). 788 * 789 * @param s1 A string to compare. 790 * @param s2 A string to compare. 791 * @param count The maximum number of characters to compare. 792 * @return a negative/zero/positive integer corresponding to whether 793 * the first string is less than/equal to/greater than the second one 794 * in code point order 795 * @stable ICU 2.0 796 */ 797 U_STABLE int32_t U_EXPORT2 798 u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count); 799 800 /** 801 * Find the first occurrence of a BMP code point in a string. 802 * A surrogate code point is found only if its match in the text is not 803 * part of a surrogate pair. 804 * A NUL character is found at the string terminator. 805 * 806 * @param s The string to search (contains <code>count</code> UChars). 807 * @param c The BMP code point to find. 808 * @param count The length of the string. 809 * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> 810 * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 811 * @stable ICU 2.0 812 * 813 * @see u_strchr 814 * @see u_memchr32 815 * @see u_strFindFirst 816 */ 817 U_STABLE UChar* U_EXPORT2 818 u_memchr(const UChar *s, UChar c, int32_t count); 819 820 /** 821 * Find the first occurrence of a code point in a string. 822 * A surrogate code point is found only if its match in the text is not 823 * part of a surrogate pair. 824 * A NUL character is found at the string terminator. 825 * 826 * @param s The string to search (contains <code>count</code> UChars). 827 * @param c The code point to find. 828 * @param count The length of the string. 829 * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> 830 * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 831 * @stable ICU 2.0 832 * 833 * @see u_strchr32 834 * @see u_memchr 835 * @see u_strFindFirst 836 */ 837 U_STABLE UChar* U_EXPORT2 838 u_memchr32(const UChar *s, UChar32 c, int32_t count); 839 840 /** 841 * Find the last occurrence of a BMP code point in a string. 842 * A surrogate code point is found only if its match in the text is not 843 * part of a surrogate pair. 844 * A NUL character is found at the string terminator. 845 * 846 * @param s The string to search (contains <code>count</code> UChars). 847 * @param c The BMP code point to find. 848 * @param count The length of the string. 849 * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> 850 * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 851 * @stable ICU 2.4 852 * 853 * @see u_strrchr 854 * @see u_memrchr32 855 * @see u_strFindLast 856 */ 857 U_STABLE UChar* U_EXPORT2 858 u_memrchr(const UChar *s, UChar c, int32_t count); 859 860 /** 861 * Find the last occurrence of a code point in a string. 862 * A surrogate code point is found only if its match in the text is not 863 * part of a surrogate pair. 864 * A NUL character is found at the string terminator. 865 * 866 * @param s The string to search (contains <code>count</code> UChars). 867 * @param c The code point to find. 868 * @param count The length of the string. 869 * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> 870 * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 871 * @stable ICU 2.4 872 * 873 * @see u_strrchr32 874 * @see u_memrchr 875 * @see u_strFindLast 876 */ 877 U_STABLE UChar* U_EXPORT2 878 u_memrchr32(const UChar *s, UChar32 c, int32_t count); 879 880 /** 881 * Unicode String literals in C. 882 * We need one macro to declare a variable for the string 883 * and to statically preinitialize it if possible, 884 * and a second macro to dynamically intialize such a string variable if necessary. 885 * 886 * The macros are defined for maximum performance. 887 * They work only for strings that contain "invariant characters", i.e., 888 * only latin letters, digits, and some punctuation. 889 * See utypes.h for details. 890 * 891 * A pair of macros for a single string must be used with the same 892 * parameters. 893 * The string parameter must be a C string literal. 894 * The length of the string, not including the terminating 895 * <code>NUL</code>, must be specified as a constant. 896 * The U_STRING_DECL macro should be invoked exactly once for one 897 * such string variable before it is used. 898 * 899 * Usage: 900 * <pre> 901 *   U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11); 902 *   U_STRING_DECL(ustringVar2, "jumps 5%", 8); 903 *   static UBool didInit=FALSE; 904 *   905 *   int32_t function() { 906 *   if(!didInit) { 907 *   U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11); 908 *   U_STRING_INIT(ustringVar2, "jumps 5%", 8); 909 *   didInit=TRUE; 910 *   } 911 *   return u_strcmp(ustringVar1, ustringVar2); 912 *   } 913 * </pre> 914 * @stable ICU 2.0 915 */ 916 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY 917 # define U_STRING_DECL(var, cs, length) static const wchar_t var[(length)+1]={ L ## cs } 918 /**@stable ICU 2.0 */ 919 # define U_STRING_INIT(var, cs, length) 920 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 921 # define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)cs } 922 /**@stable ICU 2.0 */ 923 # define U_STRING_INIT(var, cs, length) 924 #else 925 # define U_STRING_DECL(var, cs, length) static UChar var[(length)+1] 926 /**@stable ICU 2.0 */ 927 # define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1) 928 #endif 929 930 /** 931 * Unescape a string of characters and write the resulting 932 * Unicode characters to the destination buffer. The following escape 933 * sequences are recognized: 934 * 935 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] 936 * \\Uhhhhhhhh 8 hex digits 937 * \\xhh 1-2 hex digits 938 * \\x{h...} 1-8 hex digits 939 * \\ooo 1-3 octal digits; o in [0-7] 940 * \\cX control-X; X is masked with 0x1F 941 * 942 * as well as the standard ANSI C escapes: 943 * 944 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, 945 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, 946 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C 947 * 948 * Anything else following a backslash is generically escaped. For 949 * example, "[a\\-z]" returns "[a-z]". 950 * 951 * If an escape sequence is ill-formed, this method returns an empty 952 * string. An example of an ill-formed sequence is "\\u" followed by 953 * fewer than 4 hex digits. 954 * 955 * The above characters are recognized in the compiler's codepage, 956 * that is, they are coded as 'u', '\\', etc. Characters that are 957 * not parts of escape sequences are converted using u_charsToUChars(). 958 * 959 * This function is similar to UnicodeString::unescape() but not 960 * identical to it. The latter takes a source UnicodeString, so it 961 * does escape recognition but no conversion. 962 * 963 * @param src a zero-terminated string of invariant characters 964 * @param dest pointer to buffer to receive converted and unescaped 965 * text and, if there is room, a zero terminator. May be NULL for 966 * preflighting, in which case no UChars will be written, but the 967 * return value will still be valid. On error, an empty string is 968 * stored here (if possible). 969 * @param destCapacity the number of UChars that may be written at 970 * dest. Ignored if dest == NULL. 971 * @return the length of unescaped string. 972 * @see u_unescapeAt 973 * @see UnicodeString#unescape() 974 * @see UnicodeString#unescapeAt() 975 * @stable ICU 2.0 976 */ 977 U_STABLE int32_t U_EXPORT2 978 u_unescape(const char *src, 979 UChar *dest, int32_t destCapacity); 980 981 U_CDECL_BEGIN 982 /** 983 * Callback function for u_unescapeAt() that returns a character of 984 * the source text given an offset and a context pointer. The context 985 * pointer will be whatever is passed into u_unescapeAt(). 986 * 987 * @param offset pointer to the offset that will be passed to u_unescapeAt(). 988 * @param context an opaque pointer passed directly into u_unescapeAt() 989 * @return the character represented by the escape sequence at 990 * offset 991 * @see u_unescapeAt 992 * @stable ICU 2.0 993 */ 994 typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context); 995 U_CDECL_END 996 997 /** 998 * Unescape a single sequence. The character at offset-1 is assumed 999 * (without checking) to be a backslash. This method takes a callback 1000 * pointer to a function that returns the UChar at a given offset. By 1001 * varying this callback, ICU functions are able to unescape char* 1002 * strings, UnicodeString objects, and UFILE pointers. 1003 * 1004 * If offset is out of range, or if the escape sequence is ill-formed, 1005 * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape() 1006 * for a list of recognized sequences. 1007 * 1008 * @param charAt callback function that returns a UChar of the source 1009 * text given an offset and a context pointer. 1010 * @param offset pointer to the offset that will be passed to charAt. 1011 * The offset value will be updated upon return to point after the 1012 * last parsed character of the escape sequence. On error the offset 1013 * is unchanged. 1014 * @param length the number of characters in the source text. The 1015 * last character of the source text is considered to be at offset 1016 * length-1. 1017 * @param context an opaque pointer passed directly into charAt. 1018 * @return the character represented by the escape sequence at 1019 * offset, or (UChar32)0xFFFFFFFF on error. 1020 * @see u_unescape() 1021 * @see UnicodeString#unescape() 1022 * @see UnicodeString#unescapeAt() 1023 * @stable ICU 2.0 1024 */ 1025 U_STABLE UChar32 U_EXPORT2 1026 u_unescapeAt(UNESCAPE_CHAR_AT charAt, 1027 int32_t *offset, 1028 int32_t length, 1029 void *context); 1030 1031 /** 1032 * Uppercase the characters in a string. 1033 * Casing is locale-dependent and context-sensitive. 1034 * The result may be longer or shorter than the original. 1035 * The source string and the destination buffer are allowed to overlap. 1036 * 1037 * @param dest A buffer for the result string. The result will be zero-terminated if 1038 * the buffer is large enough. 1039 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then 1040 * dest may be NULL and the function will only return the length of the result 1041 * without writing any of the result string. 1042 * @param src The original string 1043 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1044 * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. 1045 * @param pErrorCode Must be a valid pointer to an error code value, 1046 * which must not indicate a failure before the function call. 1047 * @return The length of the result string. It may be greater than destCapacity. In that case, 1048 * only some of the result was written to the destination buffer. 1049 * @stable ICU 2.0 1050 */ 1051 U_STABLE int32_t U_EXPORT2 1052 u_strToUpper(UChar *dest, int32_t destCapacity, 1053 const UChar *src, int32_t srcLength, 1054 const char *locale, 1055 UErrorCode *pErrorCode); 1056 1057 /** 1058 * Lowercase the characters in a string. 1059 * Casing is locale-dependent and context-sensitive. 1060 * The result may be longer or shorter than the original. 1061 * The source string and the destination buffer are allowed to overlap. 1062 * 1063 * @param dest A buffer for the result string. The result will be zero-terminated if 1064 * the buffer is large enough. 1065 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then 1066 * dest may be NULL and the function will only return the length of the result 1067 * without writing any of the result string. 1068 * @param src The original string 1069 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1070 * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. 1071 * @param pErrorCode Must be a valid pointer to an error code value, 1072 * which must not indicate a failure before the function call. 1073 * @return The length of the result string. It may be greater than destCapacity. In that case, 1074 * only some of the result was written to the destination buffer. 1075 * @stable ICU 2.0 1076 */ 1077 U_STABLE int32_t U_EXPORT2 1078 u_strToLower(UChar *dest, int32_t destCapacity, 1079 const UChar *src, int32_t srcLength, 1080 const char *locale, 1081 UErrorCode *pErrorCode); 1082 1083 #if !UCONFIG_NO_BREAK_ITERATION 1084 1085 /** 1086 * Titlecase a string. 1087 * Casing is locale-dependent and context-sensitive. 1088 * Titlecasing uses a break iterator to find the first characters of words 1089 * that are to be titlecased. It titlecases those characters and lowercases 1090 * all others. 1091 * 1092 * The titlecase break iterator can be provided to customize for arbitrary 1093 * styles, using rules and dictionaries beyond the standard iterators. 1094 * It may be more efficient to always provide an iterator to avoid 1095 * opening and closing one for each string. 1096 * The standard titlecase iterator for the root locale implements the 1097 * algorithm of Unicode TR 21. 1098 * 1099 * This function uses only the first() and next() methods of the 1100 * provided break iterator. 1101 * 1102 * The result may be longer or shorter than the original. 1103 * The source string and the destination buffer are allowed to overlap. 1104 * 1105 * @param dest A buffer for the result string. The result will be zero-terminated if 1106 * the buffer is large enough. 1107 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then 1108 * dest may be NULL and the function will only return the length of the result 1109 * without writing any of the result string. 1110 * @param src The original string 1111 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1112 * @param titleIter A break iterator to find the first characters of words 1113 * that are to be titlecased. 1114 * If none is provided (NULL), then a standard titlecase 1115 * break iterator is opened. 1116 * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. 1117 * @param pErrorCode Must be a valid pointer to an error code value, 1118 * which must not indicate a failure before the function call. 1119 * @return The length of the result string. It may be greater than destCapacity. In that case, 1120 * only some of the result was written to the destination buffer. 1121 * @stable ICU 2.1 1122 */ 1123 U_STABLE int32_t U_EXPORT2 1124 u_strToTitle(UChar *dest, int32_t destCapacity, 1125 const UChar *src, int32_t srcLength, 1126 UBreakIterator *titleIter, 1127 const char *locale, 1128 UErrorCode *pErrorCode); 1129 1130 #endif 1131 1132 /** 1133 * Case-fold the characters in a string. 1134 * Case-folding is locale-independent and not context-sensitive, 1135 * but there is an option for whether to include or exclude mappings for dotted I 1136 * and dotless i that are marked with 'I' in CaseFolding.txt. 1137 * The result may be longer or shorter than the original. 1138 * The source string and the destination buffer are allowed to overlap. 1139 * 1140 * @param dest A buffer for the result string. The result will be zero-terminated if 1141 * the buffer is large enough. 1142 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then 1143 * dest may be NULL and the function will only return the length of the result 1144 * without writing any of the result string. 1145 * @param src The original string 1146 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1147 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 1148 * @param pErrorCode Must be a valid pointer to an error code value, 1149 * which must not indicate a failure before the function call. 1150 * @return The length of the result string. It may be greater than destCapacity. In that case, 1151 * only some of the result was written to the destination buffer. 1152 * @stable ICU 2.0 1153 */ 1154 U_STABLE int32_t U_EXPORT2 1155 u_strFoldCase(UChar *dest, int32_t destCapacity, 1156 const UChar *src, int32_t srcLength, 1157 uint32_t options, 1158 UErrorCode *pErrorCode); 1159 1160 /** 1161 * Converts a sequence of UChars to wchar_t units. 1162 * 1163 * @param dest A buffer for the result string. The result will be zero-terminated if 1164 * the buffer is large enough. 1165 * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then 1166 * dest may be NULL and the function will only return the length of the 1167 * result without writing any of the result string (pre-flighting). 1168 * @param pDestLength A pointer to receive the number of units written to the destination. If 1169 * pDestLength!=NULL then *pDestLength is always set to the 1170 * number of output units corresponding to the transformation of 1171 * all the input units, even in case of a buffer overflow. 1172 * @param src The original source string 1173 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1174 * @param pErrorCode Must be a valid pointer to an error code value, 1175 * which must not indicate a failure before the function call. 1176 * @return The pointer to destination buffer. 1177 * @stable ICU 2.0 1178 */ 1179 U_STABLE wchar_t* U_EXPORT2 1180 u_strToWCS(wchar_t *dest, 1181 int32_t destCapacity, 1182 int32_t *pDestLength, 1183 const UChar *src, 1184 int32_t srcLength, 1185 UErrorCode *pErrorCode); 1186 /** 1187 * Converts a sequence of wchar_t units to UChars 1188 * 1189 * @param dest A buffer for the result string. The result will be zero-terminated if 1190 * the buffer is large enough. 1191 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then 1192 * dest may be NULL and the function will only return the length of the 1193 * result without writing any of the result string (pre-flighting). 1194 * @param pDestLength A pointer to receive the number of units written to the destination. If 1195 * pDestLength!=NULL then *pDestLength is always set to the 1196 * number of output units corresponding to the transformation of 1197 * all the input units, even in case of a buffer overflow. 1198 * @param src The original source string 1199 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1200 * @param pErrorCode Must be a valid pointer to an error code value, 1201 * which must not indicate a failure before the function call. 1202 * @return The pointer to destination buffer. 1203 * @stable ICU 2.0 1204 */ 1205 U_STABLE UChar* U_EXPORT2 1206 u_strFromWCS(UChar *dest, 1207 int32_t destCapacity, 1208 int32_t *pDestLength, 1209 const wchar_t *src, 1210 int32_t srcLength, 1211 UErrorCode *pErrorCode); 1212 /** 1213 * Converts a sequence of UChars (UTF-16) to UTF-8 bytes 1214 * 1215 * @param dest A buffer for the result string. The result will be zero-terminated if 1216 * the buffer is large enough. 1217 * @param destCapacity The size of the buffer (number of chars). If it is 0, then 1218 * dest may be NULL and the function will only return the length of the 1219 * result without writing any of the result string (pre-flighting). 1220 * @param pDestLength A pointer to receive the number of units written to the destination. If 1221 * pDestLength!=NULL then *pDestLength is always set to the 1222 * number of output units corresponding to the transformation of 1223 * all the input units, even in case of a buffer overflow. 1224 * @param src The original source string 1225 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1226 * @param pErrorCode Must be a valid pointer to an error code value, 1227 * which must not indicate a failure before the function call. 1228 * @return The pointer to destination buffer. 1229 * @stable ICU 2.0 1230 */ 1231 U_STABLE char* U_EXPORT2 1232 u_strToUTF8(char *dest, 1233 int32_t destCapacity, 1234 int32_t *pDestLength, 1235 const UChar *src, 1236 int32_t srcLength, 1237 UErrorCode *pErrorCode); 1238 1239 /** 1240 * Converts a sequence of UTF-8 bytes to UChars (UTF-16). 1241 * 1242 * @param dest A buffer for the result string. The result will be zero-terminated if 1243 * the buffer is large enough. 1244 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then 1245 * dest may be NULL and the function will only return the length of the 1246 * result without writing any of the result string (pre-flighting). 1247 * @param pDestLength A pointer to receive the number of units written to the destination. If 1248 * pDestLength!=NULL then *pDestLength is always set to the 1249 * number of output units corresponding to the transformation of 1250 * all the input units, even in case of a buffer overflow. 1251 * @param src The original source string 1252 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1253 * @param pErrorCode Must be a valid pointer to an error code value, 1254 * which must not indicate a failure before the function call. 1255 * @return The pointer to destination buffer. 1256 * @stable ICU 2.0 1257 */ 1258 U_STABLE UChar* U_EXPORT2 1259 u_strFromUTF8(UChar *dest, 1260 int32_t destCapacity, 1261 int32_t *pDestLength, 1262 const char *src, 1263 int32_t srcLength, 1264 UErrorCode *pErrorCode); 1265 1266 /** 1267 * Converts a sequence of UChars (UTF-16) to UTF32 units. 1268 * 1269 * @param dest A buffer for the result string. The result will be zero-terminated if 1270 * the buffer is large enough. 1271 * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then 1272 * dest may be NULL and the function will only return the length of the 1273 * result without writing any of the result string (pre-flighting). 1274 * @param pDestLength A pointer to receive the number of units written to the destination. If 1275 * pDestLength!=NULL then *pDestLength is always set to the 1276 * number of output units corresponding to the transformation of 1277 * all the input units, even in case of a buffer overflow. 1278 * @param src The original source string 1279 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1280 * @param pErrorCode Must be a valid pointer to an error code value, 1281 * which must not indicate a failure before the function call. 1282 * @return The pointer to destination buffer. 1283 * @stable ICU 2.0 1284 */ 1285 U_STABLE UChar32* U_EXPORT2 1286 u_strToUTF32(UChar32 *dest, 1287 int32_t destCapacity, 1288 int32_t *pDestLength, 1289 const UChar *src, 1290 int32_t srcLength, 1291 UErrorCode *pErrorCode); 1292 1293 /** 1294 * Converts a sequence of UTF32 units to UChars (UTF-16) 1295 * 1296 * @param dest A buffer for the result string. The result will be zero-terminated if 1297 * the buffer is large enough. 1298 * @param destCapacity The size of the buffer (number of UChars). If it is 0, then 1299 * dest may be NULL and the function will only return the length of the 1300 * result without writing any of the result string (pre-flighting). 1301 * @param pDestLength A pointer to receive the number of units written to the destination. If 1302 * pDestLength!=NULL then *pDestLength is always set to the 1303 * number of output units corresponding to the transformation of 1304 * all the input units, even in case of a buffer overflow. 1305 * @param src The original source string 1306 * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 1307 * @param pErrorCode Must be a valid pointer to an error code value, 1308 * which must not indicate a failure before the function call. 1309 * @return The pointer to destination buffer. 1310 * @stable ICU 2.0 1311 */ 1312 U_STABLE UChar* U_EXPORT2 1313 u_strFromUTF32(UChar *dest, 1314 int32_t destCapacity, 1315 int32_t *pDestLength, 1316 const UChar32 *src, 1317 int32_t srcLength, 1318 UErrorCode *pErrorCode); 1319 1320 #endif 1321