1 /* 2 ********************************************************************** 3 * Copyright (C) 1998-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * File unistr.h 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 09/25/98 stephen Creation. 13 * 11/11/98 stephen Changed per 11/9 code review. 14 * 04/20/99 stephen Overhauled per 4/16 code review. 15 * 11/18/99 aliu Made to inherit from Replaceable. Added method 16 * handleReplaceBetween(); other methods unchanged. 17 * 06/25/01 grhoten Remove dependency on iostream. 18 ****************************************************************************** 19 */ 20 21 #ifndef UNISTR_H 22 #define UNISTR_H 23 24 /** 25 * \file 26 * \brief C++ API: Unicode String 27 */ 28 29 #include "unicode/utypes.h" 30 #include "unicode/rep.h" 31 #include "unicode/std_string.h" 32 #include "unicode/stringpiece.h" 33 #include "unicode/bytestream.h" 34 #include "unicode/ucasemap.h" 35 36 struct UConverter; // unicode/ucnv.h 37 class StringThreadTest; 38 39 #ifndef U_COMPARE_CODE_POINT_ORDER 40 /* see also ustring.h and unorm.h */ 41 /** 42 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 43 * Compare strings in code point order instead of code unit order. 44 * @stable ICU 2.2 45 */ 46 #define U_COMPARE_CODE_POINT_ORDER 0x8000 47 #endif 48 49 #ifndef USTRING_H 50 /** 51 * \ingroup ustring_ustrlen 52 */ 53 U_STABLE int32_t U_EXPORT2 54 u_strlen(const UChar *s); 55 #endif 56 57 #ifndef U_HIDE_INTERNAL_API 58 /** 59 * \def U_STRING_CASE_MAPPER_DEFINED 60 * @internal 61 */ 62 63 #ifndef U_STRING_CASE_MAPPER_DEFINED 64 #define U_STRING_CASE_MAPPER_DEFINED 65 66 /** 67 * Internal string case mapping function type. 
68 * @internal 69 */ 70 typedef int32_t U_CALLCONV 71 UStringCaseMapper(const UCaseMap *csm, 72 UChar *dest, int32_t destCapacity, 73 const UChar *src, int32_t srcLength, 74 UErrorCode *pErrorCode); 75 76 #endif 77 #endif /* U_HIDE_INTERNAL_API */ 78 79 U_NAMESPACE_BEGIN 80 81 class BreakIterator; // unicode/brkiter.h 82 class Locale; // unicode/locid.h 83 class StringCharacterIterator; 84 class UnicodeStringAppendable; // unicode/appendable.h 85 86 /* The <iostream> include has been moved to unicode/ustream.h */ 87 88 /** 89 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 90 * which constructs a Unicode string from an invariant-character char * string. 91 * About invariant characters see utypes.h. 92 * This constructor has no runtime dependency on conversion code and is 93 * therefore recommended over ones taking a charset name string 94 * (where the empty string "" indicates invariant-character conversion). 95 * 96 * @stable ICU 3.2 97 */ 98 #define US_INV icu::UnicodeString::kInvariant 99 100 /** 101 * Unicode String literals in C++. 102 * Dependent on the platform properties, different UnicodeString 103 * constructors should be used to create a UnicodeString object from 104 * a string literal. 105 * The macros are defined for maximum performance. 106 * They work only for strings that contain "invariant characters", i.e., 107 * only latin letters, digits, and some punctuation. 108 * See utypes.h for details. 109 * 110 * The string parameter must be a C string literal. 111 * The length of the string, not including the terminating 112 * <code>NUL</code>, must be specified as a constant. 113 * The U_STRING_DECL macro should be invoked exactly once for one 114 * such string variable before it is used. 
115 * @stable ICU 2.0 116 */ 117 #if defined(U_DECLARE_UTF16) 118 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 119 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 120 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) 121 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 122 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) 123 #else 124 # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) 125 #endif 126 127 /** 128 * Unicode String literals in C++. 129 * Dependent on the platform properties, different UnicodeString 130 * constructors should be used to create a UnicodeString object from 131 * a string literal. 132 * The macros are defined for improved performance. 133 * They work only for strings that contain "invariant characters", i.e., 134 * only latin letters, digits, and some punctuation. 135 * See utypes.h for details. 136 * 137 * The string parameter must be a C string literal. 138 * @stable ICU 2.0 139 */ 140 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 141 142 /** 143 * \def UNISTR_FROM_CHAR_EXPLICIT 144 * This can be defined to be empty or "explicit". 145 * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) 146 * constructors are marked as explicit, preventing their inadvertent use. 147 * @stable ICU 49 148 */ 149 #ifndef UNISTR_FROM_CHAR_EXPLICIT 150 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 151 // Auto-"explicit" in ICU library code. 152 # define UNISTR_FROM_CHAR_EXPLICIT explicit 153 # else 154 // Empty by default for source code compatibility. 
155 # define UNISTR_FROM_CHAR_EXPLICIT 156 # endif 157 #endif 158 159 /** 160 * \def UNISTR_FROM_STRING_EXPLICIT 161 * This can be defined to be empty or "explicit". 162 * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) 163 * constructors are marked as explicit, preventing their inadvertent use. 164 * 165 * In particular, this helps prevent accidentally depending on ICU conversion code 166 * by passing a string literal into an API with a const UnicodeString & parameter. 167 * @stable ICU 49 168 */ 169 #ifndef UNISTR_FROM_STRING_EXPLICIT 170 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 171 // Auto-"explicit" in ICU library code. 172 # define UNISTR_FROM_STRING_EXPLICIT explicit 173 # else 174 // Empty by default for source code compatibility. 175 # define UNISTR_FROM_STRING_EXPLICIT 176 # endif 177 #endif 178 179 /** 180 * UnicodeString is a string class that stores Unicode characters directly and provides 181 * similar functionality as the Java String and StringBuffer classes. 182 * It is a concrete implementation of the abstract class Replaceable (for transliteration). 183 * 184 * The UnicodeString class is not suitable for subclassing. 185 * 186 * <p>For an overview of Unicode strings in C and C++ see the 187 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> 188 * 189 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. 190 * A Unicode character may be stored with either one code unit 191 * (the most common case) or with a matched pair of special code units 192 * ("surrogates"). The data type for code units is UChar. 193 * For single-character handling, a Unicode character code <em>point</em> is a value 194 * in the range 0..0x10ffff. 
ICU uses the UChar32 type for code points.</p> 195 * 196 * <p>Indexes and offsets into and lengths of strings always count code units, not code points. 197 * This is the same as with multi-byte char* strings in traditional string handling. 198 * Operations on partial strings typically do not test for code point boundaries. 199 * If necessary, the user needs to take care of such boundaries by testing for the code unit 200 * values or by using functions like 201 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() 202 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p> 203 * 204 * UnicodeString methods are more lenient with regard to input parameter values 205 * than other ICU APIs. In particular: 206 * - If indexes are out of bounds for a UnicodeString object 207 * (<0 or >length()) then they are "pinned" to the nearest boundary. 208 * - If primitive string pointer values (e.g., const UChar * or char *) 209 * for input strings are NULL, then those input string parameters are treated 210 * as if they pointed to an empty string. 211 * However, this is <em>not</em> the case for char * parameters for charset names 212 * or other IDs. 213 * - Most UnicodeString methods do not take a UErrorCode parameter because 214 * there are usually very few opportunities for failure other than a shortage 215 * of memory, error codes in low-level C++ string methods would be inconvenient, 216 * and the error code as the last parameter (ICU convention) would prevent 217 * the use of default parameter values. 218 * Instead, such methods set the UnicodeString into a "bogus" state 219 * (see isBogus()) if an error occurs. 220 * 221 * In string comparisons, two UnicodeString objects that are both "bogus" 222 * compare equal (to be transitive and prevent endless loops in sorting), 223 * and a "bogus" string compares less than any non-"bogus" one. 224 * 225 * Const UnicodeString methods are thread-safe. 
Multiple threads can use 226 * const methods on the same UnicodeString object simultaneously, 227 * but non-const methods must not be called concurrently (in multiple threads) 228 * with any other (const or non-const) methods. 229 * 230 * Similarly, const UnicodeString & parameters are thread-safe. 231 * One object may be passed in as such a parameter concurrently in multiple threads. 232 * This includes the const UnicodeString & parameters for 233 * copy construction, assignment, and cloning. 234 * 235 * <p>UnicodeString uses several storage methods. 236 * String contents can be stored inside the UnicodeString object itself, 237 * in an allocated and shared buffer, or in an outside buffer that is "aliased". 238 * Most of this is done transparently, but careful aliasing in particular provides 239 * significant performance improvements. 240 * Also, the internal buffer is accessible via special functions. 241 * For details see the 242 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> 243 * 244 * @see utf.h 245 * @see CharacterIterator 246 * @stable ICU 2.0 247 */ 248 class U_COMMON_API UnicodeString : public Replaceable 249 { 250 public: 251 252 /** 253 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 254 * which constructs a Unicode string from an invariant-character char * string. 255 * Use the macro US_INV instead of the full qualification for this value. 256 * 257 * @see US_INV 258 * @stable ICU 3.2 259 */ 260 enum EInvariant { 261 /** 262 * @see EInvariant 263 * @stable ICU 3.2 264 */ 265 kInvariant 266 }; 267 268 //======================================== 269 // Read-only operations 270 //======================================== 271 272 /* Comparison - bitwise only - for international comparison use collation */ 273 274 /** 275 * Equality operator. Performs only bitwise comparison. 276 * @param text The UnicodeString to compare to this one. 
277 * @return TRUE if <TT>text</TT> contains the same characters as this one, 278 * FALSE otherwise. 279 * @stable ICU 2.0 280 */ 281 inline UBool operator== (const UnicodeString& text) const; 282 283 /** 284 * Inequality operator. Performs only bitwise comparison. 285 * @param text The UnicodeString to compare to this one. 286 * @return FALSE if <TT>text</TT> contains the same characters as this one, 287 * TRUE otherwise. 288 * @stable ICU 2.0 289 */ 290 inline UBool operator!= (const UnicodeString& text) const; 291 292 /** 293 * Greater than operator. Performs only bitwise comparison. 294 * @param text The UnicodeString to compare to this one. 295 * @return TRUE if the characters in this are bitwise 296 * greater than the characters in <code>text</code>, FALSE otherwise 297 * @stable ICU 2.0 298 */ 299 inline UBool operator> (const UnicodeString& text) const; 300 301 /** 302 * Less than operator. Performs only bitwise comparison. 303 * @param text The UnicodeString to compare to this one. 304 * @return TRUE if the characters in this are bitwise 305 * less than the characters in <code>text</code>, FALSE otherwise 306 * @stable ICU 2.0 307 */ 308 inline UBool operator< (const UnicodeString& text) const; 309 310 /** 311 * Greater than or equal operator. Performs only bitwise comparison. 312 * @param text The UnicodeString to compare to this one. 313 * @return TRUE if the characters in this are bitwise 314 * greater than or equal to the characters in <code>text</code>, FALSE otherwise 315 * @stable ICU 2.0 316 */ 317 inline UBool operator>= (const UnicodeString& text) const; 318 319 /** 320 * Less than or equal operator. Performs only bitwise comparison. 321 * @param text The UnicodeString to compare to this one. 
322 * @return TRUE if the characters in this are bitwise 323 * less than or equal to the characters in <code>text</code>, FALSE otherwise 324 * @stable ICU 2.0 325 */ 326 inline UBool operator<= (const UnicodeString& text) const; 327 328 /** 329 * Compare the characters bitwise in this UnicodeString to 330 * the characters in <code>text</code>. 331 * @param text The UnicodeString to compare to this one. 332 * @return The result of bitwise character comparison: 0 if this 333 * contains the same characters as <code>text</code>, -1 if the characters in 334 * this are bitwise less than the characters in <code>text</code>, +1 if the 335 * characters in this are bitwise greater than the characters 336 * in <code>text</code>. 337 * @stable ICU 2.0 338 */ 339 inline int8_t compare(const UnicodeString& text) const; 340 341 /** 342 * Compare the characters bitwise in the range 343 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 344 * in <TT>text</TT> 345 * @param start the offset at which the compare operation begins 346 * @param length the number of characters of text to compare. 347 * @param text the other text to be compared against this string. 348 * @return The result of bitwise character comparison: 0 if this 349 * contains the same characters as <code>text</code>, -1 if the characters in 350 * this are bitwise less than the characters in <code>text</code>, +1 if the 351 * characters in this are bitwise greater than the characters 352 * in <code>text</code>. 353 * @stable ICU 2.0 354 */ 355 inline int8_t compare(int32_t start, 356 int32_t length, 357 const UnicodeString& text) const; 358 359 /** 360 * Compare the characters bitwise in the range 361 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 362 * in <TT>srcText</TT> in the range 363 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 364 * @param start the offset at which the compare operation begins 365 * @param length the number of characters in this to compare. 
366 * @param srcText the text to be compared 367 * @param srcStart the offset into <TT>srcText</TT> to start comparison 368 * @param srcLength the number of characters in <TT>srcText</TT> to compare 369 * @return The result of bitwise character comparison: 0 if this 370 * contains the same characters as <code>srcText</code>, -1 if the characters in 371 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 372 * characters in this are bitwise greater than the characters 373 * in <code>srcText</code>. 374 * @stable ICU 2.0 375 */ 376 inline int8_t compare(int32_t start, 377 int32_t length, 378 const UnicodeString& srcText, 379 int32_t srcStart, 380 int32_t srcLength) const; 381 382 /** 383 * Compare the characters bitwise in this UnicodeString with the first 384 * <TT>srcLength</TT> characters in <TT>srcChars</TT>. 385 * @param srcChars The characters to compare to this UnicodeString. 386 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 387 * @return The result of bitwise character comparison: 0 if this 388 * contains the same characters as <code>srcChars</code>, -1 if the characters in 389 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 390 * characters in this are bitwise greater than the characters 391 * in <code>srcChars</code>. 392 * @stable ICU 2.0 393 */ 394 inline int8_t compare(const UChar *srcChars, 395 int32_t srcLength) const; 396 397 /** 398 * Compare the characters bitwise in the range 399 * [<TT>start</TT>, <TT>start + length</TT>) with the first 400 * <TT>length</TT> characters in <TT>srcChars</TT> 401 * @param start the offset at which the compare operation begins 402 * @param length the number of characters to compare. 
403 * @param srcChars the characters to be compared 404 * @return The result of bitwise character comparison: 0 if this 405 * contains the same characters as <code>srcChars</code>, -1 if the characters in 406 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 407 * characters in this are bitwise greater than the characters 408 * in <code>srcChars</code>. 409 * @stable ICU 2.0 410 */ 411 inline int8_t compare(int32_t start, 412 int32_t length, 413 const UChar *srcChars) const; 414 415 /** 416 * Compare the characters bitwise in the range 417 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 418 * in <TT>srcChars</TT> in the range 419 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 420 * @param start the offset at which the compare operation begins 421 * @param length the number of characters in this to compare 422 * @param srcChars the characters to be compared 423 * @param srcStart the offset into <TT>srcChars</TT> to start comparison 424 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 425 * @return The result of bitwise character comparison: 0 if this 426 * contains the same characters as <code>srcChars</code>, -1 if the characters in 427 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 428 * characters in this are bitwise greater than the characters 429 * in <code>srcChars</code>. 430 * @stable ICU 2.0 431 */ 432 inline int8_t compare(int32_t start, 433 int32_t length, 434 const UChar *srcChars, 435 int32_t srcStart, 436 int32_t srcLength) const; 437 438 /** 439 * Compare the characters bitwise in the range 440 * [<TT>start</TT>, <TT>limit</TT>) with the characters 441 * in <TT>srcText</TT> in the range 442 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). 
443 * @param start the offset at which the compare operation begins 444 * @param limit the offset immediately following the compare operation 445 * @param srcText the text to be compared 446 * @param srcStart the offset into <TT>srcText</TT> to start comparison 447 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison 448 * @return The result of bitwise character comparison: 0 if this 449 * contains the same characters as <code>srcText</code>, -1 if the characters in 450 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 451 * characters in this are bitwise greater than the characters 452 * in <code>srcText</code>. 453 * @stable ICU 2.0 454 */ 455 inline int8_t compareBetween(int32_t start, 456 int32_t limit, 457 const UnicodeString& srcText, 458 int32_t srcStart, 459 int32_t srcLimit) const; 460 461 /** 462 * Compare two Unicode strings in code point order. 463 * The result may be different from the results of compare(), operator<, etc. 464 * if supplementary characters are present: 465 * 466 * In UTF-16, supplementary characters (with code points U+10000 and above) are 467 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 468 * which means that they compare as less than some other BMP characters like U+feff. 469 * This function compares Unicode strings in code point order. 470 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 471 * 472 * @param text Another string to compare this one to. 473 * @return a negative/zero/positive integer corresponding to whether 474 * this string is less than/equal to/greater than the second one 475 * in code point order 476 * @stable ICU 2.0 477 */ 478 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 479 480 /** 481 * Compare two Unicode strings in code point order. 482 * The result may be different from the results of compare(), operator<, etc. 
483 * if supplementary characters are present: 484 * 485 * In UTF-16, supplementary characters (with code points U+10000 and above) are 486 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 487 * which means that they compare as less than some other BMP characters like U+feff. 488 * This function compares Unicode strings in code point order. 489 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 490 * 491 * @param start The start offset in this string at which the compare operation begins. 492 * @param length The number of code units from this string to compare. 493 * @param srcText Another string to compare this one to. 494 * @return a negative/zero/positive integer corresponding to whether 495 * this string is less than/equal to/greater than the second one 496 * in code point order 497 * @stable ICU 2.0 498 */ 499 inline int8_t compareCodePointOrder(int32_t start, 500 int32_t length, 501 const UnicodeString& srcText) const; 502 503 /** 504 * Compare two Unicode strings in code point order. 505 * The result may be different from the results of compare(), operator<, etc. 506 * if supplementary characters are present: 507 * 508 * In UTF-16, supplementary characters (with code points U+10000 and above) are 509 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 510 * which means that they compare as less than some other BMP characters like U+feff. 511 * This function compares Unicode strings in code point order. 512 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 513 * 514 * @param start The start offset in this string at which the compare operation begins. 515 * @param length The number of code units from this string to compare. 516 * @param srcText Another string to compare this one to. 
517 * @param srcStart The start offset in that string at which the compare operation begins. 518 * @param srcLength The number of code units from that string to compare. 519 * @return a negative/zero/positive integer corresponding to whether 520 * this string is less than/equal to/greater than the second one 521 * in code point order 522 * @stable ICU 2.0 523 */ 524 inline int8_t compareCodePointOrder(int32_t start, 525 int32_t length, 526 const UnicodeString& srcText, 527 int32_t srcStart, 528 int32_t srcLength) const; 529 530 /** 531 * Compare two Unicode strings in code point order. 532 * The result may be different from the results of compare(), operator<, etc. 533 * if supplementary characters are present: 534 * 535 * In UTF-16, supplementary characters (with code points U+10000 and above) are 536 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 537 * which means that they compare as less than some other BMP characters like U+feff. 538 * This function compares Unicode strings in code point order. 539 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 540 * 541 * @param srcChars A pointer to another string to compare this one to. 542 * @param srcLength The number of code units from that string to compare. 543 * @return a negative/zero/positive integer corresponding to whether 544 * this string is less than/equal to/greater than the second one 545 * in code point order 546 * @stable ICU 2.0 547 */ 548 inline int8_t compareCodePointOrder(const UChar *srcChars, 549 int32_t srcLength) const; 550 551 /** 552 * Compare two Unicode strings in code point order. 553 * The result may be different from the results of compare(), operator<, etc. 554 * if supplementary characters are present: 555 * 556 * In UTF-16, supplementary characters (with code points U+10000 and above) are 557 * stored with pairs of surrogate code units. 
These have values from 0xd800 to 0xdfff, 558 * which means that they compare as less than some other BMP characters like U+feff. 559 * This function compares Unicode strings in code point order. 560 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 561 * 562 * @param start The start offset in this string at which the compare operation begins. 563 * @param length The number of code units from this string to compare. 564 * @param srcChars A pointer to another string to compare this one to. 565 * @return a negative/zero/positive integer corresponding to whether 566 * this string is less than/equal to/greater than the second one 567 * in code point order 568 * @stable ICU 2.0 569 */ 570 inline int8_t compareCodePointOrder(int32_t start, 571 int32_t length, 572 const UChar *srcChars) const; 573 574 /** 575 * Compare two Unicode strings in code point order. 576 * The result may be different from the results of compare(), operator<, etc. 577 * if supplementary characters are present: 578 * 579 * In UTF-16, supplementary characters (with code points U+10000 and above) are 580 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 581 * which means that they compare as less than some other BMP characters like U+feff. 582 * This function compares Unicode strings in code point order. 583 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 584 * 585 * @param start The start offset in this string at which the compare operation begins. 586 * @param length The number of code units from this string to compare. 587 * @param srcChars A pointer to another string to compare this one to. 588 * @param srcStart The start offset in that string at which the compare operation begins. 589 * @param srcLength The number of code units from that string to compare. 
590 * @return a negative/zero/positive integer corresponding to whether 591 * this string is less than/equal to/greater than the second one 592 * in code point order 593 * @stable ICU 2.0 594 */ 595 inline int8_t compareCodePointOrder(int32_t start, 596 int32_t length, 597 const UChar *srcChars, 598 int32_t srcStart, 599 int32_t srcLength) const; 600 601 /** 602 * Compare two Unicode strings in code point order. 603 * The result may be different from the results of compare(), operator<, etc. 604 * if supplementary characters are present: 605 * 606 * In UTF-16, supplementary characters (with code points U+10000 and above) are 607 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 608 * which means that they compare as less than some other BMP characters like U+feff. 609 * This function compares Unicode strings in code point order. 610 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 611 * 612 * @param start The start offset in this string at which the compare operation begins. 613 * @param limit The offset after the last code unit from this string to compare. 614 * @param srcText Another string to compare this one to. 615 * @param srcStart The start offset in that string at which the compare operation begins. 616 * @param srcLimit The offset after the last code unit from that string to compare. 617 * @return a negative/zero/positive integer corresponding to whether 618 * this string is less than/equal to/greater than the second one 619 * in code point order 620 * @stable ICU 2.0 621 */ 622 inline int8_t compareCodePointOrderBetween(int32_t start, 623 int32_t limit, 624 const UnicodeString& srcText, 625 int32_t srcStart, 626 int32_t srcLimit) const; 627 628 /** 629 * Compare two strings case-insensitively using full case folding. 630 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). 
631 * 632 * @param text Another string to compare this one to. 633 * @param options A bit set of options: 634 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 635 * Comparison in code unit order with default case folding. 636 * 637 * - U_COMPARE_CODE_POINT_ORDER 638 * Set to choose code point order instead of code unit order 639 * (see u_strCompare for details). 640 * 641 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 642 * 643 * @return A negative, zero, or positive integer indicating the comparison result. 644 * @stable ICU 2.0 645 */ 646 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 647 648 /** 649 * Compare two strings case-insensitively using full case folding. 650 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 651 * 652 * @param start The start offset in this string at which the compare operation begins. 653 * @param length The number of code units from this string to compare. 654 * @param srcText Another string to compare this one to. 655 * @param options A bit set of options: 656 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 657 * Comparison in code unit order with default case folding. 658 * 659 * - U_COMPARE_CODE_POINT_ORDER 660 * Set to choose code point order instead of code unit order 661 * (see u_strCompare for details). 662 * 663 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 664 * 665 * @return A negative, zero, or positive integer indicating the comparison result. 666 * @stable ICU 2.0 667 */ 668 inline int8_t caseCompare(int32_t start, 669 int32_t length, 670 const UnicodeString& srcText, 671 uint32_t options) const; 672 673 /** 674 * Compare two strings case-insensitively using full case folding. 675 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 676 * 677 * @param start The start offset in this string at which the compare operation begins. 678 * @param length The number of code units from this string to compare. 
679 * @param srcText Another string to compare this one to. 680 * @param srcStart The start offset in that string at which the compare operation begins. 681 * @param srcLength The number of code units from that string to compare. 682 * @param options A bit set of options: 683 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 684 * Comparison in code unit order with default case folding. 685 * 686 * - U_COMPARE_CODE_POINT_ORDER 687 * Set to choose code point order instead of code unit order 688 * (see u_strCompare for details). 689 * 690 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 691 * 692 * @return A negative, zero, or positive integer indicating the comparison result. 693 * @stable ICU 2.0 694 */ 695 inline int8_t caseCompare(int32_t start, 696 int32_t length, 697 const UnicodeString& srcText, 698 int32_t srcStart, 699 int32_t srcLength, 700 uint32_t options) const; 701 702 /** 703 * Compare two strings case-insensitively using full case folding. 704 * This is conceptually equivalent to case-folding both strings with the given options and then calling compare(); srcChars is a raw UChar pointer, so the expression this->foldCase(options).compare(srcChars.foldCase(options)) is pseudo-code rather than a callable expression. 705 * 706 * @param srcChars A pointer to another string to compare this one to. 707 * @param srcLength The number of code units from that string to compare. 708 * @param options A bit set of options: 709 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 710 * Comparison in code unit order with default case folding. 711 * 712 * - U_COMPARE_CODE_POINT_ORDER 713 * Set to choose code point order instead of code unit order 714 * (see u_strCompare for details). 715 * 716 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 717 * 718 * @return A negative, zero, or positive integer indicating the comparison result. 719 * @stable ICU 2.0 720 */ 721 inline int8_t caseCompare(const UChar *srcChars, 722 int32_t srcLength, 723 uint32_t options) const; 724 725 /** 726 * Compare two strings case-insensitively using full case folding. 727 * This is conceptually equivalent to case-folding both strings with the given options and then calling compare(); srcChars is a raw UChar pointer, so the expression this->foldCase(options).compare(srcChars.foldCase(options)) is pseudo-code rather than a callable expression. 
728 * 729 * @param start The start offset in this string at which the compare operation begins. 730 * @param length The number of code units from this string to compare. 731 * @param srcChars A pointer to another string to compare this one to. 732 * @param options A bit set of options: 733 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 734 * Comparison in code unit order with default case folding. 735 * 736 * - U_COMPARE_CODE_POINT_ORDER 737 * Set to choose code point order instead of code unit order 738 * (see u_strCompare for details). 739 * 740 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 741 * 742 * @return A negative, zero, or positive integer indicating the comparison result. 743 * @stable ICU 2.0 744 */ 745 inline int8_t caseCompare(int32_t start, 746 int32_t length, 747 const UChar *srcChars, 748 uint32_t options) const; 749 750 /** 751 * Compare two strings case-insensitively using full case folding. 752 * This is conceptually equivalent to case-folding both strings with the given options and then calling compare(); srcChars is a raw UChar pointer, so the expression this->foldCase(options).compare(srcChars.foldCase(options)) is pseudo-code rather than a callable expression. 753 * 754 * @param start The start offset in this string at which the compare operation begins. 755 * @param length The number of code units from this string to compare. 756 * @param srcChars A pointer to another string to compare this one to. 757 * @param srcStart The start offset in that string at which the compare operation begins. 758 * @param srcLength The number of code units from that string to compare. 759 * @param options A bit set of options: 760 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 761 * Comparison in code unit order with default case folding. 762 * 763 * - U_COMPARE_CODE_POINT_ORDER 764 * Set to choose code point order instead of code unit order 765 * (see u_strCompare for details). 766 * 767 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 768 * 769 * @return A negative, zero, or positive integer indicating the comparison result. 
770 * @stable ICU 2.0 771 */ 772 inline int8_t caseCompare(int32_t start, 773 int32_t length, 774 const UChar *srcChars, 775 int32_t srcStart, 776 int32_t srcLength, 777 uint32_t options) const; 778 779 /** 780 * Compare the range [start, limit) of this string with the range [srcStart, srcLimit) of srcText, case-insensitively using full case folding. 781 * For the selected ranges this is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)). 782 * 783 * @param start The start offset in this string at which the compare operation begins. 784 * @param limit The offset after the last code unit from this string to compare. 785 * @param srcText Another string to compare this one to. 786 * @param srcStart The start offset in that string at which the compare operation begins. 787 * @param srcLimit The offset after the last code unit from that string to compare. 788 * @param options A bit set of options: 789 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 790 * Comparison in code unit order with default case folding. 791 * 792 * - U_COMPARE_CODE_POINT_ORDER 793 * Set to choose code point order instead of code unit order 794 * (see u_strCompare for details). 795 * 796 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 797 * 798 * @return A negative, zero, or positive integer indicating the comparison result. 799 * @stable ICU 2.0 800 */ 801 inline int8_t caseCompareBetween(int32_t start, 802 int32_t limit, 803 const UnicodeString& srcText, 804 int32_t srcStart, 805 int32_t srcLimit, 806 uint32_t options) const; 807 808 /** 809 * Determine if this starts with the characters in <TT>text</TT> 810 * @param text The text to match. 811 * @return TRUE if this starts with the characters in <TT>text</TT>, 812 * FALSE otherwise 813 * @stable ICU 2.0 814 */ 815 inline UBool startsWith(const UnicodeString& text) const; 816 817 /** 818 * Determine if this starts with the characters in <TT>srcText</TT> 819 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 820 * @param srcText The text to match.
821 * @param srcStart the offset into <TT>srcText</TT> to start matching 822 * @param srcLength the number of characters in <TT>srcText</TT> to match 823 * @return TRUE if this starts with the characters in <TT>text</TT>, 824 * FALSE otherwise 825 * @stable ICU 2.0 826 */ 827 inline UBool startsWith(const UnicodeString& srcText, 828 int32_t srcStart, 829 int32_t srcLength) const; 830 831 /** 832 * Determine if this starts with the characters in <TT>srcChars</TT> 833 * @param srcChars The characters to match. 834 * @param srcLength the number of characters in <TT>srcChars</TT> 835 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, 836 * FALSE otherwise 837 * @stable ICU 2.0 838 */ 839 inline UBool startsWith(const UChar *srcChars, 840 int32_t srcLength) const; 841 842 /** 843 * Determine if this starts with the characters in <TT>srcChars</TT> 844 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 845 * @param srcChars The characters to match. 846 * @param srcStart the offset into <TT>srcChars</TT> to start matching 847 * @param srcLength the number of characters in <TT>srcChars</TT> to match 848 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise 849 * @stable ICU 2.0 850 */ 851 inline UBool startsWith(const UChar *srcChars, 852 int32_t srcStart, 853 int32_t srcLength) const; 854 855 /** 856 * Determine if this ends with the characters in <TT>text</TT> 857 * @param text The text to match. 858 * @return TRUE if this ends with the characters in <TT>text</TT>, 859 * FALSE otherwise 860 * @stable ICU 2.0 861 */ 862 inline UBool endsWith(const UnicodeString& text) const; 863 864 /** 865 * Determine if this ends with the characters in <TT>srcText</TT> 866 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 867 * @param srcText The text to match.
868 * @param srcStart the offset into <TT>srcText</TT> to start matching 869 * @param srcLength the number of characters in <TT>srcText</TT> to match 870 * @return TRUE if this ends with the characters in <TT>text</TT>, 871 * FALSE otherwise 872 * @stable ICU 2.0 873 */ 874 inline UBool endsWith(const UnicodeString& srcText, 875 int32_t srcStart, 876 int32_t srcLength) const; 877 878 /** 879 * Determine if this ends with the characters in <TT>srcChars</TT> 880 * @param srcChars The characters to match. 881 * @param srcLength the number of characters in <TT>srcChars</TT> 882 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 883 * FALSE otherwise 884 * @stable ICU 2.0 885 */ 886 inline UBool endsWith(const UChar *srcChars, 887 int32_t srcLength) const; 888 889 /** 890 * Determine if this ends with the characters in <TT>srcChars</TT> 891 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 892 * @param srcChars The characters to match. 893 * @param srcStart the offset into <TT>srcChars</TT> to start matching 894 * @param srcLength the number of characters in <TT>srcChars</TT> to match 895 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 896 * FALSE otherwise 897 * @stable ICU 2.0 898 */ 899 inline UBool endsWith(const UChar *srcChars, 900 int32_t srcStart, 901 int32_t srcLength) const; 902 903 904 /* Searching - bitwise only */ 905 906 /** 907 * Locate in this the first occurrence of the characters in <TT>text</TT>, 908 * using bitwise comparison. 909 * @param text The text to search for. 910 * @return The offset into this of the start of <TT>text</TT>, 911 * or -1 if not found. 912 * @stable ICU 2.0 913 */ 914 inline int32_t indexOf(const UnicodeString& text) const; 915 916 /** 917 * Locate in this the first occurrence of the characters in <TT>text</TT> 918 * starting at offset <TT>start</TT>, using bitwise comparison. 919 * @param text The text to search for.
920 * @param start The offset at which searching will start. 921 * @return The offset into this of the start of <TT>text</TT>, 922 * or -1 if not found. 923 * @stable ICU 2.0 924 */ 925 inline int32_t indexOf(const UnicodeString& text, 926 int32_t start) const; 927 928 /** 929 * Locate in this the first occurrence in the range 930 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 931 * in <TT>text</TT>, using bitwise comparison. 932 * @param text The text to search for. 933 * @param start The offset at which searching will start. 934 * @param length The number of characters to search 935 * @return The offset into this of the start of <TT>text</TT>, 936 * or -1 if not found. 937 * @stable ICU 2.0 938 */ 939 inline int32_t indexOf(const UnicodeString& text, 940 int32_t start, 941 int32_t length) const; 942 943 /** 944 * Locate in this the first occurrence in the range 945 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 946 * in <TT>srcText</TT> in the range 947 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 948 * using bitwise comparison. 949 * @param srcText The text to search for. 950 * @param srcStart the offset into <TT>srcText</TT> at which 951 * to start matching 952 * @param srcLength the number of characters in <TT>srcText</TT> to match 953 * @param start the offset into this at which to start matching 954 * @param length the number of characters in this to search 955 * @return The offset into this of the start of the matched <TT>srcText</TT> range, 956 * or -1 if not found. 957 * @stable ICU 2.0 958 */ 959 inline int32_t indexOf(const UnicodeString& srcText, 960 int32_t srcStart, 961 int32_t srcLength, 962 int32_t start, 963 int32_t length) const; 964 965 /** 966 * Locate in this the first occurrence of the characters in 967 * <TT>srcChars</TT> 968 * starting at offset <TT>start</TT>, using bitwise comparison. 969 * @param srcChars The text to search for.
970 * @param srcLength the number of characters in <TT>srcChars</TT> to match 971 * @param start the offset into this at which to start matching 972 * @return The offset into this of the start of <TT>text</TT>, 973 * or -1 if not found. 974 * @stable ICU 2.0 975 */ 976 inline int32_t indexOf(const UChar *srcChars, 977 int32_t srcLength, 978 int32_t start) const; 979 980 /** 981 * Locate in this the first occurrence in the range 982 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 983 * in <TT>srcChars</TT>, using bitwise comparison. 984 * @param srcChars The text to search for. 985 * @param srcLength the number of characters in <TT>srcChars</TT> 986 * @param start The offset at which searching will start. 987 * @param length The number of characters to search 988 * @return The offset into this of the start of <TT>srcChars</TT>, 989 * or -1 if not found. 990 * @stable ICU 2.0 991 */ 992 inline int32_t indexOf(const UChar *srcChars, 993 int32_t srcLength, 994 int32_t start, 995 int32_t length) const; 996 997 /** 998 * Locate in this the first occurrence in the range 999 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1000 * in <TT>srcChars</TT> in the range 1001 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1002 * using bitwise comparison. 1003 * @param srcChars The text to search for. 1004 * @param srcStart the offset into <TT>srcChars</TT> at which 1005 * to start matching 1006 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1007 * @param start the offset into this at which to start matching 1008 * @param length the number of characters in this to search 1009 * @return The offset into this of the start of <TT>text</TT>, 1010 * or -1 if not found. 
1011 * @stable ICU 2.0 1012 */ 1013 int32_t indexOf(const UChar *srcChars, 1014 int32_t srcStart, 1015 int32_t srcLength, 1016 int32_t start, 1017 int32_t length) const; 1018 1019 /** 1020 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1021 * using bitwise comparison. 1022 * @param c The code unit to search for. 1023 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1024 * @stable ICU 2.0 1025 */ 1026 inline int32_t indexOf(UChar c) const; 1027 1028 /** 1029 * Locate in this the first occurrence of the code point <TT>c</TT>, 1030 * using bitwise comparison. 1031 * 1032 * @param c The code point to search for. 1033 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1034 * @stable ICU 2.0 1035 */ 1036 inline int32_t indexOf(UChar32 c) const; 1037 1038 /** 1039 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1040 * starting at offset <TT>start</TT>, using bitwise comparison. 1041 * @param c The code unit to search for. 1042 * @param start The offset at which searching will start. 1043 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1044 * @stable ICU 2.0 1045 */ 1046 inline int32_t indexOf(UChar c, 1047 int32_t start) const; 1048 1049 /** 1050 * Locate in this the first occurrence of the code point <TT>c</TT> 1051 * starting at offset <TT>start</TT>, using bitwise comparison. 1052 * 1053 * @param c The code point to search for. 1054 * @param start The offset at which searching will start. 1055 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1056 * @stable ICU 2.0 1057 */ 1058 inline int32_t indexOf(UChar32 c, 1059 int32_t start) const; 1060 1061 /** 1062 * Locate in this the first occurrence of the BMP code point <code>c</code> 1063 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1064 * using bitwise comparison. 1065 * @param c The code unit to search for. 
1066 * @param start the offset into this at which to start matching 1067 * @param length the number of characters in this to search 1068 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1069 * @stable ICU 2.0 1070 */ 1071 inline int32_t indexOf(UChar c, 1072 int32_t start, 1073 int32_t length) const; 1074 1075 /** 1076 * Locate in this the first occurrence of the code point <TT>c</TT> 1077 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1078 * using bitwise comparison. 1079 * 1080 * @param c The code point to search for. 1081 * @param start the offset into this at which to start matching 1082 * @param length the number of characters in this to search 1083 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1084 * @stable ICU 2.0 1085 */ 1086 inline int32_t indexOf(UChar32 c, 1087 int32_t start, 1088 int32_t length) const; 1089 1090 /** 1091 * Locate in this the last occurrence of the characters in <TT>text</TT>, 1092 * using bitwise comparison. 1093 * @param text The text to search for. 1094 * @return The offset into this of the start of <TT>text</TT>, 1095 * or -1 if not found. 1096 * @stable ICU 2.0 1097 */ 1098 inline int32_t lastIndexOf(const UnicodeString& text) const; 1099 1100 /** 1101 * Locate in this the last occurrence of the characters in <TT>text</TT> 1102 * starting at offset <TT>start</TT>, using bitwise comparison. 1103 * @param text The text to search for. 1104 * @param start The offset at which searching will start. 1105 * @return The offset into this of the start of <TT>text</TT>, 1106 * or -1 if not found. 1107 * @stable ICU 2.0 1108 */ 1109 inline int32_t lastIndexOf(const UnicodeString& text, 1110 int32_t start) const; 1111 1112 /** 1113 * Locate in this the last occurrence in the range 1114 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1115 * in <TT>text</TT>, using bitwise comparison. 1116 * @param text The text to search for. 
1117 * @param start The offset at which searching will start. 1118 * @param length The number of characters to search 1119 * @return The offset into this of the start of <TT>text</TT>, 1120 * or -1 if not found. 1121 * @stable ICU 2.0 1122 */ 1123 inline int32_t lastIndexOf(const UnicodeString& text, 1124 int32_t start, 1125 int32_t length) const; 1126 1127 /** 1128 * Locate in this the last occurrence in the range 1129 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1130 * in <TT>srcText</TT> in the range 1131 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1132 * using bitwise comparison. 1133 * @param srcText The text to search for. 1134 * @param srcStart the offset into <TT>srcText</TT> at which 1135 * to start matching 1136 * @param srcLength the number of characters in <TT>srcText</TT> to match 1137 * @param start the offset into this at which to start matching 1138 * @param length the number of characters in this to search 1139 * @return The offset into this of the start of the matched <TT>srcText</TT> range, 1140 * or -1 if not found. 1141 * @stable ICU 2.0 1142 */ 1143 inline int32_t lastIndexOf(const UnicodeString& srcText, 1144 int32_t srcStart, 1145 int32_t srcLength, 1146 int32_t start, 1147 int32_t length) const; 1148 1149 /** 1150 * Locate in this the last occurrence of the characters in <TT>srcChars</TT> 1151 * starting at offset <TT>start</TT>, using bitwise comparison. 1152 * @param srcChars The text to search for. 1153 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1154 * @param start the offset into this at which to start matching 1155 * @return The offset into this of the start of <TT>text</TT>, 1156 * or -1 if not found.
1157 * @stable ICU 2.0 1158 */ 1159 inline int32_t lastIndexOf(const UChar *srcChars, 1160 int32_t srcLength, 1161 int32_t start) const; 1162 1163 /** 1164 * Locate in this the last occurrence in the range 1165 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1166 * in <TT>srcChars</TT>, using bitwise comparison. 1167 * @param srcChars The text to search for. 1168 * @param srcLength the number of characters in <TT>srcChars</TT> 1169 * @param start The offset at which searching will start. 1170 * @param length The number of characters to search 1171 * @return The offset into this of the start of <TT>srcChars</TT>, 1172 * or -1 if not found. 1173 * @stable ICU 2.0 1174 */ 1175 inline int32_t lastIndexOf(const UChar *srcChars, 1176 int32_t srcLength, 1177 int32_t start, 1178 int32_t length) const; 1179 1180 /** 1181 * Locate in this the last occurrence in the range 1182 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1183 * in <TT>srcChars</TT> in the range 1184 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1185 * using bitwise comparison. 1186 * @param srcChars The text to search for. 1187 * @param srcStart the offset into <TT>srcChars</TT> at which 1188 * to start matching 1189 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1190 * @param start the offset into this at which to start matching 1191 * @param length the number of characters in this to search 1192 * @return The offset into this of the start of the matched <TT>srcChars</TT> range, 1193 * or -1 if not found. 1194 * @stable ICU 2.0 1195 */ 1196 int32_t lastIndexOf(const UChar *srcChars, 1197 int32_t srcStart, 1198 int32_t srcLength, 1199 int32_t start, 1200 int32_t length) const; 1201 1202 /** 1203 * Locate in this the last occurrence of the BMP code point <code>c</code>, 1204 * using bitwise comparison. 1205 * @param c The code unit to search for. 1206 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1207 * @stable ICU 2.0 1208 */ 1209 inline int32_t lastIndexOf(UChar c) const; 1210 1211 /** 1212 * Locate in this the last occurrence of the code point <TT>c</TT>, 1213 * using bitwise comparison. 1214 * 1215 * @param c The code point to search for. 1216 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1217 * @stable ICU 2.0 1218 */ 1219 inline int32_t lastIndexOf(UChar32 c) const; 1220 1221 /** 1222 * Locate in this the last occurrence of the BMP code point <code>c</code> 1223 * starting at offset <TT>start</TT>, using bitwise comparison. 1224 * @param c The code unit to search for. 1225 * @param start The offset at which searching will start. 1226 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1227 * @stable ICU 2.0 1228 */ 1229 inline int32_t lastIndexOf(UChar c, 1230 int32_t start) const; 1231 1232 /** 1233 * Locate in this the last occurrence of the code point <TT>c</TT> 1234 * starting at offset <TT>start</TT>, using bitwise comparison. 1235 * 1236 * @param c The code point to search for. 1237 * @param start The offset at which searching will start. 1238 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1239 * @stable ICU 2.0 1240 */ 1241 inline int32_t lastIndexOf(UChar32 c, 1242 int32_t start) const; 1243 1244 /** 1245 * Locate in this the last occurrence of the BMP code point <code>c</code> 1246 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1247 * using bitwise comparison. 1248 * @param c The code unit to search for. 1249 * @param start the offset into this at which to start matching 1250 * @param length the number of characters in this to search 1251 * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1252 * @stable ICU 2.0 1253 */ 1254 inline int32_t lastIndexOf(UChar c, 1255 int32_t start, 1256 int32_t length) const; 1257 1258 /** 1259 * Locate in this the last occurrence of the code point <TT>c</TT> 1260 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1261 * using bitwise comparison. 1262 * 1263 * @param c The code point to search for. 1264 * @param start the offset into this at which to start matching 1265 * @param length the number of characters in this to search 1266 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1267 * @stable ICU 2.0 1268 */ 1269 inline int32_t lastIndexOf(UChar32 c, 1270 int32_t start, 1271 int32_t length) const; 1272 1273 1274 /* Character access */ 1275 1276 /** 1277 * Return the code unit at offset <tt>offset</tt>. 1278 * If the offset is not valid (i.e., not in 0..length()-1) then U+ffff is returned. 1279 * @param offset a valid offset into the text 1280 * @return the code unit at offset <tt>offset</tt> 1281 * or 0xffff if the offset is not valid for this string 1282 * @stable ICU 2.0 1283 */ 1284 inline UChar charAt(int32_t offset) const; 1285 1286 /** 1287 * Return the code unit at offset <tt>offset</tt>. 1288 * If the offset is not valid (i.e., not in 0..length()-1) then U+ffff is returned. 1289 * @param offset a valid offset into the text 1290 * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string 1291 * @stable ICU 2.0 1292 */ 1293 inline UChar operator[] (int32_t offset) const; 1294 1295 /** 1296 * Return the code point that contains the code unit 1297 * at offset <tt>offset</tt>. 1298 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1299 * @param offset a valid offset into the text 1300 * that indicates the text offset of any of the code units 1301 * that will be assembled into a code point (21-bit value) and returned 1302 * @return the code point of text at <tt>offset</tt> 1303 * or 0xffff if the offset is not valid for this string 1304 * @stable ICU 2.0 1305 */ 1306 UChar32 char32At(int32_t offset) const; 1307 1308 /** 1309 * Adjust a random-access offset so that 1310 * it points to the beginning of a Unicode character. 1311 * The offset that is passed in points to 1312 * any code unit of a code point, 1313 * while the returned offset will point to the first code unit 1314 * of the same code point. 1315 * In UTF-16, if the input offset points to a second surrogate 1316 * of a surrogate pair, then the returned offset will point 1317 * to the first surrogate. 1318 * @param offset a valid offset into one code point of the text 1319 * @return offset of the first code unit of the same code point 1320 * @see U16_SET_CP_START 1321 * @stable ICU 2.0 1322 */ 1323 int32_t getChar32Start(int32_t offset) const; 1324 1325 /** 1326 * Adjust a random-access offset so that 1327 * it points behind a Unicode character. 1328 * The offset that is passed in points behind 1329 * any code unit of a code point, 1330 * while the returned offset will point behind the last code unit 1331 * of the same code point. 1332 * In UTF-16, if the input offset points behind the first surrogate 1333 * (i.e., to the second surrogate) 1334 * of a surrogate pair, then the returned offset will point 1335 * behind the second surrogate (i.e., to the first code unit after the pair). 1336 * @param offset a valid offset after any code unit of a code point of the text 1337 * @return offset of the first code unit after the same code point 1338 * @see U16_SET_CP_LIMIT 1339 * @stable ICU 2.0 1340 */ 1341 int32_t getChar32Limit(int32_t offset) const; 1342 1343 /** 1344 * Move the code unit index along the string by delta code points.
1345 * Interpret the input index as a code unit-based offset into the string, 1346 * move the index forward or backward by delta code points, and 1347 * return the resulting index. 1348 * The input index should point to the first code unit of a code point, 1349 * if that code point has more than one code unit. 1350 * 1351 * Both input and output indexes are code unit-based as for all 1352 * string indexes/offsets in ICU (and other libraries, like MBCS char*). 1353 * If delta<0 then the index is moved backward (toward the start of the string). 1354 * If delta>0 then the index is moved forward (toward the end of the string). 1355 * 1356 * This behaves like CharacterIterator::move32(delta, kCurrent). 1357 * 1358 * Behavior for out-of-bounds indexes: 1359 * <code>moveIndex32</code> pins the input index to 0..length(), i.e., 1360 * if the input index<0 then it is pinned to 0; 1361 * if the input index>length() then it is pinned to length(). 1362 * Afterwards, the index is moved by <code>delta</code> code points 1363 * forward or backward, 1364 * but no further backward than to 0 and no further forward than to length(). 1365 * The resulting index return value will be between 0 and length(), inclusive.
1366 * 1367 * Examples: 1368 * <pre> 1369 * // s has code points 'a' U+10000 'b' U+10ffff U+2029 1370 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); 1371 * 1372 * // initial index: position of U+10000 1373 * int32_t index=1; 1374 * 1375 * // the following examples will all result in index==4, position of U+10ffff 1376 * 1377 * // skip 2 code points from some position in the string 1378 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' 1379 * 1380 * // go to the 3rd code point from the start of s (0-based) 1381 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' 1382 * 1383 * // go to the next-to-last code point of s 1384 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff 1385 * </pre> 1386 * 1387 * @param index input code unit index 1388 * @param delta (signed) code point count to move the index forward or backward 1389 * in the string 1390 * @return the resulting code unit index 1391 * @stable ICU 2.0 1392 */ 1393 int32_t moveIndex32(int32_t index, int32_t delta) const; 1394 1395 /* Substring extraction */ 1396 1397 /** 1398 * Copy the characters in the range 1399 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, 1400 * beginning at <tt>dstStart</tt>. 1401 * If the string aliases to <code>dst</code> itself as an external buffer, 1402 * then extract() will not copy the contents. 1403 * 1404 * @param start offset of first character which will be copied into the array 1405 * @param length the number of characters to extract 1406 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1407 * must be at least (<tt>dstStart + length</tt>). 1408 * @param dstStart the offset in <TT>dst</TT> where the first character 1409 * will be extracted 1410 * @stable ICU 2.0 1411 */ 1412 inline void extract(int32_t start, 1413 int32_t length, 1414 UChar *dst, 1415 int32_t dstStart = 0) const; 1416 1417 /** 1418 * Copy the contents of the string into dest. 
1419 * This is a convenience function that 1420 * checks if there is enough space in dest, 1421 * extracts the entire string if possible, 1422 * and NUL-terminates dest if possible. 1423 * 1424 * If the string fits into dest but cannot be NUL-terminated 1425 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. 1426 * If the string itself does not fit into dest 1427 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. 1428 * 1429 * If the string aliases to <code>dest</code> itself as an external buffer, 1430 * then extract() will not copy the contents. 1431 * 1432 * @param dest Destination string buffer. 1433 * @param destCapacity Number of UChars available at dest. 1434 * @param errorCode ICU error code. 1435 * @return length() 1436 * @stable ICU 2.0 1437 */ 1438 int32_t 1439 extract(UChar *dest, int32_t destCapacity, 1440 UErrorCode &errorCode) const; 1441 1442 /** 1443 * Copy the characters in the range 1444 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString 1445 * <tt>target</tt>. 1446 * @param start offset of first character which will be copied 1447 * @param length the number of characters to extract 1448 * @param target UnicodeString into which to copy characters. 1449 * (No return value: the function is declared void.) 1450 * @stable ICU 2.0 1451 */ 1452 inline void extract(int32_t start, 1453 int32_t length, 1454 UnicodeString& target) const; 1455 1456 /** 1457 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1458 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. 1459 * @param start offset of first character which will be copied into the array 1460 * @param limit offset immediately following the last character to be copied 1461 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1462 * must be at least (<tt>dstStart + (limit - start)</tt>).
1463 * @param dstStart the offset in <TT>dst</TT> where the first character 1464 * will be extracted 1465 * @stable ICU 2.0 1466 */ 1467 inline void extractBetween(int32_t start, 1468 int32_t limit, 1469 UChar *dst, 1470 int32_t dstStart = 0) const; 1471 1472 /** 1473 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1474 * into the UnicodeString <tt>target</tt>. Replaceable API. 1475 * @param start offset of first character which will be copied 1476 * @param limit offset immediately following the last character to be copied 1477 * @param target UnicodeString into which to copy characters. 1478 * (No return value: the function is declared void.) 1479 * @stable ICU 2.0 1480 */ 1481 virtual void extractBetween(int32_t start, 1482 int32_t limit, 1483 UnicodeString& target) const; 1484 1485 /** 1486 * Copy the characters in the range 1487 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters. 1488 * All characters must be invariant (see utypes.h). 1489 * Use US_INV as the last, signature-distinguishing parameter. 1490 * 1491 * This function does not write any more than <code>targetLength</code> 1492 * characters but returns the length of the entire output string 1493 * so that one can allocate a larger buffer and call the function again 1494 * if necessary. 1495 * The output string is NUL-terminated if possible. 1496 * 1497 * @param start offset of first character which will be copied 1498 * @param startLength the number of characters to extract 1499 * @param target the target buffer for extraction, can be NULL 1500 * if targetLength is 0 1501 * @param targetCapacity the length of the target buffer 1502 * @param inv Signature-distinguishing paramater, use US_INV.
1503 * @return the output string length, not including the terminating NUL 1504 * @stable ICU 3.2 1505 */ 1506 int32_t extract(int32_t start, 1507 int32_t startLength, 1508 char *target, 1509 int32_t targetCapacity, 1510 enum EInvariant inv) const; 1511 1512 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 1513 1514 /** 1515 * Copy the characters in the range 1516 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters 1517 * in the platform's default codepage. 1518 * This function does not write any more than <code>targetLength</code> 1519 * characters but returns the length of the entire output string 1520 * so that one can allocate a larger buffer and call the function again 1521 * if necessary. 1522 * The output string is NUL-terminated if possible. 1523 * 1524 * @param start offset of first character which will be copied 1525 * @param startLength the number of characters to extract 1526 * @param target the target buffer for extraction 1527 * @param targetLength the length of the target buffer 1528 * If <TT>target</TT> is NULL, then the number of bytes required for 1529 * <TT>target</TT> is returned. 1530 * @return the output string length, not including the terminating NUL 1531 * @stable ICU 2.0 1532 */ 1533 int32_t extract(int32_t start, 1534 int32_t startLength, 1535 char *target, 1536 uint32_t targetLength) const; 1537 1538 #endif 1539 1540 #if !UCONFIG_NO_CONVERSION 1541 1542 /** 1543 * Copy the characters in the range 1544 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1545 * in a specified codepage. 1546 * The output string is NUL-terminated. 1547 * 1548 * Recommendation: For invariant-character strings use 1549 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1550 * because it avoids object code dependencies of UnicodeString on 1551 * the conversion code.
1552 * 1553 * @param start offset of first character which will be copied 1554 * @param startLength the number of characters to extract 1555 * @param target the target buffer for extraction 1556 * @param codepage the desired codepage for the characters. 0 has 1557 * the special meaning of the default codepage 1558 * If <code>codepage</code> is an empty string (<code>""</code>), 1559 * then a simple conversion is performed on the codepage-invariant 1560 * subset ("invariant characters") of the platform encoding. See utypes.h. 1561 * If <TT>target</TT> is NULL, then the number of bytes required for 1562 * <TT>target</TT> is returned. It is assumed that the target is big enough 1563 * to fit all of the characters. 1564 * @return the output string length, not including the terminating NUL 1565 * @stable ICU 2.0 1566 */ 1567 inline int32_t extract(int32_t start, 1568 int32_t startLength, 1569 char *target, 1570 const char *codepage = 0) const; 1571 1572 /** 1573 * Copy the characters in the range 1574 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters 1575 * in a specified codepage. 1576 * This function does not write any more than <code>targetLength</code> 1577 * characters but returns the length of the entire output string 1578 * so that one can allocate a larger buffer and call the function again 1579 * if necessary. 1580 * The output string is NUL-terminated if possible. 1581 * 1582 * Recommendation: For invariant-character strings use 1583 * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const 1584 * because it avoids object code dependencies of UnicodeString on 1585 * the conversion code.
1586 * 1587 * @param start offset of first character which will be copied 1588 * @param startLength the number of characters to extract 1589 * @param target the target buffer for extraction 1590 * @param targetLength the length of the target buffer 1591 * @param codepage the desired codepage for the characters. 0 has 1592 * the special meaning of the default codepage 1593 * If <code>codepage</code> is an empty string (<code>""</code>), 1594 * then a simple conversion is performed on the codepage-invariant 1595 * subset ("invariant characters") of the platform encoding. See utypes.h. 1596 * If <TT>target</TT> is NULL, then the number of bytes required for 1597 * <TT>target</TT> is returned. 1598 * @return the output string length, not including the terminating NUL 1599 * @stable ICU 2.0 1600 */ 1601 int32_t extract(int32_t start, 1602 int32_t startLength, 1603 char *target, 1604 uint32_t targetLength, 1605 const char *codepage) const; 1606 1607 /** 1608 * Convert the UnicodeString into a codepage string using an existing UConverter. 1609 * The output string is NUL-terminated if possible. 1610 * 1611 * This function avoids the overhead of opening and closing a converter if 1612 * multiple strings are extracted.
1613 * 1614 * @param dest destination string buffer, can be NULL if destCapacity==0 1615 * @param destCapacity the number of chars available at dest 1616 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), 1617 * or NULL for the default converter 1618 * @param errorCode normal ICU error code 1619 * @return the length of the output string, not counting the terminating NUL; 1620 * if the length is greater than destCapacity, then the string will not fit 1621 * and a buffer of the indicated length would need to be passed in 1622 * @stable ICU 2.0 1623 */ 1624 int32_t extract(char *dest, int32_t destCapacity, 1625 UConverter *cnv, 1626 UErrorCode &errorCode) const; 1627 1628 #endif 1629 1630 /** 1631 * Create a temporary substring for the specified range. 1632 * Unlike the substring constructor and setTo() functions, 1633 * the object returned here will be a read-only alias (using getBuffer()) 1634 * rather than copying the text. 1635 * As a result, this substring operation is much faster but requires 1636 * that the original string not be modified or deleted during the lifetime 1637 * of the returned substring object. 1638 * @param start offset of the first character visible in the substring 1639 * @param length length of the substring 1640 * @return a read-only alias UnicodeString object for the substring 1641 * @stable ICU 4.4 1642 */ 1643 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 1644 1645 /** 1646 * Create a temporary substring for the specified range. 1647 * Same as tempSubString(start, length) except that the substring range 1648 * is specified as a (start, limit) pair (with an exclusive limit index) 1649 * rather than a (start, length) pair.
1650 * @param start offset of the first character visible in the substring 1651 * @param limit offset immediately following the last character visible in the substring 1652 * @return a read-only alias UnicodeString object for the substring 1653 * @stable ICU 4.4 1654 */ 1655 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 1656 1657 /** 1658 * Convert the UnicodeString to UTF-8 and write the result 1659 * to a ByteSink. This is called by toUTF8String(). 1660 * Unpaired surrogates are replaced with U+FFFD. 1661 * Calls u_strToUTF8WithSub(). 1662 * 1663 * @param sink A ByteSink to which the UTF-8 version of the string is written. 1664 * sink.Flush() is called at the end. 1665 * @stable ICU 4.2 1666 * @see toUTF8String 1667 */ 1668 void toUTF8(ByteSink &sink) const; 1669 1670 #if U_HAVE_STD_STRING 1671 1672 /** 1673 * Convert the UnicodeString to UTF-8 and append the result 1674 * to a standard string. 1675 * Unpaired surrogates are replaced with U+FFFD. 1676 * Calls toUTF8(). 1677 * 1678 * @param result A standard string (or a compatible object) 1679 * to which the UTF-8 version of the string is appended. 1680 * @return A reference to <code>result</code>, the string object that was passed in. 1681 * @stable ICU 4.2 1682 * @see toUTF8 1683 */ 1684 template<typename StringClass> 1685 StringClass &toUTF8String(StringClass &result) const { 1686 StringByteSink<StringClass> sbs(&result); 1687 toUTF8(sbs); 1688 return result; 1689 } 1690 1691 #endif 1692 1693 /** 1694 * Convert the UnicodeString to UTF-32. 1695 * Unpaired surrogates are replaced with U+FFFD. 1696 * Calls u_strToUTF32WithSub(). 1697 * 1698 * @param utf32 destination string buffer, can be NULL if capacity==0 1699 * @param capacity the number of UChar32s available at utf32 1700 * @param errorCode Standard ICU error code. Its input value must 1701 * pass the U_SUCCESS() test, or else the function returns 1702 * immediately. Check for U_FAILURE() on output or use with 1703 * function chaining. (See User Guide for details.)
1704 * @return The length of the UTF-32 string. 1705 * @see fromUTF32 1706 * @stable ICU 4.2 1707 */ 1708 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 1709 1710 /* Length operations */ 1711 1712 /** 1713 * Return the length of the UnicodeString object. 1714 * The length is the number of UChar code units in the UnicodeString. 1715 * If you want the number of code points, please use countChar32(). 1716 * @return the length of the UnicodeString object 1717 * @see countChar32 1718 * @stable ICU 2.0 1719 */ 1720 inline int32_t length(void) const; 1721 1722 /** 1723 * Count Unicode code points in the length UChar code units of the string. 1724 * A code point may occupy either one or two UChar code units. 1725 * Counting code points involves reading all code units. 1726 * 1727 * This function is basically the inverse of moveIndex32(). 1728 * 1729 * @param start the index of the first code unit to check 1730 * @param length the number of UChar code units to check 1731 * @return the number of code points in the specified code units 1732 * @see length 1733 * @stable ICU 2.0 1734 */ 1735 int32_t 1736 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 1737 1738 /** 1739 * Check if the length UChar code units of the string 1740 * contain more Unicode code points than a certain number. 1741 * This is more efficient than counting all code points in this part of the string 1742 * and comparing that number with a threshold. 1743 * This function may not need to scan the string at all if the length 1744 * falls within a certain range, and 1745 * never needs to count more than 'number+1' code points. 1746 * Logically equivalent to (countChar32(start, length)>number). 1747 * A Unicode code point may occupy either one or two UChar code units.
1748 * 1749 * @param start the index of the first code unit to check (0 for the entire string) 1750 * @param length the number of UChar code units to check 1751 * (use INT32_MAX for the entire string; remember that start/length 1752 * values are pinned) 1753 * @param number The number of code points in the (sub)string is compared against 1754 * the 'number' parameter. 1755 * @return Boolean value for whether the string contains more Unicode code points 1756 * than 'number'. Same as (u_countChar32(s, length)>number). 1757 * @see countChar32 1758 * @see u_strHasMoreChar32Than 1759 * @stable ICU 2.4 1760 */ 1761 UBool 1762 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 1763 1764 /** 1765 * Determine if this string is empty. 1766 * @return TRUE if this string contains 0 characters, FALSE otherwise. 1767 * @stable ICU 2.0 1768 */ 1769 inline UBool isEmpty(void) const; 1770 1771 /** 1772 * Return the capacity of the internal buffer of the UnicodeString object. 1773 * This is useful together with the getBuffer functions. 1774 * See there for details. 1775 * 1776 * @return the number of UChars available in the internal buffer 1777 * @see getBuffer 1778 * @stable ICU 2.0 1779 */ 1780 inline int32_t getCapacity(void) const; 1781 1782 /* Other operations */ 1783 1784 /** 1785 * Generate a hash code for this object. 1786 * @return The hash code of this UnicodeString. 1787 * @stable ICU 2.0 1788 */ 1789 inline int32_t hashCode(void) const; 1790 1791 /** 1792 * Determine whether this string is bogus, that is, whether it has no valid string value. 1793 * A bogus string has no value. It is different from an empty string, 1794 * although in both cases isEmpty() returns TRUE and length() returns 0. 1795 * setToBogus() and isBogus() can be used to indicate that no string value is available. 1796 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and 1797 * length() returns 0.
1798 * 1799 * @return TRUE if the string is bogus (not valid), FALSE otherwise 1800 * @see setToBogus() 1801 * @stable ICU 2.0 1802 */ 1803 inline UBool isBogus(void) const; 1804 1805 1806 //======================================== 1807 // Write operations 1808 //======================================== 1809 1810 /* Assignment operations */ 1811 1812 /** 1813 * Assignment operator. Replace the characters in this UnicodeString 1814 * with the characters from <TT>srcText</TT>. 1815 * @param srcText The text containing the characters to replace 1816 * @return a reference to this 1817 * @stable ICU 2.0 1818 */ 1819 UnicodeString &operator=(const UnicodeString &srcText); 1820 1821 /** 1822 * Almost the same as the assignment operator. 1823 * Replace the characters in this UnicodeString 1824 * with the characters from <code>srcText</code>. 1825 * 1826 * This function works the same as the assignment operator 1827 * for all strings except for ones that are readonly aliases. 1828 * 1829 * Starting with ICU 2.4, the assignment operator and the copy constructor 1830 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1831 * This function implements the old, more efficient but less safe behavior 1832 * of making this string also a readonly alias to the same buffer. 1833 * 1834 * The fastCopyFrom function must be used only if it is known that the lifetime of 1835 * this UnicodeString does not exceed the lifetime of the aliased buffer 1836 * including its contents, for example for strings from resource bundles 1837 * or aliases to string constants. 1838 * 1839 * @param src The text containing the characters to replace. 1840 * @return a reference to this 1841 * @stable ICU 2.4 1842 */ 1843 UnicodeString &fastCopyFrom(const UnicodeString &src); 1844 1845 /** 1846 * Assignment operator. Replace the characters in this UnicodeString 1847 * with the code unit <TT>ch</TT>.
1848 * @param ch the code unit to replace 1849 * @return a reference to this 1850 * @stable ICU 2.0 1851 */ 1852 inline UnicodeString& operator= (UChar ch); 1853 1854 /** 1855 * Assignment operator. Replace the characters in this UnicodeString 1856 * with the code point <TT>ch</TT>. 1857 * @param ch the code point to replace 1858 * @return a reference to this 1859 * @stable ICU 2.0 1860 */ 1861 inline UnicodeString& operator= (UChar32 ch); 1862 1863 /** 1864 * Set the text in the UnicodeString object to the characters 1865 * in <TT>srcText</TT> in the range 1866 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). 1867 * <TT>srcText</TT> is not modified. 1868 * @param srcText the source for the new characters 1869 * @param srcStart the offset into <TT>srcText</TT> where new characters 1870 * will be obtained 1871 * @return a reference to this 1872 * @stable ICU 2.2 1873 */ 1874 inline UnicodeString& setTo(const UnicodeString& srcText, 1875 int32_t srcStart); 1876 1877 /** 1878 * Set the text in the UnicodeString object to the characters 1879 * in <TT>srcText</TT> in the range 1880 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1881 * <TT>srcText</TT> is not modified. 1882 * @param srcText the source for the new characters 1883 * @param srcStart the offset into <TT>srcText</TT> where new characters 1884 * will be obtained 1885 * @param srcLength the number of characters in <TT>srcText</TT> 1886 * to use as the new contents. 1887 * @return a reference to this 1888 * @stable ICU 2.0 1889 */ 1890 inline UnicodeString& setTo(const UnicodeString& srcText, 1891 int32_t srcStart, 1892 int32_t srcLength); 1893 1894 /** 1895 * Set the text in the UnicodeString object to the characters in 1896 * <TT>srcText</TT>. 1897 * <TT>srcText</TT> is not modified.
1898 * @param srcText the source for the new characters 1899 * @return a reference to this 1900 * @stable ICU 2.0 1901 */ 1902 inline UnicodeString& setTo(const UnicodeString& srcText); 1903 1904 /** 1905 * Set the characters in the UnicodeString object to the characters 1906 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 1907 * @param srcChars the source for the new characters 1908 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>. 1909 * @return a reference to this 1910 * @stable ICU 2.0 1911 */ 1912 inline UnicodeString& setTo(const UChar *srcChars, 1913 int32_t srcLength); 1914 1915 /** 1916 * Set the characters in the UnicodeString object to the code unit 1917 * <TT>srcChar</TT>. 1918 * @param srcChar the code unit which becomes the UnicodeString's character 1919 * content 1920 * @return a reference to this 1921 * @stable ICU 2.0 1922 */ 1923 UnicodeString& setTo(UChar srcChar); 1924 1925 /** 1926 * Set the characters in the UnicodeString object to the code point 1927 * <TT>srcChar</TT>. 1928 * @param srcChar the code point which becomes the UnicodeString's character 1929 * content 1930 * @return a reference to this 1931 * @stable ICU 2.0 1932 */ 1933 UnicodeString& setTo(UChar32 srcChar); 1934 1935 /** 1936 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. 1937 * The text will be used for the UnicodeString object, but 1938 * it will not be released when the UnicodeString is destroyed. 1939 * This has copy-on-write semantics: 1940 * When the string is modified, then the buffer is first copied into 1941 * newly allocated memory. 1942 * The aliased buffer is never modified. 1943 * 1944 * In an assignment to another UnicodeString, when using the copy constructor 1945 * or the assignment operator, the text will be copied. 1946 * When using fastCopyFrom(), the text will be aliased again, 1947 * so that both strings then alias the same readonly-text.
1948 * 1949 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 1950 * This must be true if <code>textLength==-1</code>. 1951 * @param text The characters to alias for the UnicodeString. 1952 * @param textLength The number of Unicode characters in <code>text</code> to alias. 1953 * If -1, then this constructor will determine the length 1954 * by calling <code>u_strlen()</code>. 1955 * @return a reference to this 1956 * @stable ICU 2.0 1957 */ 1958 UnicodeString &setTo(UBool isTerminated, 1959 const UChar *text, 1960 int32_t textLength); 1961 1962 /** 1963 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. 1964 * The text will be used for the UnicodeString object, but 1965 * it will not be released when the UnicodeString is destroyed. 1966 * This has write-through semantics: 1967 * For as long as the capacity of the buffer is sufficient, write operations 1968 * will directly affect the buffer. When more capacity is necessary, then 1969 * a new buffer will be allocated and the contents copied as with regularly 1970 * constructed strings. 1971 * In an assignment to another UnicodeString, the buffer will be copied. 1972 * The extract(UChar *dst) function detects whether the dst pointer is the same 1973 * as the string buffer itself and will in this case not copy the contents. 1974 * 1975 * @param buffer The characters to alias for the UnicodeString. 1976 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 1977 * @param buffCapacity The size of <code>buffer</code> in UChars. 1978 * @return a reference to this 1979 * @stable ICU 2.0 1980 */ 1981 UnicodeString &setTo(UChar *buffer, 1982 int32_t buffLength, 1983 int32_t buffCapacity); 1984 1985 /** 1986 * Make this UnicodeString object invalid. 1987 * The string will test TRUE with isBogus(). 1988 * 1989 * A bogus string has no value. It is different from an empty string.
1990 * It can be used to indicate that no string value is available. 1991 * getBuffer() and getTerminatedBuffer() return NULL, and 1992 * length() returns 0. 1993 * 1994 * This utility function is used throughout the UnicodeString 1995 * implementation to indicate that a UnicodeString operation failed, 1996 * and may be used in other functions, 1997 * especially but not exclusively when such functions do not 1998 * take a UErrorCode for simplicity. 1999 * 2000 * The following methods, and no others, will clear a string object's bogus flag: 2001 * - remove() 2002 * - remove(0, INT32_MAX) 2003 * - truncate(0) 2004 * - operator=() (assignment operator) 2005 * - setTo(...) 2006 * 2007 * The simplest way to turn a bogus string into an empty one 2008 * is to use the remove() function. 2009 * Examples for other functions that are equivalent to "set to empty string": 2010 * \code 2011 * if(s.isBogus()) { 2012 * s.remove(); // set to an empty string (remove all), or 2013 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or 2014 * s.truncate(0); // set to an empty string (complete truncation), or 2015 * s=UnicodeString(); // assign an empty string, or 2016 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or 2017 * static const UChar nul=0; 2018 * s.setTo(&nul, 0); // set to an empty C Unicode string 2019 * } 2020 * \endcode 2021 * 2022 * @see isBogus() 2023 * @stable ICU 2.0 2024 */ 2025 void setToBogus(); 2026 2027 /** 2028 * Set the character at the specified offset to the specified character. 2029 * @param offset A valid offset into the text of the character to set 2030 * @param ch The new character 2031 * @return A reference to this 2032 * @stable ICU 2.0 2033 */ 2034 UnicodeString& setCharAt(int32_t offset, 2035 UChar ch); 2036 2037 2038 /* Append operations */ 2039 2040 /** 2041 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString 2042 * object.
2043 * @param ch the code unit to be appended 2044 * @return a reference to this 2045 * @stable ICU 2.0 2046 */ 2047 inline UnicodeString& operator+= (UChar ch); 2048 2049 /** 2050 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString 2051 * object. 2052 * @param ch the code point to be appended 2053 * @return a reference to this 2054 * @stable ICU 2.0 2055 */ 2056 inline UnicodeString& operator+= (UChar32 ch); 2057 2058 /** 2059 * Append operator. Append the characters in <TT>srcText</TT> to the 2060 * UnicodeString object. <TT>srcText</TT> is not modified. 2061 * @param srcText the source for the new characters 2062 * @return a reference to this 2063 * @stable ICU 2.0 2064 */ 2065 inline UnicodeString& operator+= (const UnicodeString& srcText); 2066 2067 /** 2068 * Append the characters 2069 * in <TT>srcText</TT> in the range 2070 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the 2071 * UnicodeString object. <TT>srcText</TT> 2072 * is not modified. 2073 * @param srcText the source for the new characters 2074 * @param srcStart the offset into <TT>srcText</TT> where new characters 2075 * will be obtained 2076 * @param srcLength the number of characters in <TT>srcText</TT> in 2077 * the append string 2078 * @return a reference to this 2079 * @stable ICU 2.0 2080 */ 2081 inline UnicodeString& append(const UnicodeString& srcText, 2082 int32_t srcStart, 2083 int32_t srcLength); 2084 2085 /** 2086 * Append the characters in <TT>srcText</TT> to the UnicodeString object. 2087 * <TT>srcText</TT> is not modified. 2088 * @param srcText the source for the new characters 2089 * @return a reference to this 2090 * @stable ICU 2.0 2091 */ 2092 inline UnicodeString& append(const UnicodeString& srcText); 2093 2094 /** 2095 * Append the characters in <TT>srcChars</TT> in the range 2096 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString 2097 * object. 2098 *
<TT>srcChars</TT> is not modified. 2099 * @param srcChars the source for the new characters 2100 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2101 * will be obtained 2102 * @param srcLength the number of characters in <TT>srcChars</TT> in 2103 * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated 2104 * @return a reference to this 2105 * @stable ICU 2.0 2106 */ 2107 inline UnicodeString& append(const UChar *srcChars, 2108 int32_t srcStart, 2109 int32_t srcLength); 2110 2111 /** 2112 * Append the characters in <TT>srcChars</TT> to the UnicodeString object. 2113 * <TT>srcChars</TT> is not modified. 2114 * @param srcChars the source for the new characters 2115 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; 2116 * can be -1 if <TT>srcChars</TT> is NUL-terminated 2117 * @return a reference to this 2118 * @stable ICU 2.0 2119 */ 2120 inline UnicodeString& append(const UChar *srcChars, 2121 int32_t srcLength); 2122 2123 /** 2124 * Append the code unit <TT>srcChar</TT> to the UnicodeString object. 2125 * @param srcChar the code unit to append 2126 * @return a reference to this 2127 * @stable ICU 2.0 2128 */ 2129 inline UnicodeString& append(UChar srcChar); 2130 2131 /** 2132 * Append the code point <TT>srcChar</TT> to the UnicodeString object. 2133 * @param srcChar the code point to append 2134 * @return a reference to this 2135 * @stable ICU 2.0 2136 */ 2137 UnicodeString& append(UChar32 srcChar); 2138 2139 2140 /* Insert operations */ 2141 2142 /** 2143 * Insert the characters in <TT>srcText</TT> in the range 2144 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2145 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2146 * @param start the offset where the insertion begins 2147 * @param srcText the source for the new characters 2148 * @param srcStart the offset into <TT>srcText</TT> where new characters 2149 * will be obtained 2150 * @param srcLength the number of characters in <TT>srcText</TT> in 2151 * the insert string 2152 * @return a reference to this 2153 * @stable ICU 2.0 2154 */ 2155 inline UnicodeString& insert(int32_t start, 2156 const UnicodeString& srcText, 2157 int32_t srcStart, 2158 int32_t srcLength); 2159 2160 /** 2161 * Insert the characters in <TT>srcText</TT> into the UnicodeString object 2162 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 2163 * @param start the offset where the insertion begins 2164 * @param srcText the source for the new characters 2165 * @return a reference to this 2166 * @stable ICU 2.0 2167 */ 2168 inline UnicodeString& insert(int32_t start, 2169 const UnicodeString& srcText); 2170 2171 /** 2172 * Insert the characters in <TT>srcChars</TT> in the range 2173 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2174 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 2175 * @param start the offset at which the insertion begins 2176 * @param srcChars the source for the new characters 2177 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2178 * will be obtained 2179 * @param srcLength the number of characters in <TT>srcChars</TT> 2180 * in the insert string 2181 * @return a reference to this 2182 * @stable ICU 2.0 2183 */ 2184 inline UnicodeString& insert(int32_t start, 2185 const UChar *srcChars, 2186 int32_t srcStart, 2187 int32_t srcLength); 2188 2189 /** 2190 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object 2191 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2192 * @param start the offset where the insertion begins 2193 * @param srcChars the source for the new characters 2194 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>. 2195 * @return a reference to this 2196 * @stable ICU 2.0 2197 */ 2198 inline UnicodeString& insert(int32_t start, 2199 const UChar *srcChars, 2200 int32_t srcLength); 2201 2202 /** 2203 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at 2204 * offset <TT>start</TT>. 2205 * @param start the offset at which the insertion occurs 2206 * @param srcChar the code unit to insert 2207 * @return a reference to this 2208 * @stable ICU 2.0 2209 */ 2210 inline UnicodeString& insert(int32_t start, 2211 UChar srcChar); 2212 2213 /** 2214 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at 2215 * offset <TT>start</TT>. 2216 * @param start the offset at which the insertion occurs 2217 * @param srcChar the code point to insert 2218 * @return a reference to this 2219 * @stable ICU 2.0 2220 */ 2221 inline UnicodeString& insert(int32_t start, 2222 UChar32 srcChar); 2223 2224 2225 /* Replace operations */ 2226 2227 /** 2228 * Replace the characters in the range 2229 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2230 * <TT>srcText</TT> in the range 2231 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 2232 * <TT>srcText</TT> is not modified. 2233 * @param start the offset at which the replace operation begins 2234 * @param length the number of characters to replace. The character at 2235 * <TT>start + length</TT> is not modified.
2236 * @param srcText the source for the new characters 2237 * @param srcStart the offset into <TT>srcText</TT> where new characters 2238 * will be obtained 2239 * @param srcLength the number of characters in <TT>srcText</TT> in 2240 * the replace string 2241 * @return a reference to this 2242 * @stable ICU 2.0 2243 */ 2244 UnicodeString& replace(int32_t start, 2245 int32_t length, 2246 const UnicodeString& srcText, 2247 int32_t srcStart, 2248 int32_t srcLength); 2249 2250 /** 2251 * Replace the characters in the range 2252 * [<TT>start</TT>, <TT>start + length</TT>) 2253 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is 2254 * not modified. 2255 * @param start the offset at which the replace operation begins 2256 * @param length the number of characters to replace. The character at 2257 * <TT>start + length</TT> is not modified. 2258 * @param srcText the source for the new characters 2259 * @return a reference to this 2260 * @stable ICU 2.0 2261 */ 2262 UnicodeString& replace(int32_t start, 2263 int32_t length, 2264 const UnicodeString& srcText); 2265 2266 /** 2267 * Replace the characters in the range 2268 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2269 * <TT>srcChars</TT> in the range 2270 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> 2271 * is not modified. 2272 * @param start the offset at which the replace operation begins 2273 * @param length the number of characters to replace. The character at 2274 * <TT>start + length</TT> is not modified.
2275 * @param srcChars the source for the new characters 2276 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2277 * will be obtained 2278 * @param srcLength the number of characters in <TT>srcChars</TT> 2279 * in the replace string 2280 * @return a reference to this 2281 * @stable ICU 2.0 2282 */ 2283 UnicodeString& replace(int32_t start, 2284 int32_t length, 2285 const UChar *srcChars, 2286 int32_t srcStart, 2287 int32_t srcLength); 2288 2289 /** 2290 * Replace the characters in the range 2291 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2292 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 2293 * @param start the offset at which the replace operation begins 2294 * @param length the number of characters to replace. The character at 2295 * <TT>start + length</TT> is not modified. 2296 * @param srcChars the source for the new characters 2297 * @param srcLength the number of Unicode characters in <TT>srcChars</TT> 2298 * @return a reference to this 2299 * @stable ICU 2.0 2300 */ 2301 inline UnicodeString& replace(int32_t start, 2302 int32_t length, 2303 const UChar *srcChars, 2304 int32_t srcLength); 2305 2306 /** 2307 * Replace the characters in the range 2308 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit 2309 * <TT>srcChar</TT>. 2310 * @param start the offset at which the replace operation begins 2311 * @param length the number of characters to replace. The character at 2312 * <TT>start + length</TT> is not modified. 2313 * @param srcChar the new code unit 2314 * @return a reference to this 2315 * @stable ICU 2.0 2316 */ 2317 inline UnicodeString& replace(int32_t start, 2318 int32_t length, 2319 UChar srcChar); 2320 2321 /** 2322 * Replace the characters in the range 2323 * [<TT>start</TT>, <TT>start + length</TT>) with the code point 2324 * <TT>srcChar</TT>. 2325 * @param start the offset at which the replace operation begins 2326 * @param length the number of characters to replace.
The character at 2327 * <TT>start + length</TT> is not modified. 2328 * @param srcChar the new code point 2329 * @return a reference to this 2330 * @stable ICU 2.0 2331 */ 2332 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 2333 2334 /** 2335 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2336 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. 2337 * @param start the offset at which the replace operation begins 2338 * @param limit the offset immediately following the replace range 2339 * @param srcText the source for the new characters 2340 * @return a reference to this 2341 * @stable ICU 2.0 2342 */ 2343 inline UnicodeString& replaceBetween(int32_t start, 2344 int32_t limit, 2345 const UnicodeString& srcText); 2346 2347 /** 2348 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2349 * with the characters in <TT>srcText</TT> in the range 2350 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified. 2351 * @param start the offset at which the replace operation begins 2352 * @param limit the offset immediately following the replace range 2353 * @param srcText the source for the new characters 2354 * @param srcStart the offset into <TT>srcText</TT> where new characters 2355 * will be obtained 2356 * @param srcLimit the offset immediately following the range to copy 2357 * in <TT>srcText</TT> 2358 * @return a reference to this 2359 * @stable ICU 2.0 2360 */ 2361 inline UnicodeString& replaceBetween(int32_t start, 2362 int32_t limit, 2363 const UnicodeString& srcText, 2364 int32_t srcStart, 2365 int32_t srcLimit); 2366 2367 /** 2368 * Replace a substring of this object with the given text. 2369 * @param start the beginning index, inclusive; <code>0 <= start 2370 * <= limit</code>. 2371 * @param limit the ending index, exclusive; <code>start <= limit 2372 * <= length()</code>.
2373 * @param text the text to replace characters <code>start</code> 2374 * to <code>limit - 1</code> 2375 * @stable ICU 2.0 2376 */ 2377 virtual void handleReplaceBetween(int32_t start, 2378 int32_t limit, 2379 const UnicodeString& text); 2380 2381 /** 2382 * Replaceable API 2383 * @return TRUE if it has MetaData 2384 * @stable ICU 2.4 2385 */ 2386 virtual UBool hasMetaData() const; 2387 2388 /** 2389 * Copy a substring of this object, retaining attribute (out-of-band) 2390 * information. This method is used to duplicate or reorder substrings. 2391 * The destination index must not overlap the source range. 2392 * 2393 * @param start the beginning index, inclusive; <code>0 <= start <= 2394 * limit</code>. 2395 * @param limit the ending index, exclusive; <code>start <= limit <= 2396 * length()</code>. 2397 * @param dest the destination index. The characters from 2398 * <code>start..limit-1</code> will be copied to <code>dest</code>. 2399 * Implementations of this method may assume that <code>dest <= start || 2400 * dest >= limit</code>. 2401 * @stable ICU 2.0 2402 */ 2403 virtual void copy(int32_t start, int32_t limit, int32_t dest); 2404 2405 /* Search and replace operations */ 2406 2407 /** 2408 * Replace all occurrences of characters in oldText with the characters 2409 * in newText 2410 * @param oldText the text containing the search text 2411 * @param newText the text containing the replacement text 2412 * @return a reference to this 2413 * @stable ICU 2.0 2414 */ 2415 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 2416 const UnicodeString& newText); 2417 2418 /** 2419 * Replace all occurrences of characters in oldText with characters 2420 * in newText 2421 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2422 * @param start the start of the range in which replace will be performed 2423 * @param length the length of the range in which replace will be performed 2424 * @param oldText the text containing the search text 2425 * @param newText the text containing the replacement text 2426 * @return a reference to this 2427 * @stable ICU 2.0 2428 */ 2429 inline UnicodeString& findAndReplace(int32_t start, 2430 int32_t length, 2431 const UnicodeString& oldText, 2432 const UnicodeString& newText); 2433 2434 /** 2435 * Replace all occurrences of characters in oldText in the range 2436 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters 2437 * in newText in the range 2438 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) 2439 * in the range [<TT>start</TT>, <TT>start + length</TT>). 2440 * @param start the start of the range in which replace will be performed 2441 * @param length the length of the range in which replace will be performed 2442 * @param oldText the text containing the search text 2443 * @param oldStart the start of the search range in <TT>oldText</TT> 2444 * @param oldLength the length of the search range in <TT>oldText</TT> 2445 * @param newText the text containing the replacement text 2446 * @param newStart the start of the replacement range in <TT>newText</TT> 2447 * @param newLength the length of the replacement range in <TT>newText</TT> 2448 * @return a reference to this 2449 * @stable ICU 2.0 2450 */ 2451 UnicodeString& findAndReplace(int32_t start, 2452 int32_t length, 2453 const UnicodeString& oldText, 2454 int32_t oldStart, 2455 int32_t oldLength, 2456 const UnicodeString& newText, 2457 int32_t newStart, 2458 int32_t newLength); 2459 2460 2461 /* Remove operations */ 2462 2463 /** 2464 * Remove all characters from the UnicodeString object.
2465 * @return a reference to this 2466 * @stable ICU 2.0 2467 */ 2468 inline UnicodeString& remove(void); 2469 2470 /** 2471 * Remove the characters in the range 2472 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. 2473 * @param start the offset of the first character to remove 2474 * @param length the number of characters to remove 2475 * @return a reference to this 2476 * @stable ICU 2.0 2477 */ 2478 inline UnicodeString& remove(int32_t start, 2479 int32_t length = (int32_t)INT32_MAX); 2480 2481 /** 2482 * Remove the characters in the range 2483 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object. 2484 * @param start the offset of the first character to remove 2485 * @param limit the offset immediately following the range to remove 2486 * @return a reference to this 2487 * @stable ICU 2.0 2488 */ 2489 inline UnicodeString& removeBetween(int32_t start, 2490 int32_t limit = (int32_t)INT32_MAX); 2491 2492 /** 2493 * Retain only the characters in the range 2494 * [<code>start</code>, <code>limit</code>) from the UnicodeString object. 2495 * Removes characters before <code>start</code> and at and after <code>limit</code>. 2496 * @param start the offset of the first character to retain 2497 * @param limit the offset immediately following the range to retain 2498 * @return a reference to this 2499 * @stable ICU 4.4 2500 */ 2501 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 2502 2503 /* Length operations */ 2504 2505 /** 2506 * Pad the start of this UnicodeString with the character <TT>padChar</TT>. 2507 * If the length of this UnicodeString is less than targetLength, 2508 * targetLength - length() copies of padChar will be added to the 2509 * beginning of this UnicodeString. 2510 * @param targetLength the desired length of the string 2511 * @param padChar the character to use for padding. Defaults to 2512 * space (U+0020) 2513 * @return TRUE if the text was padded, FALSE otherwise.
2514 * @stable ICU 2.0 2515 */ 2516 UBool padLeading(int32_t targetLength, 2517 UChar padChar = 0x0020); 2518 2519 /** 2520 * Pad the end of this UnicodeString with the character <TT>padChar</TT>. 2521 * If the length of this UnicodeString is less than targetLength, 2522 * targetLength - length() copies of padChar will be added to the 2523 * end of this UnicodeString. 2524 * @param targetLength the desired length of the string 2525 * @param padChar the character to use for padding. Defaults to 2526 * space (U+0020) 2527 * @return TRUE if the text was padded, FALSE otherwise. 2528 * @stable ICU 2.0 2529 */ 2530 UBool padTrailing(int32_t targetLength, 2531 UChar padChar = 0x0020); 2532 2533 /** 2534 * Truncate this UnicodeString to the <TT>targetLength</TT>. 2535 * @param targetLength the desired length of this UnicodeString. 2536 * @return TRUE if the text was truncated, FALSE otherwise 2537 * @stable ICU 2.0 2538 */ 2539 inline UBool truncate(int32_t targetLength); 2540 2541 /** 2542 * Trims leading and trailing whitespace from this UnicodeString. 2543 * @return a reference to this 2544 * @stable ICU 2.0 2545 */ 2546 UnicodeString& trim(void); 2547 2548 2549 /* Miscellaneous operations */ 2550 2551 /** 2552 * Reverse this UnicodeString in place. 2553 * @return a reference to this 2554 * @stable ICU 2.0 2555 */ 2556 inline UnicodeString& reverse(void); 2557 2558 /** 2559 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in 2560 * this UnicodeString. 2561 * @param start the start of the range to reverse 2562 * @param length the number of characters to reverse 2563 * @return a reference to this 2564 * @stable ICU 2.0 2565 */ 2566 inline UnicodeString& reverse(int32_t start, 2567 int32_t length); 2568 2569 /** 2570 * Convert the characters in this to UPPER CASE following the conventions of 2571 * the default locale. 2572 * @return A reference to this.
2573 * @stable ICU 2.0 2574 */ 2575 UnicodeString& toUpper(void); 2576 2577 /** 2578 * Convert the characters in this to UPPER CASE following the conventions of 2579 * a specific locale. 2580 * @param locale The locale containing the conventions to use. 2581 * @return A reference to this. 2582 * @stable ICU 2.0 2583 */ 2584 UnicodeString& toUpper(const Locale& locale); 2585 2586 /** 2587 * Convert the characters in this to lower case following the conventions of 2588 * the default locale. 2589 * @return A reference to this. 2590 * @stable ICU 2.0 2591 */ 2592 UnicodeString& toLower(void); 2593 2594 /** 2595 * Convert the characters in this to lower case following the conventions of 2596 * a specific locale. 2597 * @param locale The locale containing the conventions to use. 2598 * @return A reference to this. 2599 * @stable ICU 2.0 2600 */ 2601 UnicodeString& toLower(const Locale& locale); 2602 2603 #if !UCONFIG_NO_BREAK_ITERATION 2604 2605 /** 2606 * Titlecase this string, convenience function using the default locale. 2607 * 2608 * Casing is locale-dependent and context-sensitive. 2609 * Titlecasing uses a break iterator to find the first characters of words 2610 * that are to be titlecased. It titlecases those characters and lowercases 2611 * all others. 2612 * 2613 * The titlecase break iterator can be provided to customize for arbitrary 2614 * styles, using rules and dictionaries beyond the standard iterators. 2615 * It may be more efficient to always provide an iterator to avoid 2616 * opening and closing one for each string. 2617 * The standard titlecase iterator for the root locale implements the 2618 * algorithm of Unicode TR 21. 2619 * 2620 * This function uses only the setText(), first() and next() methods of the 2621 * provided break iterator. 2622 * 2623 * @param titleIter A break iterator to find the first characters of words 2624 * that are to be titlecased. 2625 * If none is provided (0), then a standard titlecase 2626 * break iterator is opened. 
2627 * Otherwise the provided iterator is set to the string's text. 2628 * @return A reference to this. 2629 * @stable ICU 2.1 2630 */ 2631 UnicodeString &toTitle(BreakIterator *titleIter); 2632 2633 /** 2634 * Titlecase this string. 2635 * 2636 * Casing is locale-dependent and context-sensitive. 2637 * Titlecasing uses a break iterator to find the first characters of words 2638 * that are to be titlecased. It titlecases those characters and lowercases 2639 * all others. 2640 * 2641 * The titlecase break iterator can be provided to customize for arbitrary 2642 * styles, using rules and dictionaries beyond the standard iterators. 2643 * It may be more efficient to always provide an iterator to avoid 2644 * opening and closing one for each string. 2645 * The standard titlecase iterator for the root locale implements the 2646 * algorithm of Unicode TR 21. 2647 * 2648 * This function uses only the setText(), first() and next() methods of the 2649 * provided break iterator. 2650 * 2651 * @param titleIter A break iterator to find the first characters of words 2652 * that are to be titlecased. 2653 * If none is provided (0), then a standard titlecase 2654 * break iterator is opened. 2655 * Otherwise the provided iterator is set to the string's text. 2656 * @param locale The locale to consider. 2657 * @return A reference to this. 2658 * @stable ICU 2.1 2659 */ 2660 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 2661 2662 /** 2663 * Titlecase this string, with options. 2664 * 2665 * Casing is locale-dependent and context-sensitive. 2666 * Titlecasing uses a break iterator to find the first characters of words 2667 * that are to be titlecased. It titlecases those characters and lowercases 2668 * all others. (This can be modified with options.) 2669 * 2670 * The titlecase break iterator can be provided to customize for arbitrary 2671 * styles, using rules and dictionaries beyond the standard iterators. 
2672 * It may be more efficient to always provide an iterator to avoid 2673 * opening and closing one for each string. 2674 * The standard titlecase iterator for the root locale implements the 2675 * algorithm of Unicode TR 21. 2676 * 2677 * This function uses only the setText(), first() and next() methods of the 2678 * provided break iterator. 2679 * 2680 * @param titleIter A break iterator to find the first characters of words 2681 * that are to be titlecased. 2682 * If none is provided (0), then a standard titlecase 2683 * break iterator is opened. 2684 * Otherwise the provided iterator is set to the string's text. 2685 * @param locale The locale to consider. 2686 * @param options Options bit set, see ucasemap_open(). 2687 * @return A reference to this. 2688 * @see U_TITLECASE_NO_LOWERCASE 2689 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 2690 * @see ucasemap_open 2691 * @stable ICU 3.8 2692 */ 2693 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 2694 2695 #endif 2696 2697 /** 2698 * Case-folds the characters in this string. 2699 * 2700 * Case-folding is locale-independent and not context-sensitive, 2701 * but there is an option for whether to include or exclude mappings for dotted I 2702 * and dotless i that are marked with 'T' in CaseFolding.txt. 2703 * 2704 * The result may be longer or shorter than the original. 2705 * 2706 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 2707 * @return A reference to this. 2708 * @stable ICU 2.0 2709 */ 2710 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 2711 2712 //======================================== 2713 // Access to the internal buffer 2714 //======================================== 2715 2716 /** 2717 * Get a read/write pointer to the internal buffer. 2718 * The buffer is guaranteed to be large enough for at least minCapacity UChars, 2719 * writable, and is still owned by the UnicodeString object. 
2720 * Calls to getBuffer(minCapacity) must not be nested, and 2721 * must be matched with calls to releaseBuffer(newLength). 2722 * If the string buffer was read-only or shared, 2723 * then it will be reallocated and copied. 2724 * 2725 * An attempted nested call will return 0, and will not further modify the 2726 * state of the UnicodeString object. 2727 * It also returns 0 if the string is bogus. 2728 * 2729 * The actual capacity of the string buffer may be larger than minCapacity. 2730 * getCapacity() returns the actual capacity. 2731 * For many operations, the full capacity should be used to avoid reallocations. 2732 * 2733 * While the buffer is "open" between getBuffer(minCapacity) 2734 * and releaseBuffer(newLength), the following applies: 2735 * - The string length is set to 0. 2736 * - Any read API call on the UnicodeString object will behave like on a 0-length string. 2737 * - Any write API call on the UnicodeString object is disallowed and will have no effect. 2738 * - You can read from and write to the returned buffer. 2739 * - The previous string contents will still be in the buffer; 2740 * if you want to use it, then you need to call length() before getBuffer(minCapacity). 2741 * If the length() was greater than minCapacity, then any contents after minCapacity 2742 * may be lost. 2743 * The buffer contents is not NUL-terminated by getBuffer(). 2744 * If length()<getCapacity() then you can terminate it by writing a NUL 2745 * at index length(). 2746 * - You must call releaseBuffer(newLength) before and in order to 2747 * return to normal UnicodeString operation. 
2748 * 2749 * @param minCapacity the minimum number of UChars that are to be available 2750 * in the buffer, starting at the returned pointer; 2751 * default to the current string capacity if minCapacity==-1 2752 * @return a writable pointer to the internal string buffer, 2753 * or 0 if an error occurs (nested calls, out of memory) 2754 * 2755 * @see releaseBuffer 2756 * @see getTerminatedBuffer() 2757 * @stable ICU 2.0 2758 */ 2759 UChar *getBuffer(int32_t minCapacity); 2760 2761 /** 2762 * Release a read/write buffer on a UnicodeString object with an 2763 * "open" getBuffer(minCapacity). 2764 * This function must be called in a matched pair with getBuffer(minCapacity). 2765 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". 2766 * 2767 * It will set the string length to newLength, at most to the current capacity. 2768 * If newLength==-1 then it will set the length according to the 2769 * first NUL in the buffer, or to the capacity if there is no NUL. 2770 * 2771 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. 2772 * 2773 * @param newLength the new length of the UnicodeString object; 2774 * defaults to the current capacity if newLength is greater than that; 2775 * if newLength==-1, it defaults to u_strlen(buffer) but not more than 2776 * the current capacity of the string 2777 * 2778 * @see getBuffer(int32_t minCapacity) 2779 * @stable ICU 2.0 2780 */ 2781 void releaseBuffer(int32_t newLength=-1); 2782 2783 /** 2784 * Get a read-only pointer to the internal buffer. 2785 * This can be called at any time on a valid UnicodeString. 2786 * 2787 * It returns 0 if the string is bogus, or 2788 * during an "open" getBuffer(minCapacity). 2789 * 2790 * It can be called as many times as desired. 2791 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2792 * at which time the pointer is semantically invalidated and must not be used any more. 
2793 * 2794 * The capacity of the buffer can be determined with getCapacity(). 2795 * The part after length() may or may not be initialized and valid, 2796 * depending on the history of the UnicodeString object. 2797 * 2798 * The buffer contents is (probably) not NUL-terminated. 2799 * You can check if it is with 2800 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. 2801 * (See getTerminatedBuffer().) 2802 * 2803 * The buffer may reside in read-only memory. Its contents must not 2804 * be modified. 2805 * 2806 * @return a read-only pointer to the internal string buffer, 2807 * or 0 if the string is empty or bogus 2808 * 2809 * @see getBuffer(int32_t minCapacity) 2810 * @see getTerminatedBuffer() 2811 * @stable ICU 2.0 2812 */ 2813 inline const UChar *getBuffer() const; 2814 2815 /** 2816 * Get a read-only pointer to the internal buffer, 2817 * making sure that it is NUL-terminated. 2818 * This can be called at any time on a valid UnicodeString. 2819 * 2820 * It returns 0 if the string is bogus, or 2821 * during an "open" getBuffer(minCapacity), or if the buffer cannot 2822 * be NUL-terminated (because memory allocation failed). 2823 * 2824 * It can be called as many times as desired. 2825 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2826 * at which time the pointer is semantically invalidated and must not be used any more. 2827 * 2828 * The capacity of the buffer can be determined with getCapacity(). 2829 * The part after length()+1 may or may not be initialized and valid, 2830 * depending on the history of the UnicodeString object. 2831 * 2832 * The buffer contents is guaranteed to be NUL-terminated. 2833 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL 2834 * is written. 2835 * For this reason, this function is not const, unlike getBuffer(). 2836 * Note that a UnicodeString may also contain NUL characters as part of its contents. 
2837 * 2838 * The buffer may reside in read-only memory. Its contents must not 2839 * be modified. 2840 * 2841 * @return a read-only pointer to the internal string buffer, 2842 * or 0 if the string is empty or bogus 2843 * 2844 * @see getBuffer(int32_t minCapacity) 2845 * @see getBuffer() 2846 * @stable ICU 2.2 2847 */ 2848 inline const UChar *getTerminatedBuffer(); 2849 2850 //======================================== 2851 // Constructors 2852 //======================================== 2853 2854 /** Construct an empty UnicodeString. 2855 * @stable ICU 2.0 2856 */ 2857 inline UnicodeString(); 2858 2859 /** 2860 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars 2861 * @param capacity the number of UChars this UnicodeString should hold 2862 * before a resize is necessary; if count is greater than 0 and count 2863 * code points c take up more space than capacity, then capacity is adjusted 2864 * accordingly. 2865 * @param c is used to initially fill the string 2866 * @param count specifies how many code points c are to be written in the 2867 * string 2868 * @stable ICU 2.0 2869 */ 2870 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 2871 2872 /** 2873 * Single UChar (code unit) constructor. 2874 * 2875 * It is recommended to mark this constructor "explicit" by 2876 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2877 * on the compiler command line or similar. 2878 * @param ch the character to place in the UnicodeString 2879 * @stable ICU 2.0 2880 */ 2881 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); 2882 2883 /** 2884 * Single UChar32 (code point) constructor. 2885 * 2886 * It is recommended to mark this constructor "explicit" by 2887 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2888 * on the compiler command line or similar. 2889 * @param ch the character to place in the UnicodeString 2890 * @stable ICU 2.0 2891 */ 2892 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 2893 2894 /** 2895 * UChar* constructor. 
2896 * 2897 * It is recommended to mark this constructor "explicit" by 2898 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 2899 * on the compiler command line or similar. 2900 * @param text The characters to place in the UnicodeString. <TT>text</TT> 2901 * must be NULL (U+0000) terminated. 2902 * @stable ICU 2.0 2903 */ 2904 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); 2905 2906 /** 2907 * UChar* constructor. 2908 * @param text The characters to place in the UnicodeString. 2909 * @param textLength The number of Unicode characters in <TT>text</TT> 2910 * to copy. 2911 * @stable ICU 2.0 2912 */ 2913 UnicodeString(const UChar *text, 2914 int32_t textLength); 2915 2916 /** 2917 * Readonly-aliasing UChar* constructor. 2918 * The text will be used for the UnicodeString object, but 2919 * it will not be released when the UnicodeString is destroyed. 2920 * This has copy-on-write semantics: 2921 * When the string is modified, then the buffer is first copied into 2922 * newly allocated memory. 2923 * The aliased buffer is never modified. 2924 * 2925 * In an assignment to another UnicodeString, when using the copy constructor 2926 * or the assignment operator, the text will be copied. 2927 * When using fastCopyFrom(), the text will be aliased again, 2928 * so that both strings then alias the same readonly-text. 2929 * 2930 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 2931 * This must be true if <code>textLength==-1</code>. 2932 * @param text The characters to alias for the UnicodeString. 2933 * @param textLength The number of Unicode characters in <code>text</code> to alias. 2934 * If -1, then this constructor will determine the length 2935 * by calling <code>u_strlen()</code>. 2936 * @stable ICU 2.0 2937 */ 2938 UnicodeString(UBool isTerminated, 2939 const UChar *text, 2940 int32_t textLength); 2941 2942 /** 2943 * Writable-aliasing UChar* constructor. 
2944 * The text will be used for the UnicodeString object, but 2945 * it will not be released when the UnicodeString is destroyed. 2946 * This has write-through semantics: 2947 * For as long as the capacity of the buffer is sufficient, write operations 2948 * will directly affect the buffer. When more capacity is necessary, then 2949 * a new buffer will be allocated and the contents copied as with regularly 2950 * constructed strings. 2951 * In an assignment to another UnicodeString, the buffer will be copied. 2952 * The extract(UChar *dst) function detects whether the dst pointer is the same 2953 * as the string buffer itself and will in this case not copy the contents. 2954 * 2955 * @param buffer The characters to alias for the UnicodeString. 2956 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 2957 * @param buffCapacity The size of <code>buffer</code> in UChars. 2958 * @stable ICU 2.0 2959 */ 2960 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 2961 2962 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 2963 2964 /** 2965 * char* constructor. 2966 * Uses the default converter (and thus depends on the ICU conversion code) 2967 * unless U_CHARSET_IS_UTF8 is set to 1. 2968 * 2969 * For ASCII (really "invariant character") strings it is more efficient to use 2970 * the constructor that takes a US_INV (for its enum EInvariant). 2971 * For ASCII (invariant-character) string literals, see UNICODE_STRING and 2972 * UNICODE_STRING_SIMPLE. 2973 * 2974 * It is recommended to mark this constructor "explicit" by 2975 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 2976 * on the compiler command line or similar. 2977 * @param codepageData an array of bytes, null-terminated, 2978 * in the platform's default codepage. 
2979 * @stable ICU 2.0 2980 * @see UNICODE_STRING 2981 * @see UNICODE_STRING_SIMPLE 2982 */ 2983 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 2984 2985 /** 2986 * char* constructor. 2987 * Uses the default converter (and thus depends on the ICU conversion code) 2988 * unless U_CHARSET_IS_UTF8 is set to 1. 2989 * @param codepageData an array of bytes in the platform's default codepage. 2990 * @param dataLength The number of bytes in <TT>codepageData</TT>. 2991 * @stable ICU 2.0 2992 */ 2993 UnicodeString(const char *codepageData, int32_t dataLength); 2994 2995 #endif 2996 2997 #if !UCONFIG_NO_CONVERSION 2998 2999 /** 3000 * char* constructor. 3001 * @param codepageData an array of bytes, null-terminated 3002 * @param codepage the encoding of <TT>codepageData</TT>. The special 3003 * value 0 for <TT>codepage</TT> indicates that the text is in the 3004 * platform's default codepage. 3005 * 3006 * If <code>codepage</code> is an empty string (<code>""</code>), 3007 * then a simple conversion is performed on the codepage-invariant 3008 * subset ("invariant characters") of the platform encoding. See utypes.h. 3009 * Recommendation: For invariant-character strings use the constructor 3010 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3011 * because it avoids object code dependencies of UnicodeString on 3012 * the conversion code. 3013 * 3014 * @stable ICU 2.0 3015 */ 3016 UnicodeString(const char *codepageData, const char *codepage); 3017 3018 /** 3019 * char* constructor. 3020 * @param codepageData an array of bytes. 3021 * @param dataLength The number of bytes in <TT>codepageData</TT>. 3022 * @param codepage the encoding of <TT>codepageData</TT>. The special 3023 * value 0 for <TT>codepage</TT> indicates that the text is in the 3024 * platform's default codepage. 
3025 * If <code>codepage</code> is an empty string (<code>""</code>), 3026 * then a simple conversion is performed on the codepage-invariant 3027 * subset ("invariant characters") of the platform encoding. See utypes.h. 3028 * Recommendation: For invariant-character strings use the constructor 3029 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3030 * because it avoids object code dependencies of UnicodeString on 3031 * the conversion code. 3032 * 3033 * @stable ICU 2.0 3034 */ 3035 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 3036 3037 /** 3038 * char * / UConverter constructor. 3039 * This constructor uses an existing UConverter object to 3040 * convert the codepage string to Unicode and construct a UnicodeString 3041 * from that. 3042 * 3043 * The converter is reset at first. 3044 * If the error code indicates a failure before this constructor is called, 3045 * or if an error occurs during conversion or construction, 3046 * then the string will be bogus. 3047 * 3048 * This function avoids the overhead of opening and closing a converter if 3049 * multiple strings are constructed. 3050 * 3051 * @param src input codepage string 3052 * @param srcLength length of the input string, can be -1 for NUL-terminated strings 3053 * @param cnv converter object (ucnv_resetToUnicode() will be called), 3054 * can be NULL for the default converter 3055 * @param errorCode normal ICU error code 3056 * @stable ICU 2.0 3057 */ 3058 UnicodeString( 3059 const char *src, int32_t srcLength, 3060 UConverter *cnv, 3061 UErrorCode &errorCode); 3062 3063 #endif 3064 3065 /** 3066 * Constructs a Unicode string from an invariant-character char * string. 3067 * About invariant characters see utypes.h. 3068 * This constructor has no runtime dependency on conversion code and is 3069 * therefore recommended over ones taking a charset name string 3070 * (where the empty string "" indicates invariant-character conversion). 
3071 * 3072 * Use the macro US_INV as the third, signature-distinguishing parameter. 3073 * 3074 * For example: 3075 * \code 3076 * void fn(const char *s) { 3077 * UnicodeString ustr(s, -1, US_INV); 3078 * // use ustr ... 3079 * } 3080 * \endcode 3081 * 3082 * @param src String using only invariant characters. 3083 * @param length Length of src, or -1 if NUL-terminated. 3084 * @param inv Signature-distinguishing parameter, use US_INV. 3085 * 3086 * @see US_INV 3087 * @stable ICU 3.2 3088 */ 3089 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 3090 3091 3092 /** 3093 * Copy constructor. 3094 * @param that The UnicodeString object to copy. 3095 * @stable ICU 2.0 3096 */ 3097 UnicodeString(const UnicodeString& that); 3098 3099 /** 3100 * 'Substring' constructor from tail of source string. 3101 * @param src The UnicodeString object to copy. 3102 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3103 * @stable ICU 2.2 3104 */ 3105 UnicodeString(const UnicodeString& src, int32_t srcStart); 3106 3107 /** 3108 * 'Substring' constructor from subrange of source string. 3109 * @param src The UnicodeString object to copy. 3110 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3111 * @param srcLength The number of characters from <tt>src</tt> to copy. 3112 * @stable ICU 2.2 3113 */ 3114 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3115 3116 /** 3117 * Clone this object, an instance of a subclass of Replaceable. 3118 * Clones can be used concurrently in multiple threads. 3119 * If a subclass does not implement clone(), or if an error occurs, 3120 * then NULL is returned. 3121 * The clone functions in all subclasses return a pointer to a Replaceable 3122 * because some compilers do not support covariant (same-as-this) 3123 * return types; cast to the appropriate subclass if necessary. 3124 * The caller must delete the clone.
3125 * 3126 * @return a clone of this object 3127 * 3128 * @see Replaceable::clone 3129 * @see getDynamicClassID 3130 * @stable ICU 2.6 3131 */ 3132 virtual Replaceable *clone() const; 3133 3134 /** Destructor. 3135 * @stable ICU 2.0 3136 */ 3137 virtual ~UnicodeString(); 3138 3139 /** 3140 * Create a UnicodeString from a UTF-8 string. 3141 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3142 * Calls u_strFromUTF8WithSub(). 3143 * 3144 * @param utf8 UTF-8 input string. 3145 * Note that a StringPiece can be implicitly constructed 3146 * from a std::string or a NUL-terminated const char * string. 3147 * @return A UnicodeString with equivalent UTF-16 contents. 3148 * @see toUTF8 3149 * @see toUTF8String 3150 * @stable ICU 4.2 3151 */ 3152 static UnicodeString fromUTF8(const StringPiece &utf8); 3153 3154 /** 3155 * Create a UnicodeString from a UTF-32 string. 3156 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3157 * Calls u_strFromUTF32WithSub(). 3158 * 3159 * @param utf32 UTF-32 input string. Must not be NULL. 3160 * @param length Length of the input string, or -1 if NUL-terminated. 3161 * @return A UnicodeString with equivalent UTF-16 contents. 3162 * @see toUTF32 3163 * @stable ICU 4.2 3164 */ 3165 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 3166 3167 /* Miscellaneous operations */ 3168 3169 /** 3170 * Unescape a string of characters and return a string containing 3171 * the result. The following escape sequences are recognized: 3172 * 3173 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] 3174 * \\Uhhhhhhhh 8 hex digits 3175 * \\xhh 1-2 hex digits 3176 * \\ooo 1-3 octal digits; o in [0-7] 3177 * \\cX control-X; X is masked with 0x1F 3178 * 3179 * as well as the standard ANSI C escapes: 3180 * 3181 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, 3182 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, 3183 * \\" => U+0022, \\' => U+0027, \\? 
=> U+003F, \\\\ => U+005C 3184 * 3185 * Anything else following a backslash is generically escaped. For 3186 * example, "[a\\-z]" returns "[a-z]". 3187 * 3188 * If an escape sequence is ill-formed, this method returns an empty 3189 * string. An example of an ill-formed sequence is "\\u" followed by 3190 * fewer than 4 hex digits. 3191 * 3192 * This function is similar to u_unescape() but not identical to it. 3193 * The latter takes a source char*, so it does escape recognition 3194 * and also invariant conversion. 3195 * 3196 * @return a string with backslash escapes interpreted, or an 3197 * empty string on error. 3198 * @see UnicodeString#unescapeAt() 3199 * @see u_unescape() 3200 * @see u_unescapeAt() 3201 * @stable ICU 2.0 3202 */ 3203 UnicodeString unescape() const; 3204 3205 /** 3206 * Unescape a single escape sequence and return the represented 3207 * character. See unescape() for a listing of the recognized escape 3208 * sequences. The character at offset-1 is assumed (without 3209 * checking) to be a backslash. If the escape sequence is 3210 * ill-formed, or the offset is out of range, U_SENTINEL=-1 is 3211 * returned. 3212 * 3213 * @param offset an input output parameter. On input, it is the 3214 * offset into this string where the escape sequence is located, 3215 * after the initial backslash. On output, it is advanced after the 3216 * last character parsed. On error, it is not advanced at all. 3217 * @return the character represented by the escape sequence at 3218 * offset, or U_SENTINEL=-1 on error. 3219 * @see UnicodeString#unescape() 3220 * @see u_unescape() 3221 * @see u_unescapeAt() 3222 * @stable ICU 2.0 3223 */ 3224 UChar32 unescapeAt(int32_t &offset) const; 3225 3226 /** 3227 * ICU "poor man's RTTI", returns a UClassID for this class. 3228 * 3229 * @stable ICU 2.2 3230 */ 3231 static UClassID U_EXPORT2 getStaticClassID(); 3232 3233 /** 3234 * ICU "poor man's RTTI", returns a UClassID for the actual class. 
3235 * 3236 * @stable ICU 2.2 3237 */ 3238 virtual UClassID getDynamicClassID() const; 3239 3240 //======================================== 3241 // Implementation methods 3242 //======================================== 3243 3244 protected: 3245 /** 3246 * Implement Replaceable::getLength() (see jitterbug 1027). 3247 * @stable ICU 2.4 3248 */ 3249 virtual int32_t getLength() const; 3250 3251 /** 3252 * The change in Replaceable to use virtual getCharAt() allows 3253 * UnicodeString::charAt() to be inline again (see jitterbug 709). 3254 * @stable ICU 2.4 3255 */ 3256 virtual UChar getCharAt(int32_t offset) const; 3257 3258 /** 3259 * The change in Replaceable to use virtual getChar32At() allows 3260 * UnicodeString::char32At() to be inline again (see jitterbug 709). 3261 * @stable ICU 2.4 3262 */ 3263 virtual UChar32 getChar32At(int32_t offset) const; 3264 3265 private: 3266 // For char* constructors. Could be made public. 3267 UnicodeString &setToUTF8(const StringPiece &utf8); 3268 // For extract(char*). 3269 // We could make a toUTF8(target, capacity, errorCode) public but not 3270 // this version: New API will be cleaner if we make callers create substrings 3271 // rather than having start+length on every method, 3272 // and it should take a UErrorCode&. 3273 int32_t 3274 toUTF8(int32_t start, int32_t len, 3275 char *target, int32_t capacity) const; 3276 3277 /** 3278 * Internal string contents comparison, called by operator==. 3279 * Requires: this & text not bogus and have same lengths. 
3280 */ 3281 UBool doEquals(const UnicodeString &text, int32_t len) const; 3282 3283 inline int8_t 3284 doCompare(int32_t start, 3285 int32_t length, 3286 const UnicodeString& srcText, 3287 int32_t srcStart, 3288 int32_t srcLength) const; 3289 3290 int8_t doCompare(int32_t start, 3291 int32_t length, 3292 const UChar *srcChars, 3293 int32_t srcStart, 3294 int32_t srcLength) const; 3295 3296 inline int8_t 3297 doCompareCodePointOrder(int32_t start, 3298 int32_t length, 3299 const UnicodeString& srcText, 3300 int32_t srcStart, 3301 int32_t srcLength) const; 3302 3303 int8_t doCompareCodePointOrder(int32_t start, 3304 int32_t length, 3305 const UChar *srcChars, 3306 int32_t srcStart, 3307 int32_t srcLength) const; 3308 3309 inline int8_t 3310 doCaseCompare(int32_t start, 3311 int32_t length, 3312 const UnicodeString &srcText, 3313 int32_t srcStart, 3314 int32_t srcLength, 3315 uint32_t options) const; 3316 3317 int8_t 3318 doCaseCompare(int32_t start, 3319 int32_t length, 3320 const UChar *srcChars, 3321 int32_t srcStart, 3322 int32_t srcLength, 3323 uint32_t options) const; 3324 3325 int32_t doIndexOf(UChar c, 3326 int32_t start, 3327 int32_t length) const; 3328 3329 int32_t doIndexOf(UChar32 c, 3330 int32_t start, 3331 int32_t length) const; 3332 3333 int32_t doLastIndexOf(UChar c, 3334 int32_t start, 3335 int32_t length) const; 3336 3337 int32_t doLastIndexOf(UChar32 c, 3338 int32_t start, 3339 int32_t length) const; 3340 3341 void doExtract(int32_t start, 3342 int32_t length, 3343 UChar *dst, 3344 int32_t dstStart) const; 3345 3346 inline void doExtract(int32_t start, 3347 int32_t length, 3348 UnicodeString& target) const; 3349 3350 inline UChar doCharAt(int32_t offset) const; 3351 3352 UnicodeString& doReplace(int32_t start, 3353 int32_t length, 3354 const UnicodeString& srcText, 3355 int32_t srcStart, 3356 int32_t srcLength); 3357 3358 UnicodeString& doReplace(int32_t start, 3359 int32_t length, 3360 const UChar *srcChars, 3361 int32_t srcStart, 3362 int32_t 
srcLength); 3363 3364 UnicodeString& doReverse(int32_t start, 3365 int32_t length); 3366 3367 // calculate hash code 3368 int32_t doHashCode(void) const; 3369 3370 // get pointer to start of array 3371 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 3372 inline UChar* getArrayStart(void); 3373 inline const UChar* getArrayStart(void) const; 3374 3375 // A UnicodeString object (not necessarily its current buffer) 3376 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 3377 inline UBool isWritable() const; 3378 3379 // Is the current buffer writable? 3380 inline UBool isBufferWritable() const; 3381 3382 // None of the following does releaseArray(). 3383 inline void setLength(int32_t len); // sets only fShortLength and fLength 3384 inline void setToEmpty(); // sets fFlags=kShortString 3385 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags 3386 3387 // allocate the array; result may be fStackBuffer 3388 // sets refCount to 1 if appropriate 3389 // sets fArray, fCapacity, and fFlags 3390 // returns boolean for success or failure 3391 UBool allocate(int32_t capacity); 3392 3393 // release the array if owned 3394 void releaseArray(void); 3395 3396 // turn a bogus string into an empty one 3397 void unBogus(); 3398 3399 // implements assignment operator, copy constructor, and fastCopyFrom() 3400 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); 3401 3402 // Pin start and limit to acceptable values. 3403 inline void pinIndex(int32_t& start) const; 3404 inline void pinIndices(int32_t& start, 3405 int32_t& length) const; 3406 3407 #if !UCONFIG_NO_CONVERSION 3408 3409 /* Internal extract() using UConverter. */ 3410 int32_t doExtract(int32_t start, int32_t length, 3411 char *dest, int32_t destCapacity, 3412 UConverter *cnv, 3413 UErrorCode &errorCode) const; 3414 3415 /* 3416 * Real constructor for converting from codepage data. 
3417 * It assumes that it is called with !fRefCounted. 3418 * 3419 * If <code>codepage==0</code>, then the default converter 3420 * is used for the platform encoding. 3421 * If <code>codepage</code> is an empty string (<code>""</code>), 3422 * then a simple conversion is performed on the codepage-invariant 3423 * subset ("invariant characters") of the platform encoding. See utypes.h. 3424 */ 3425 void doCodepageCreate(const char *codepageData, 3426 int32_t dataLength, 3427 const char *codepage); 3428 3429 /* 3430 * Worker function for creating a UnicodeString from 3431 * a codepage string using a UConverter. 3432 */ 3433 void 3434 doCodepageCreate(const char *codepageData, 3435 int32_t dataLength, 3436 UConverter *converter, 3437 UErrorCode &status); 3438 3439 #endif 3440 3441 /* 3442 * This function is called when write access to the array 3443 * is necessary. 3444 * 3445 * We need to make a copy of the array if 3446 * the buffer is read-only, or 3447 * the buffer is refCounted (shared), and refCount>1, or 3448 * the buffer is too small. 3449 * 3450 * Return FALSE if memory could not be allocated. 3451 */ 3452 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 3453 int32_t growCapacity = -1, 3454 UBool doCopyArray = TRUE, 3455 int32_t **pBufferToDelete = 0, 3456 UBool forceClone = FALSE); 3457 3458 /** 3459 * Common function for UnicodeString case mappings. 3460 * The stringCaseMapper has the same type UStringCaseMapper 3461 * as in ustr_imp.h for ustrcase_map(). 3462 */ 3463 UnicodeString & 3464 caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); 3465 3466 // ref counting 3467 void addRef(void); 3468 int32_t removeRef(void); 3469 int32_t refCount(void) const; 3470 3471 // constants 3472 enum { 3473 // Set the stack buffer size so that sizeof(UnicodeString) is, 3474 // naturally (without padding), a multiple of sizeof(pointer). 3475 US_STACKBUF_SIZE= sizeof(void *)==4 ? 
13 : 15, // Size of stack buffer for short strings 3476 kInvalidUChar=0xffff, // invalid UChar index 3477 kGrowSize=128, // grow size for this buffer 3478 kInvalidHashCode=0, // invalid hash code 3479 kEmptyHashCode=1, // hash code for empty string 3480 3481 // bit flag values for fFlags 3482 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 3483 kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields 3484 kRefCounted=4, // there is a refCount field before the characters in fArray 3485 kBufferIsReadonly=8,// do not write to this buffer 3486 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 3487 // and releaseBuffer(newLength) must be called 3488 3489 // combined values for convenience 3490 kShortString=kUsingStackBuffer, 3491 kLongString=kRefCounted, 3492 kReadonlyAlias=kBufferIsReadonly, 3493 kWritableAlias=0 3494 }; 3495 3496 friend class StringThreadTest; 3497 friend class UnicodeStringAppendable; 3498 3499 union StackBufferOrFields; // forward declaration necessary before friend declaration 3500 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 3501 3502 /* 3503 * The following are all the class fields that are stored 3504 * in each UnicodeString object. 3505 * Note that UnicodeString has virtual functions, 3506 * therefore there is an implicit vtable pointer 3507 * as the first real field. 3508 * The fields should be aligned such that no padding is necessary. 3509 * On 32-bit machines, the size should be 32 bytes, 3510 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 3511 * 3512 * We use a hack to achieve this. 
3513 * 3514 * With at least some compilers, each of the following is forced to 3515 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 3516 * rounded up with additional padding if the fields do not already fit that requirement: 3517 * - sizeof(class UnicodeString) 3518 * - offsetof(UnicodeString, fUnion) 3519 * - sizeof(fUnion) 3520 * - sizeof(fFields) 3521 * 3522 * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars) 3523 * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines. 3524 * (Padding at the end of fFields is ok: 3525 * As long as there is no padding after fStackBuffer, it is not wasted space.) 3526 * 3527 * We further assume that the compiler does not reorder the fields, 3528 * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion, 3529 * with at most some padding (but no other field) in between. 3530 * (Padding there would be wasted space, but functionally harmless.) 3531 * 3532 * We use a few more sizeof(pointer)'s chunks of space with 3533 * fRestOfStackBuffer, fShortLength and fFlags, 3534 * to get up exactly to the intended sizeof(UnicodeString). 3535 */ 3536 // (implicit) *vtable; 3537 union StackBufferOrFields { 3538 // fStackBuffer is used iff (fFlags&kUsingStackBuffer) 3539 // else fFields is used 3540 UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer 3541 struct { 3542 UChar *fArray; // the Unicode data 3543 int32_t fCapacity; // capacity of fArray (in UChars) 3544 int32_t fLength; // number of characters in fArray if >127; else undefined 3545 } fFields; 3546 } fUnion; 3547 UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; 3548 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength 3549 uint8_t fFlags; // bit flags: see constants above 3550 }; 3551 3552 /** 3553 * Create a new UnicodeString with the concatenation of two others. 
3554 * 3555 * @param s1 The first string to be copied to the new one. 3556 * @param s2 The second string to be copied to the new one, after s1. 3557 * @return UnicodeString(s1).append(s2) 3558 * @stable ICU 2.8 3559 */ 3560 U_COMMON_API UnicodeString U_EXPORT2 3561 operator+ (const UnicodeString &s1, const UnicodeString &s2); 3562 3563 //======================================== 3564 // Inline members 3565 //======================================== 3566 3567 //======================================== 3568 // Privates 3569 //======================================== 3570 3571 inline void 3572 UnicodeString::pinIndex(int32_t& start) const 3573 { 3574 // pin index 3575 if(start < 0) { 3576 start = 0; 3577 } else if(start > length()) { 3578 start = length(); 3579 } 3580 } 3581 3582 inline void 3583 UnicodeString::pinIndices(int32_t& start, 3584 int32_t& _length) const 3585 { 3586 // pin indices 3587 int32_t len = length(); 3588 if(start < 0) { 3589 start = 0; 3590 } else if(start > len) { 3591 start = len; 3592 } 3593 if(_length < 0) { 3594 _length = 0; 3595 } else if(_length > (len - start)) { 3596 _length = (len - start); 3597 } 3598 } 3599 3600 inline UChar* 3601 UnicodeString::getArrayStart() 3602 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 3603 3604 inline const UChar* 3605 UnicodeString::getArrayStart() const 3606 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 3607 3608 //======================================== 3609 // Default constructor 3610 //======================================== 3611 3612 inline 3613 UnicodeString::UnicodeString() 3614 : fShortLength(0), 3615 fFlags(kShortString) 3616 {} 3617 3618 //======================================== 3619 // Read-only implementation methods 3620 //======================================== 3621 inline int32_t 3622 UnicodeString::length() const 3623 { return fShortLength>=0 ? 
fShortLength : fUnion.fFields.fLength; } 3624 3625 inline int32_t 3626 UnicodeString::getCapacity() const 3627 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } 3628 3629 inline int32_t 3630 UnicodeString::hashCode() const 3631 { return doHashCode(); } 3632 3633 inline UBool 3634 UnicodeString::isBogus() const 3635 { return (UBool)(fFlags & kIsBogus); } 3636 3637 inline UBool 3638 UnicodeString::isWritable() const 3639 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } 3640 3641 inline UBool 3642 UnicodeString::isBufferWritable() const 3643 { 3644 return (UBool)( 3645 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 3646 (!(fFlags&kRefCounted) || refCount()==1)); 3647 } 3648 3649 inline const UChar * 3650 UnicodeString::getBuffer() const { 3651 if(fFlags&(kIsBogus|kOpenGetBuffer)) { 3652 return 0; 3653 } else if(fFlags&kUsingStackBuffer) { 3654 return fUnion.fStackBuffer; 3655 } else { 3656 return fUnion.fFields.fArray; 3657 } 3658 } 3659 3660 //======================================== 3661 // Read-only alias methods 3662 //======================================== 3663 inline int8_t 3664 UnicodeString::doCompare(int32_t start, 3665 int32_t thisLength, 3666 const UnicodeString& srcText, 3667 int32_t srcStart, 3668 int32_t srcLength) const 3669 { 3670 if(srcText.isBogus()) { 3671 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3672 } else { 3673 srcText.pinIndices(srcStart, srcLength); 3674 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3675 } 3676 } 3677 3678 inline UBool 3679 UnicodeString::operator== (const UnicodeString& text) const 3680 { 3681 if(isBogus()) { 3682 return text.isBogus(); 3683 } else { 3684 int32_t len = length(), textLength = text.length(); 3685 return !text.isBogus() && len == textLength && doEquals(text, len); 3686 } 3687 } 3688 3689 inline UBool 3690 UnicodeString::operator!= (const UnicodeString& text) const 3691 { return (! 
operator==(text)); } 3692 3693 inline UBool 3694 UnicodeString::operator> (const UnicodeString& text) const 3695 { return doCompare(0, length(), text, 0, text.length()) == 1; } 3696 3697 inline UBool 3698 UnicodeString::operator< (const UnicodeString& text) const 3699 { return doCompare(0, length(), text, 0, text.length()) == -1; } 3700 3701 inline UBool 3702 UnicodeString::operator>= (const UnicodeString& text) const 3703 { return doCompare(0, length(), text, 0, text.length()) != -1; } 3704 3705 inline UBool 3706 UnicodeString::operator<= (const UnicodeString& text) const 3707 { return doCompare(0, length(), text, 0, text.length()) != 1; } 3708 3709 inline int8_t 3710 UnicodeString::compare(const UnicodeString& text) const 3711 { return doCompare(0, length(), text, 0, text.length()); } 3712 3713 inline int8_t 3714 UnicodeString::compare(int32_t start, 3715 int32_t _length, 3716 const UnicodeString& srcText) const 3717 { return doCompare(start, _length, srcText, 0, srcText.length()); } 3718 3719 inline int8_t 3720 UnicodeString::compare(const UChar *srcChars, 3721 int32_t srcLength) const 3722 { return doCompare(0, length(), srcChars, 0, srcLength); } 3723 3724 inline int8_t 3725 UnicodeString::compare(int32_t start, 3726 int32_t _length, 3727 const UnicodeString& srcText, 3728 int32_t srcStart, 3729 int32_t srcLength) const 3730 { return doCompare(start, _length, srcText, srcStart, srcLength); } 3731 3732 inline int8_t 3733 UnicodeString::compare(int32_t start, 3734 int32_t _length, 3735 const UChar *srcChars) const 3736 { return doCompare(start, _length, srcChars, 0, _length); } 3737 3738 inline int8_t 3739 UnicodeString::compare(int32_t start, 3740 int32_t _length, 3741 const UChar *srcChars, 3742 int32_t srcStart, 3743 int32_t srcLength) const 3744 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 3745 3746 inline int8_t 3747 UnicodeString::compareBetween(int32_t start, 3748 int32_t limit, 3749 const UnicodeString& srcText, 3750 int32_t 
srcStart, 3751 int32_t srcLimit) const 3752 { return doCompare(start, limit - start, 3753 srcText, srcStart, srcLimit - srcStart); } 3754 3755 inline int8_t 3756 UnicodeString::doCompareCodePointOrder(int32_t start, 3757 int32_t thisLength, 3758 const UnicodeString& srcText, 3759 int32_t srcStart, 3760 int32_t srcLength) const 3761 { 3762 if(srcText.isBogus()) { 3763 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3764 } else { 3765 srcText.pinIndices(srcStart, srcLength); 3766 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3767 } 3768 } 3769 3770 inline int8_t 3771 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 3772 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 3773 3774 inline int8_t 3775 UnicodeString::compareCodePointOrder(int32_t start, 3776 int32_t _length, 3777 const UnicodeString& srcText) const 3778 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 3779 3780 inline int8_t 3781 UnicodeString::compareCodePointOrder(const UChar *srcChars, 3782 int32_t srcLength) const 3783 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 3784 3785 inline int8_t 3786 UnicodeString::compareCodePointOrder(int32_t start, 3787 int32_t _length, 3788 const UnicodeString& srcText, 3789 int32_t srcStart, 3790 int32_t srcLength) const 3791 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 3792 3793 inline int8_t 3794 UnicodeString::compareCodePointOrder(int32_t start, 3795 int32_t _length, 3796 const UChar *srcChars) const 3797 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 3798 3799 inline int8_t 3800 UnicodeString::compareCodePointOrder(int32_t start, 3801 int32_t _length, 3802 const UChar *srcChars, 3803 int32_t srcStart, 3804 int32_t srcLength) const 3805 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 3806 3807 inline 
int8_t 3808 UnicodeString::compareCodePointOrderBetween(int32_t start, 3809 int32_t limit, 3810 const UnicodeString& srcText, 3811 int32_t srcStart, 3812 int32_t srcLimit) const 3813 { return doCompareCodePointOrder(start, limit - start, 3814 srcText, srcStart, srcLimit - srcStart); } 3815 3816 inline int8_t 3817 UnicodeString::doCaseCompare(int32_t start, 3818 int32_t thisLength, 3819 const UnicodeString &srcText, 3820 int32_t srcStart, 3821 int32_t srcLength, 3822 uint32_t options) const 3823 { 3824 if(srcText.isBogus()) { 3825 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3826 } else { 3827 srcText.pinIndices(srcStart, srcLength); 3828 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 3829 } 3830 } 3831 3832 inline int8_t 3833 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 3834 return doCaseCompare(0, length(), text, 0, text.length(), options); 3835 } 3836 3837 inline int8_t 3838 UnicodeString::caseCompare(int32_t start, 3839 int32_t _length, 3840 const UnicodeString &srcText, 3841 uint32_t options) const { 3842 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 3843 } 3844 3845 inline int8_t 3846 UnicodeString::caseCompare(const UChar *srcChars, 3847 int32_t srcLength, 3848 uint32_t options) const { 3849 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 3850 } 3851 3852 inline int8_t 3853 UnicodeString::caseCompare(int32_t start, 3854 int32_t _length, 3855 const UnicodeString &srcText, 3856 int32_t srcStart, 3857 int32_t srcLength, 3858 uint32_t options) const { 3859 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 3860 } 3861 3862 inline int8_t 3863 UnicodeString::caseCompare(int32_t start, 3864 int32_t _length, 3865 const UChar *srcChars, 3866 uint32_t options) const { 3867 return doCaseCompare(start, _length, srcChars, 0, _length, options); 3868 } 3869 3870 inline int8_t 3871 
UnicodeString::caseCompare(int32_t start, 3872 int32_t _length, 3873 const UChar *srcChars, 3874 int32_t srcStart, 3875 int32_t srcLength, 3876 uint32_t options) const { 3877 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 3878 } 3879 3880 inline int8_t 3881 UnicodeString::caseCompareBetween(int32_t start, 3882 int32_t limit, 3883 const UnicodeString &srcText, 3884 int32_t srcStart, 3885 int32_t srcLimit, 3886 uint32_t options) const { 3887 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 3888 } 3889 3890 inline int32_t 3891 UnicodeString::indexOf(const UnicodeString& srcText, 3892 int32_t srcStart, 3893 int32_t srcLength, 3894 int32_t start, 3895 int32_t _length) const 3896 { 3897 if(!srcText.isBogus()) { 3898 srcText.pinIndices(srcStart, srcLength); 3899 if(srcLength > 0) { 3900 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 3901 } 3902 } 3903 return -1; 3904 } 3905 3906 inline int32_t 3907 UnicodeString::indexOf(const UnicodeString& text) const 3908 { return indexOf(text, 0, text.length(), 0, length()); } 3909 3910 inline int32_t 3911 UnicodeString::indexOf(const UnicodeString& text, 3912 int32_t start) const { 3913 pinIndex(start); 3914 return indexOf(text, 0, text.length(), start, length() - start); 3915 } 3916 3917 inline int32_t 3918 UnicodeString::indexOf(const UnicodeString& text, 3919 int32_t start, 3920 int32_t _length) const 3921 { return indexOf(text, 0, text.length(), start, _length); } 3922 3923 inline int32_t 3924 UnicodeString::indexOf(const UChar *srcChars, 3925 int32_t srcLength, 3926 int32_t start) const { 3927 pinIndex(start); 3928 return indexOf(srcChars, 0, srcLength, start, length() - start); 3929 } 3930 3931 inline int32_t 3932 UnicodeString::indexOf(const UChar *srcChars, 3933 int32_t srcLength, 3934 int32_t start, 3935 int32_t _length) const 3936 { return indexOf(srcChars, 0, srcLength, start, _length); } 3937 3938 inline int32_t 3939 
UnicodeString::indexOf(UChar c, 3940 int32_t start, 3941 int32_t _length) const 3942 { return doIndexOf(c, start, _length); } 3943 3944 inline int32_t 3945 UnicodeString::indexOf(UChar32 c, 3946 int32_t start, 3947 int32_t _length) const 3948 { return doIndexOf(c, start, _length); } 3949 3950 inline int32_t 3951 UnicodeString::indexOf(UChar c) const 3952 { return doIndexOf(c, 0, length()); } 3953 3954 inline int32_t 3955 UnicodeString::indexOf(UChar32 c) const 3956 { return indexOf(c, 0, length()); } 3957 3958 inline int32_t 3959 UnicodeString::indexOf(UChar c, 3960 int32_t start) const { 3961 pinIndex(start); 3962 return doIndexOf(c, start, length() - start); 3963 } 3964 3965 inline int32_t 3966 UnicodeString::indexOf(UChar32 c, 3967 int32_t start) const { 3968 pinIndex(start); 3969 return indexOf(c, start, length() - start); 3970 } 3971 3972 inline int32_t 3973 UnicodeString::lastIndexOf(const UChar *srcChars, 3974 int32_t srcLength, 3975 int32_t start, 3976 int32_t _length) const 3977 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 3978 3979 inline int32_t 3980 UnicodeString::lastIndexOf(const UChar *srcChars, 3981 int32_t srcLength, 3982 int32_t start) const { 3983 pinIndex(start); 3984 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 3985 } 3986 3987 inline int32_t 3988 UnicodeString::lastIndexOf(const UnicodeString& srcText, 3989 int32_t srcStart, 3990 int32_t srcLength, 3991 int32_t start, 3992 int32_t _length) const 3993 { 3994 if(!srcText.isBogus()) { 3995 srcText.pinIndices(srcStart, srcLength); 3996 if(srcLength > 0) { 3997 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 3998 } 3999 } 4000 return -1; 4001 } 4002 4003 inline int32_t 4004 UnicodeString::lastIndexOf(const UnicodeString& text, 4005 int32_t start, 4006 int32_t _length) const 4007 { return lastIndexOf(text, 0, text.length(), start, _length); } 4008 4009 inline int32_t 4010 UnicodeString::lastIndexOf(const UnicodeString& 
text, 4011 int32_t start) const { 4012 pinIndex(start); 4013 return lastIndexOf(text, 0, text.length(), start, length() - start); 4014 } 4015 4016 inline int32_t 4017 UnicodeString::lastIndexOf(const UnicodeString& text) const 4018 { return lastIndexOf(text, 0, text.length(), 0, length()); } 4019 4020 inline int32_t 4021 UnicodeString::lastIndexOf(UChar c, 4022 int32_t start, 4023 int32_t _length) const 4024 { return doLastIndexOf(c, start, _length); } 4025 4026 inline int32_t 4027 UnicodeString::lastIndexOf(UChar32 c, 4028 int32_t start, 4029 int32_t _length) const { 4030 return doLastIndexOf(c, start, _length); 4031 } 4032 4033 inline int32_t 4034 UnicodeString::lastIndexOf(UChar c) const 4035 { return doLastIndexOf(c, 0, length()); } 4036 4037 inline int32_t 4038 UnicodeString::lastIndexOf(UChar32 c) const { 4039 return lastIndexOf(c, 0, length()); 4040 } 4041 4042 inline int32_t 4043 UnicodeString::lastIndexOf(UChar c, 4044 int32_t start) const { 4045 pinIndex(start); 4046 return doLastIndexOf(c, start, length() - start); 4047 } 4048 4049 inline int32_t 4050 UnicodeString::lastIndexOf(UChar32 c, 4051 int32_t start) const { 4052 pinIndex(start); 4053 return lastIndexOf(c, start, length() - start); 4054 } 4055 4056 inline UBool 4057 UnicodeString::startsWith(const UnicodeString& text) const 4058 { return compare(0, text.length(), text, 0, text.length()) == 0; } 4059 4060 inline UBool 4061 UnicodeString::startsWith(const UnicodeString& srcText, 4062 int32_t srcStart, 4063 int32_t srcLength) const 4064 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 4065 4066 inline UBool 4067 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { 4068 if(srcLength < 0) { 4069 srcLength = u_strlen(srcChars); 4070 } 4071 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 4072 } 4073 4074 inline UBool 4075 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { 4076 if(srcLength < 0) { 4077 
srcLength = u_strlen(srcChars + srcStart); /* measure from srcStart, as endsWith() does, so the compare never reads past the NUL */ 4078 } 4079 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 4080 } 4081 4082 inline UBool 4083 UnicodeString::endsWith(const UnicodeString& text) const 4084 { return doCompare(length() - text.length(), text.length(), 4085 text, 0, text.length()) == 0; } 4086 4087 inline UBool 4088 UnicodeString::endsWith(const UnicodeString& srcText, 4089 int32_t srcStart, 4090 int32_t srcLength) const { 4091 srcText.pinIndices(srcStart, srcLength); 4092 return doCompare(length() - srcLength, srcLength, 4093 srcText, srcStart, srcLength) == 0; 4094 } 4095 4096 inline UBool 4097 UnicodeString::endsWith(const UChar *srcChars, 4098 int32_t srcLength) const { 4099 if(srcLength < 0) { 4100 srcLength = u_strlen(srcChars); 4101 } 4102 return doCompare(length() - srcLength, srcLength, 4103 srcChars, 0, srcLength) == 0; 4104 } 4105 4106 inline UBool 4107 UnicodeString::endsWith(const UChar *srcChars, 4108 int32_t srcStart, 4109 int32_t srcLength) const { 4110 if(srcLength < 0) { 4111 srcLength = u_strlen(srcChars + srcStart); 4112 } 4113 return doCompare(length() - srcLength, srcLength, 4114 srcChars, srcStart, srcLength) == 0; 4115 } 4116 4117 //======================================== 4118 // replace 4119 //======================================== 4120 inline UnicodeString& 4121 UnicodeString::replace(int32_t start, 4122 int32_t _length, 4123 const UnicodeString& srcText) 4124 { return doReplace(start, _length, srcText, 0, srcText.length()); } 4125 4126 inline UnicodeString& 4127 UnicodeString::replace(int32_t start, 4128 int32_t _length, 4129 const UnicodeString& srcText, 4130 int32_t srcStart, 4131 int32_t srcLength) 4132 { return doReplace(start, _length, srcText, srcStart, srcLength); } 4133 4134 inline UnicodeString& 4135 UnicodeString::replace(int32_t start, 4136 int32_t _length, 4137 const UChar *srcChars, 4138 int32_t srcLength) 4139 { return doReplace(start, _length, srcChars, 0, srcLength); } 4140 4141 inline UnicodeString& 
4142 UnicodeString::replace(int32_t start, 4143 int32_t _length, 4144 const UChar *srcChars, 4145 int32_t srcStart, 4146 int32_t srcLength) 4147 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 4148 4149 inline UnicodeString& 4150 UnicodeString::replace(int32_t start, 4151 int32_t _length, 4152 UChar srcChar) 4153 { return doReplace(start, _length, &srcChar, 0, 1); } 4154 4155 inline UnicodeString& 4156 UnicodeString::replaceBetween(int32_t start, 4157 int32_t limit, 4158 const UnicodeString& srcText) 4159 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 4160 4161 inline UnicodeString& 4162 UnicodeString::replaceBetween(int32_t start, 4163 int32_t limit, 4164 const UnicodeString& srcText, 4165 int32_t srcStart, 4166 int32_t srcLimit) 4167 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 4168 4169 inline UnicodeString& 4170 UnicodeString::findAndReplace(const UnicodeString& oldText, 4171 const UnicodeString& newText) 4172 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 4173 newText, 0, newText.length()); } 4174 4175 inline UnicodeString& 4176 UnicodeString::findAndReplace(int32_t start, 4177 int32_t _length, 4178 const UnicodeString& oldText, 4179 const UnicodeString& newText) 4180 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 4181 newText, 0, newText.length()); } 4182 4183 // ============================ 4184 // extract 4185 // ============================ 4186 inline void 4187 UnicodeString::doExtract(int32_t start, 4188 int32_t _length, 4189 UnicodeString& target) const 4190 { target.replace(0, target.length(), *this, start, _length); } 4191 4192 inline void 4193 UnicodeString::extract(int32_t start, 4194 int32_t _length, 4195 UChar *target, 4196 int32_t targetStart) const 4197 { doExtract(start, _length, target, targetStart); } 4198 4199 inline void 4200 UnicodeString::extract(int32_t start, 4201 int32_t _length, 4202 UnicodeString& target) 
const 4203 { doExtract(start, _length, target); } 4204 4205 #if !UCONFIG_NO_CONVERSION 4206 4207 inline int32_t 4208 UnicodeString::extract(int32_t start, 4209 int32_t _length, 4210 char *dst, 4211 const char *codepage) const 4212 4213 { 4214 // This dstSize value will be checked explicitly 4215 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); 4216 } 4217 4218 #endif 4219 4220 inline void 4221 UnicodeString::extractBetween(int32_t start, 4222 int32_t limit, 4223 UChar *dst, 4224 int32_t dstStart) const { 4225 pinIndex(start); 4226 pinIndex(limit); 4227 doExtract(start, limit - start, dst, dstStart); 4228 } 4229 4230 inline UnicodeString 4231 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 4232 return tempSubString(start, limit - start); 4233 } 4234 4235 inline UChar 4236 UnicodeString::doCharAt(int32_t offset) const 4237 { 4238 if((uint32_t)offset < (uint32_t)length()) { 4239 return getArrayStart()[offset]; 4240 } else { 4241 return kInvalidUChar; 4242 } 4243 } 4244 4245 inline UChar 4246 UnicodeString::charAt(int32_t offset) const 4247 { return doCharAt(offset); } 4248 4249 inline UChar 4250 UnicodeString::operator[] (int32_t offset) const 4251 { return doCharAt(offset); } 4252 4253 inline UBool 4254 UnicodeString::isEmpty() const { 4255 return fShortLength == 0; 4256 } 4257 4258 //======================================== 4259 // Write implementation methods 4260 //======================================== 4261 inline void 4262 UnicodeString::setLength(int32_t len) { 4263 if(len <= 127) { 4264 fShortLength = (int8_t)len; 4265 } else { 4266 fShortLength = (int8_t)-1; 4267 fUnion.fFields.fLength = len; 4268 } 4269 } 4270 4271 inline void 4272 UnicodeString::setToEmpty() { 4273 fShortLength = 0; 4274 fFlags = kShortString; 4275 } 4276 4277 inline void 4278 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 4279 setLength(len); 4280 fUnion.fFields.fArray = array; 4281 fUnion.fFields.fCapacity = 
capacity; 4282 } 4283 4284 inline const UChar * 4285 UnicodeString::getTerminatedBuffer() { 4286 if(!isWritable()) { 4287 return 0; 4288 } else { 4289 UChar *array = getArrayStart(); 4290 int32_t len = length(); 4291 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) { 4292 /* 4293 * kRefCounted: Do not write the NUL if the buffer is shared. 4294 * That is mostly safe, except when the length of one copy was modified 4295 * without copy-on-write, e.g., via truncate(newLength) or remove(void). 4296 * Then the NUL would be written into the middle of another copy's string. 4297 */ 4298 if(!(fFlags&kBufferIsReadonly)) { 4299 /* 4300 * We must not write to a readonly buffer, but it is known to be 4301 * NUL-terminated if len<capacity. 4302 * A shared, allocated buffer (refCount()>1) must not have its contents 4303 * modified, but the NUL at [len] is beyond the string contents, 4304 * and multiple string objects and threads writing the same NUL into the 4305 * same location is harmless. 4306 * In all other cases, the buffer is fully writable and it is anyway safe 4307 * to write the NUL. 4308 * 4309 * Note: An earlier version of this code tested whether there is a NUL 4310 * at [len] already, but, while safe, it generated lots of warnings from 4311 * tools like valgrind and Purify. 
4312 */ 4313 array[len] = 0; 4314 } 4315 return array; 4316 } else if(cloneArrayIfNeeded(len+1)) { 4317 array = getArrayStart(); 4318 array[len] = 0; 4319 return array; 4320 } else { 4321 return 0; 4322 } 4323 } 4324 } 4325 4326 inline UnicodeString& 4327 UnicodeString::operator= (UChar ch) 4328 { return doReplace(0, length(), &ch, 0, 1); } 4329 4330 inline UnicodeString& 4331 UnicodeString::operator= (UChar32 ch) 4332 { return replace(0, length(), ch); } 4333 4334 inline UnicodeString& 4335 UnicodeString::setTo(const UnicodeString& srcText, 4336 int32_t srcStart, 4337 int32_t srcLength) 4338 { 4339 unBogus(); 4340 return doReplace(0, length(), srcText, srcStart, srcLength); 4341 } 4342 4343 inline UnicodeString& 4344 UnicodeString::setTo(const UnicodeString& srcText, 4345 int32_t srcStart) 4346 { 4347 unBogus(); 4348 srcText.pinIndex(srcStart); 4349 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 4350 } 4351 4352 inline UnicodeString& 4353 UnicodeString::setTo(const UnicodeString& srcText) 4354 { 4355 return copyFrom(srcText); 4356 } 4357 4358 inline UnicodeString& 4359 UnicodeString::setTo(const UChar *srcChars, 4360 int32_t srcLength) 4361 { 4362 unBogus(); 4363 return doReplace(0, length(), srcChars, 0, srcLength); 4364 } 4365 4366 inline UnicodeString& 4367 UnicodeString::setTo(UChar srcChar) 4368 { 4369 unBogus(); 4370 return doReplace(0, length(), &srcChar, 0, 1); 4371 } 4372 4373 inline UnicodeString& 4374 UnicodeString::setTo(UChar32 srcChar) 4375 { 4376 unBogus(); 4377 return replace(0, length(), srcChar); 4378 } 4379 4380 inline UnicodeString& 4381 UnicodeString::append(const UnicodeString& srcText, 4382 int32_t srcStart, 4383 int32_t srcLength) 4384 { return doReplace(length(), 0, srcText, srcStart, srcLength); } 4385 4386 inline UnicodeString& 4387 UnicodeString::append(const UnicodeString& srcText) 4388 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 4389 4390 inline UnicodeString& 4391 
UnicodeString::append(const UChar *srcChars, 4392 int32_t srcStart, 4393 int32_t srcLength) 4394 { return doReplace(length(), 0, srcChars, srcStart, srcLength); } 4395 4396 inline UnicodeString& 4397 UnicodeString::append(const UChar *srcChars, 4398 int32_t srcLength) 4399 { return doReplace(length(), 0, srcChars, 0, srcLength); } 4400 4401 inline UnicodeString& 4402 UnicodeString::append(UChar srcChar) 4403 { return doReplace(length(), 0, &srcChar, 0, 1); } 4404 4405 inline UnicodeString& 4406 UnicodeString::operator+= (UChar ch) 4407 { return doReplace(length(), 0, &ch, 0, 1); } 4408 4409 inline UnicodeString& 4410 UnicodeString::operator+= (UChar32 ch) { 4411 return append(ch); 4412 } 4413 4414 inline UnicodeString& 4415 UnicodeString::operator+= (const UnicodeString& srcText) 4416 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 4417 4418 inline UnicodeString& 4419 UnicodeString::insert(int32_t start, 4420 const UnicodeString& srcText, 4421 int32_t srcStart, 4422 int32_t srcLength) 4423 { return doReplace(start, 0, srcText, srcStart, srcLength); } 4424 4425 inline UnicodeString& 4426 UnicodeString::insert(int32_t start, 4427 const UnicodeString& srcText) 4428 { return doReplace(start, 0, srcText, 0, srcText.length()); } 4429 4430 inline UnicodeString& 4431 UnicodeString::insert(int32_t start, 4432 const UChar *srcChars, 4433 int32_t srcStart, 4434 int32_t srcLength) 4435 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 4436 4437 inline UnicodeString& 4438 UnicodeString::insert(int32_t start, 4439 const UChar *srcChars, 4440 int32_t srcLength) 4441 { return doReplace(start, 0, srcChars, 0, srcLength); } 4442 4443 inline UnicodeString& 4444 UnicodeString::insert(int32_t start, 4445 UChar srcChar) 4446 { return doReplace(start, 0, &srcChar, 0, 1); } 4447 4448 inline UnicodeString& 4449 UnicodeString::insert(int32_t start, 4450 UChar32 srcChar) 4451 { return replace(start, 0, srcChar); } 4452 4453 4454 inline UnicodeString& 4455 
UnicodeString::remove() 4456 { 4457 // remove() of a bogus string makes the string empty and non-bogus 4458 // we also un-alias a read-only alias to deal with NUL-termination 4459 // issues with getTerminatedBuffer() 4460 if(fFlags & (kIsBogus|kBufferIsReadonly)) { 4461 setToEmpty(); 4462 } else { 4463 fShortLength = 0; 4464 } 4465 return *this; 4466 } 4467 4468 inline UnicodeString& 4469 UnicodeString::remove(int32_t start, 4470 int32_t _length) 4471 { 4472 if(start <= 0 && _length == INT32_MAX) { 4473 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 4474 return remove(); 4475 } 4476 return doReplace(start, _length, NULL, 0, 0); 4477 } 4478 4479 inline UnicodeString& 4480 UnicodeString::removeBetween(int32_t start, 4481 int32_t limit) 4482 { return doReplace(start, limit - start, NULL, 0, 0); } 4483 4484 inline UnicodeString & 4485 UnicodeString::retainBetween(int32_t start, int32_t limit) { 4486 truncate(limit); 4487 return doReplace(0, start, NULL, 0, 0); 4488 } 4489 4490 inline UBool 4491 UnicodeString::truncate(int32_t targetLength) 4492 { 4493 if(isBogus() && targetLength == 0) { 4494 // truncate(0) of a bogus string makes the string empty and non-bogus 4495 unBogus(); 4496 return FALSE; 4497 } else if((uint32_t)targetLength < (uint32_t)length()) { 4498 setLength(targetLength); 4499 if(fFlags&kBufferIsReadonly) { 4500 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more 4501 } 4502 return TRUE; 4503 } else { 4504 return FALSE; 4505 } 4506 } 4507 4508 inline UnicodeString& 4509 UnicodeString::reverse() 4510 { return doReverse(0, length()); } 4511 4512 inline UnicodeString& 4513 UnicodeString::reverse(int32_t start, 4514 int32_t _length) 4515 { return doReverse(start, _length); } 4516 4517 U_NAMESPACE_END 4518 4519 #endif 4520