1 /* 2 ********************************************************************** 3 * Copyright (C) 1998-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * File unistr.h 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 09/25/98 stephen Creation. 13 * 11/11/98 stephen Changed per 11/9 code review. 14 * 04/20/99 stephen Overhauled per 4/16 code review. 15 * 11/18/99 aliu Made to inherit from Replaceable. Added method 16 * handleReplaceBetween(); other methods unchanged. 17 * 06/25/01 grhoten Remove dependency on iostream. 18 ****************************************************************************** 19 */ 20 21 #ifndef UNISTR_H 22 #define UNISTR_H 23 24 /** 25 * \file 26 * \brief C++ API: Unicode String 27 */ 28 29 #include "unicode/utypes.h" 30 #include "unicode/rep.h" 31 #include "unicode/std_string.h" 32 #include "unicode/stringpiece.h" 33 #include "unicode/bytestream.h" 34 #include "unicode/ucasemap.h" 35 36 struct UConverter; // unicode/ucnv.h 37 class StringThreadTest; 38 39 #ifndef U_COMPARE_CODE_POINT_ORDER 40 /* see also ustring.h and unorm.h */ 41 /** 42 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 43 * Compare strings in code point order instead of code unit order. 44 * @stable ICU 2.2 45 */ 46 #define U_COMPARE_CODE_POINT_ORDER 0x8000 47 #endif 48 49 #ifndef USTRING_H 50 /** 51 * \ingroup ustring_ustrlen 52 */ 53 U_STABLE int32_t U_EXPORT2 54 u_strlen(const UChar *s); 55 #endif 56 57 /** 58 * \def U_STRING_CASE_MAPPER_DEFINED 59 * @internal 60 */ 61 #ifndef U_STRING_CASE_MAPPER_DEFINED 62 #define U_STRING_CASE_MAPPER_DEFINED 63 64 /** 65 * Internal string case mapping function type. 
66 * @internal 67 */ 68 typedef int32_t U_CALLCONV 69 UStringCaseMapper(const UCaseMap *csm, 70 UChar *dest, int32_t destCapacity, 71 const UChar *src, int32_t srcLength, 72 UErrorCode *pErrorCode); 73 74 #endif 75 76 U_NAMESPACE_BEGIN 77 78 class BreakIterator; // unicode/brkiter.h 79 class Locale; // unicode/locid.h 80 class StringCharacterIterator; 81 class UnicodeStringAppendable; // unicode/appendable.h 82 83 /* The <iostream> include has been moved to unicode/ustream.h */ 84 85 /** 86 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 87 * which constructs a Unicode string from an invariant-character char * string. 88 * For details about invariant characters see utypes.h. 89 * This constructor has no runtime dependency on conversion code and is 90 * therefore recommended over ones taking a charset name string 91 * (where the empty string "" indicates invariant-character conversion). 92 * 93 * @stable ICU 3.2 94 */ 95 #define US_INV icu::UnicodeString::kInvariant 96 97 /** 98 * Unicode String literals in C++. 99 * Depending on the platform properties, different UnicodeString 100 * constructors should be used to create a UnicodeString object from 101 * a string literal. 102 * The macros are defined for maximum performance. 103 * They work only for strings that contain "invariant characters", i.e., 104 * only Latin letters, digits, and some punctuation. 105 * See utypes.h for details. 106 * 107 * The string parameter must be a C string literal. 108 * The length of the string, not including the terminating 109 * <code>NUL</code>, must be specified as a constant. 110 * The U_STRING_DECL macro should be invoked exactly once for one 111 * such string variable before it is used. * NOTE(review): none of the UNICODE_STRING definitions that follow reference U_STRING_DECL; this sentence appears to be carried over from another macro's documentation - confirm. 
112 * @stable ICU 2.0 113 */ 114 #if defined(U_DECLARE_UTF16) 115 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 116 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 117 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) 118 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 119 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) 120 #else 121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) 122 #endif 123 124 /** 125 * Unicode String literals in C++. 126 * Depending on the platform properties, different UnicodeString 127 * constructors should be used to create a UnicodeString object from 128 * a string literal. 129 * The macros are defined for improved performance. 130 * They work only for strings that contain "invariant characters", i.e., 131 * only Latin letters, digits, and some punctuation. 132 * See utypes.h for details. 133 * 134 * The string parameter must be a C string literal. * This macro expands to UNICODE_STRING(cs, -1), so the caller does not supply a length. 135 * @stable ICU 2.0 136 */ 137 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 138 139 /** 140 * \def UNISTR_FROM_CHAR_EXPLICIT 141 * This can be defined to be empty or "explicit". 142 * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) 143 * constructors are marked as explicit, preventing their inadvertent use. * If it is not already defined, it defaults to "explicit" when any of U_COMBINED_IMPLEMENTATION, U_COMMON_IMPLEMENTATION, U_I18N_IMPLEMENTATION or U_IO_IMPLEMENTATION is defined, and to empty otherwise. 
144 * @stable ICU 49 145 */ 146 #ifndef UNISTR_FROM_CHAR_EXPLICIT 147 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 148 // Auto-"explicit" in ICU library code. 149 # define UNISTR_FROM_CHAR_EXPLICIT explicit 150 # else 151 // Empty by default for source code compatibility. 
152 # define UNISTR_FROM_CHAR_EXPLICIT 153 # endif 154 #endif 155 156 /** 157 * \def UNISTR_FROM_STRING_EXPLICIT 158 * This can be defined to be empty or "explicit". 159 * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) 160 * constructors are marked as explicit, preventing their inadvertent use. 161 * 162 * In particular, this helps prevent accidentally depending on ICU conversion code 163 * by passing a string literal into an API with a const UnicodeString & parameter. * If it is not already defined, it defaults to "explicit" when any of U_COMBINED_IMPLEMENTATION, U_COMMON_IMPLEMENTATION, U_I18N_IMPLEMENTATION or U_IO_IMPLEMENTATION is defined, and to empty otherwise. 164 * @stable ICU 49 165 */ 166 #ifndef UNISTR_FROM_STRING_EXPLICIT 167 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 168 // Auto-"explicit" in ICU library code. 169 # define UNISTR_FROM_STRING_EXPLICIT explicit 170 # else 171 // Empty by default for source code compatibility. 172 # define UNISTR_FROM_STRING_EXPLICIT 173 # endif 174 #endif 175 176 /** 177 * UnicodeString is a string class that stores Unicode characters directly and provides 178 * functionality similar to that of the Java String and StringBuffer classes. 179 * It is a concrete implementation of the abstract class Replaceable (for transliteration). 180 * 181 * The UnicodeString class is not suitable for subclassing. 182 * 183 * <p>For an overview of Unicode strings in C and C++ see the 184 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> 185 * 186 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. 187 * A Unicode character may be stored with either one code unit 188 * (the most common case) or with a matched pair of special code units 189 * ("surrogates"). The data type for code units is UChar. 
190 * For single-character handling, a Unicode character code <em>point</em> is a value 191 * in the range 0..0x10ffff. 
ICU uses the UChar32 type for code points.</p> 192 * 193 * <p>Indexes and offsets into and lengths of strings always count code units, not code points. 194 * This is the same as with multi-byte char* strings in traditional string handling. 195 * Operations on partial strings typically do not test for code point boundaries. 196 * If necessary, the user needs to take care of such boundaries by testing for the code unit 197 * values or by using functions like 198 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() 199 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p> 200 * 201 * UnicodeString methods are more lenient with regard to input parameter values 202 * than other ICU APIs. In particular: 203 * - If indexes are out of bounds for a UnicodeString object 204 * (<0 or >length()) then they are "pinned" to the nearest boundary. 205 * - If primitive string pointer values (e.g., const UChar * or char *) 206 * for input strings are NULL, then those input string parameters are treated 207 * as if they pointed to an empty string. 208 * However, this is <em>not</em> the case for char * parameters for charset names 209 * or other IDs. 210 * - Most UnicodeString methods do not take a UErrorCode parameter because 211 * there are usually very few opportunities for failure other than a shortage 212 * of memory, error codes in low-level C++ string methods would be inconvenient, 213 * and the error code as the last parameter (ICU convention) would prevent 214 * the use of default parameter values. 215 * Instead, such methods set the UnicodeString into a "bogus" state 216 * (see isBogus()) if an error occurs. 217 * 218 * In string comparisons, two UnicodeString objects that are both "bogus" 219 * compare equal (to be transitive and prevent endless loops in sorting), 220 * and a "bogus" string compares less than any non-"bogus" one. 221 * 222 * Const UnicodeString methods are thread-safe. 
Multiple threads can use 223 * const methods on the same UnicodeString object simultaneously, 224 * but non-const methods must not be called concurrently (in multiple threads) 225 * with any other (const or non-const) methods. 226 * 227 * Similarly, const UnicodeString & parameters are thread-safe. 228 * One object may be passed in as such a parameter concurrently in multiple threads. 229 * This includes the const UnicodeString & parameters for 230 * copy construction, assignment, and cloning. 231 * 232 * <p>UnicodeString uses several storage methods. 233 * String contents can be stored inside the UnicodeString object itself, 234 * in an allocated and shared buffer, or in an outside buffer that is "aliased". 235 * Most of this is done transparently, but careful aliasing in particular provides 236 * significant performance improvements. 237 * Also, the internal buffer is accessible via special functions. 238 * For details see the 239 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> 240 * 241 * @see utf.h 242 * @see CharacterIterator 243 * @stable ICU 2.0 244 */ 245 class U_COMMON_API UnicodeString : public Replaceable 246 { 247 public: 248 249 /** 250 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 251 * which constructs a Unicode string from an invariant-character char * string. 252 * Use the macro US_INV instead of the full qualification for this value. 253 * 254 * @see US_INV 255 * @stable ICU 3.2 256 */ 257 enum EInvariant { 258 /** 259 * @see EInvariant 260 * @stable ICU 3.2 261 */ 262 kInvariant 263 }; 264 265 //======================================== 266 // Read-only operations 267 //======================================== 268 269 /* Comparison - bitwise only - for international comparison use collation */ 270 271 /** 272 * Equality operator. Performs only bitwise comparison. 273 * @param text The UnicodeString to compare to this one. 
274 * @return TRUE if <TT>text</TT> contains the same characters as this one, 275 * FALSE otherwise. 276 * @stable ICU 2.0 277 */ 278 inline UBool operator== (const UnicodeString& text) const; 279 280 /** 281 * Inequality operator. Performs only bitwise comparison. 282 * @param text The UnicodeString to compare to this one. 283 * @return FALSE if <TT>text</TT> contains the same characters as this one, 284 * TRUE otherwise. 285 * @stable ICU 2.0 286 */ 287 inline UBool operator!= (const UnicodeString& text) const; 288 289 /** 290 * Greater than operator. Performs only bitwise comparison. 291 * @param text The UnicodeString to compare to this one. 292 * @return TRUE if the characters in this are bitwise 293 * greater than the characters in <code>text</code>, FALSE otherwise 294 * @stable ICU 2.0 295 */ 296 inline UBool operator> (const UnicodeString& text) const; 297 298 /** 299 * Less than operator. Performs only bitwise comparison. 300 * @param text The UnicodeString to compare to this one. 301 * @return TRUE if the characters in this are bitwise 302 * less than the characters in <code>text</code>, FALSE otherwise 303 * @stable ICU 2.0 304 */ 305 inline UBool operator< (const UnicodeString& text) const; 306 307 /** 308 * Greater than or equal operator. Performs only bitwise comparison. 309 * @param text The UnicodeString to compare to this one. 310 * @return TRUE if the characters in this are bitwise 311 * greater than or equal to the characters in <code>text</code>, FALSE otherwise 312 * @stable ICU 2.0 313 */ 314 inline UBool operator>= (const UnicodeString& text) const; 315 316 /** 317 * Less than or equal operator. Performs only bitwise comparison. 318 * @param text The UnicodeString to compare to this one. 
319 * @return TRUE if the characters in this are bitwise 320 * less than or equal to the characters in <code>text</code>, FALSE otherwise 321 * @stable ICU 2.0 322 */ 323 inline UBool operator<= (const UnicodeString& text) const; 324 325 /** 326 * Compare the characters bitwise in this UnicodeString to 327 * the characters in <code>text</code>. 328 * @param text The UnicodeString to compare to this one. 329 * @return The result of bitwise character comparison: 0 if this 330 * contains the same characters as <code>text</code>, -1 if the characters in 331 * this are bitwise less than the characters in <code>text</code>, +1 if the 332 * characters in this are bitwise greater than the characters 333 * in <code>text</code>. 334 * @stable ICU 2.0 335 */ 336 inline int8_t compare(const UnicodeString& text) const; 337 338 /** 339 * Compare the characters bitwise in the range 340 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 341 * in the <b>entire string</b> <TT>text</TT>. 342 * (The parameters "start" and "length" are not applied to the other text "text".) 343 * @param start the offset at which the compare operation begins 344 * @param length the number of characters in this string to compare. 345 * @param text the other text to be compared against this string. 346 * @return The result of bitwise character comparison: 0 if this 347 * contains the same characters as <code>text</code>, -1 if the characters in 348 * this are bitwise less than the characters in <code>text</code>, +1 if the 349 * characters in this are bitwise greater than the characters 350 * in <code>text</code>. 351 * @stable ICU 2.0 352 */ 353 inline int8_t compare(int32_t start, 354 int32_t length, 355 const UnicodeString& text) const; 356 357 /** 358 * Compare the characters bitwise in the range 359 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 360 * in <TT>srcText</TT> in the range 361 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 
362 * @param start the offset at which the compare operation begins 363 * @param length the number of characters in this string to compare. 364 * @param srcText the text to be compared 365 * @param srcStart the offset into <TT>srcText</TT> to start comparison 366 * @param srcLength the number of characters in <TT>srcText</TT> to compare 367 * @return The result of bitwise character comparison: 0 if this 368 * contains the same characters as <code>srcText</code>, -1 if the characters in 369 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 370 * characters in this are bitwise greater than the characters 371 * in <code>srcText</code>. 372 * @stable ICU 2.0 373 */ 374 inline int8_t compare(int32_t start, 375 int32_t length, 376 const UnicodeString& srcText, 377 int32_t srcStart, 378 int32_t srcLength) const; 379 380 /** 381 * Compare the characters bitwise in this UnicodeString with the first 382 * <TT>srcLength</TT> characters in <TT>srcChars</TT>. 383 * @param srcChars The characters to compare to this UnicodeString. 384 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 385 * @return The result of bitwise character comparison: 0 if this 386 * contains the same characters as <code>srcChars</code>, -1 if the characters in 387 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 388 * characters in this are bitwise greater than the characters 389 * in <code>srcChars</code>. 390 * @stable ICU 2.0 391 */ 392 inline int8_t compare(const UChar *srcChars, 393 int32_t srcLength) const; 394 395 /** 396 * Compare the characters bitwise in the range 397 * [<TT>start</TT>, <TT>start + length</TT>) with the first 398 * <TT>length</TT> characters in <TT>srcChars</TT>. 399 * @param start the offset at which the compare operation begins 400 * @param length the number of characters to compare. 
401 * @param srcChars the characters to be compared 402 * @return The result of bitwise character comparison: 0 if this 403 * contains the same characters as <code>srcChars</code>, -1 if the characters in 404 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 405 * characters in this are bitwise greater than the characters 406 * in <code>srcChars</code>. 407 * @stable ICU 2.0 408 */ 409 inline int8_t compare(int32_t start, 410 int32_t length, 411 const UChar *srcChars) const; 412 413 /** 414 * Compare the characters bitwise in the range 415 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 416 * in <TT>srcChars</TT> in the range 417 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 418 * @param start the offset at which the compare operation begins 419 * @param length the number of characters in this to compare 420 * @param srcChars the characters to be compared 421 * @param srcStart the offset into <TT>srcChars</TT> to start comparison 422 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 423 * @return The result of bitwise character comparison: 0 if this 424 * contains the same characters as <code>srcChars</code>, -1 if the characters in 425 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 426 * characters in this are bitwise greater than the characters 427 * in <code>srcChars</code>. 428 * @stable ICU 2.0 429 */ 430 inline int8_t compare(int32_t start, 431 int32_t length, 432 const UChar *srcChars, 433 int32_t srcStart, 434 int32_t srcLength) const; 435 436 /** 437 * Compare the characters bitwise in the range 438 * [<TT>start</TT>, <TT>limit</TT>) with the characters 439 * in <TT>srcText</TT> in the range 440 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). 
441 * @param start the offset at which the compare operation begins 442 * @param limit the offset immediately following the compare operation 443 * @param srcText the text to be compared 444 * @param srcStart the offset into <TT>srcText</TT> to start comparison 445 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison 446 * @return The result of bitwise character comparison: 0 if this 447 * contains the same characters as <code>srcText</code>, -1 if the characters in 448 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 449 * characters in this are bitwise greater than the characters 450 * in <code>srcText</code>. 451 * @stable ICU 2.0 452 */ 453 inline int8_t compareBetween(int32_t start, 454 int32_t limit, 455 const UnicodeString& srcText, 456 int32_t srcStart, 457 int32_t srcLimit) const; 458 459 /** 460 * Compare two Unicode strings in code point order. 461 * The result may be different from the results of compare(), operator<, etc. 462 * if supplementary characters are present: 463 * 464 * In UTF-16, supplementary characters (with code points U+10000 and above) are 465 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 466 * which means that they compare as less than some other BMP characters like U+feff. 467 * This function compares Unicode strings in code point order. 468 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 469 * 470 * @param text Another string to compare this one to. 471 * @return a negative/zero/positive integer corresponding to whether 472 * this string is less than/equal to/greater than the second one 473 * in code point order 474 * @stable ICU 2.0 475 */ 476 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 477 478 /** 479 * Compare two Unicode strings in code point order. 480 * The result may be different from the results of compare(), operator<, etc. 
481 * if supplementary characters are present: 482 * 483 * In UTF-16, supplementary characters (with code points U+10000 and above) are 484 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 485 * which means that they compare as less than some other BMP characters like U+feff. 486 * This function compares Unicode strings in code point order. 487 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 488 * 489 * @param start The start offset in this string at which the compare operation begins. 490 * @param length The number of code units from this string to compare. 491 * @param srcText Another string to compare this one to. 492 * @return a negative/zero/positive integer corresponding to whether 493 * this string is less than/equal to/greater than the second one 494 * in code point order 495 * @stable ICU 2.0 496 */ 497 inline int8_t compareCodePointOrder(int32_t start, 498 int32_t length, 499 const UnicodeString& srcText) const; 500 501 /** 502 * Compare two Unicode strings in code point order. 503 * The result may be different from the results of compare(), operator<, etc. 504 * if supplementary characters are present: 505 * 506 * In UTF-16, supplementary characters (with code points U+10000 and above) are 507 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 508 * which means that they compare as less than some other BMP characters like U+feff. 509 * This function compares Unicode strings in code point order. 510 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 511 * 512 * @param start The start offset in this string at which the compare operation begins. 513 * @param length The number of code units from this string to compare. 514 * @param srcText Another string to compare this one to. 
515 * @param srcStart The start offset in that string at which the compare operation begins. 516 * @param srcLength The number of code units from that string to compare. 517 * @return a negative/zero/positive integer corresponding to whether 518 * this string is less than/equal to/greater than the second one 519 * in code point order 520 * @stable ICU 2.0 521 */ 522 inline int8_t compareCodePointOrder(int32_t start, 523 int32_t length, 524 const UnicodeString& srcText, 525 int32_t srcStart, 526 int32_t srcLength) const; 527 528 /** 529 * Compare two Unicode strings in code point order. 530 * The result may be different from the results of compare(), operator<, etc. 531 * if supplementary characters are present: 532 * 533 * In UTF-16, supplementary characters (with code points U+10000 and above) are 534 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 535 * which means that they compare as less than some other BMP characters like U+feff. 536 * This function compares Unicode strings in code point order. 537 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 538 * 539 * @param srcChars A pointer to another string to compare this one to. 540 * @param srcLength The number of code units from that string to compare. 541 * @return a negative/zero/positive integer corresponding to whether 542 * this string is less than/equal to/greater than the second one 543 * in code point order 544 * @stable ICU 2.0 545 */ 546 inline int8_t compareCodePointOrder(const UChar *srcChars, 547 int32_t srcLength) const; 548 549 /** 550 * Compare two Unicode strings in code point order. 551 * The result may be different from the results of compare(), operator<, etc. 552 * if supplementary characters are present: 553 * 554 * In UTF-16, supplementary characters (with code points U+10000 and above) are 555 * stored with pairs of surrogate code units. 
These have values from 0xd800 to 0xdfff, 556 * which means that they compare as less than some other BMP characters like U+feff. 557 * This function compares Unicode strings in code point order. 558 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 559 * 560 * @param start The start offset in this string at which the compare operation begins. 561 * @param length The number of code units from this string to compare. 562 * @param srcChars A pointer to another string to compare this one to. 563 * @return a negative/zero/positive integer corresponding to whether 564 * this string is less than/equal to/greater than the second one 565 * in code point order 566 * @stable ICU 2.0 567 */ 568 inline int8_t compareCodePointOrder(int32_t start, 569 int32_t length, 570 const UChar *srcChars) const; 571 572 /** 573 * Compare two Unicode strings in code point order. 574 * The result may be different from the results of compare(), operator<, etc. 575 * if supplementary characters are present: 576 * 577 * In UTF-16, supplementary characters (with code points U+10000 and above) are 578 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 579 * which means that they compare as less than some other BMP characters like U+feff. 580 * This function compares Unicode strings in code point order. 581 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 582 * 583 * @param start The start offset in this string at which the compare operation begins. 584 * @param length The number of code units from this string to compare. 585 * @param srcChars A pointer to another string to compare this one to. 586 * @param srcStart The start offset in that string at which the compare operation begins. 587 * @param srcLength The number of code units from that string to compare. 
588 * @return a negative/zero/positive integer corresponding to whether 589 * this string is less than/equal to/greater than the second one 590 * in code point order 591 * @stable ICU 2.0 592 */ 593 inline int8_t compareCodePointOrder(int32_t start, 594 int32_t length, 595 const UChar *srcChars, 596 int32_t srcStart, 597 int32_t srcLength) const; 598 599 /** 600 * Compare two Unicode strings in code point order. 601 * The result may be different from the results of compare(), operator<, etc. 602 * if supplementary characters are present: 603 * 604 * In UTF-16, supplementary characters (with code points U+10000 and above) are 605 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 606 * which means that they compare as less than some other BMP characters like U+feff. 607 * This function compares Unicode strings in code point order. 608 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 609 * 610 * @param start The start offset in this string at which the compare operation begins. 611 * @param limit The offset after the last code unit from this string to compare. 612 * @param srcText Another string to compare this one to. 613 * @param srcStart The start offset in that string at which the compare operation begins. 614 * @param srcLimit The offset after the last code unit from that string to compare. 615 * @return a negative/zero/positive integer corresponding to whether 616 * this string is less than/equal to/greater than the second one 617 * in code point order 618 * @stable ICU 2.0 619 */ 620 inline int8_t compareCodePointOrderBetween(int32_t start, 621 int32_t limit, 622 const UnicodeString& srcText, 623 int32_t srcStart, 624 int32_t srcLimit) const; 625 626 /** 627 * Compare two strings case-insensitively using full case folding. 628 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). 
629 * 630 * @param text Another string to compare this one to. 631 * @param options A bit set of options: 632 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 633 * Comparison in code unit order with default case folding. 634 * 635 * - U_COMPARE_CODE_POINT_ORDER 636 * Set to choose code point order instead of code unit order 637 * (see u_strCompare for details). 638 * 639 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 640 * 641 * @return A negative, zero, or positive integer indicating the comparison result. 642 * @stable ICU 2.0 643 */ 644 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 645 646 /** 647 * Compare two strings case-insensitively using full case folding. 648 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)), except that only the range [<code>start</code>, <code>start + length</code>) of this string takes part in the comparison. 649 * 650 * @param start The start offset in this string at which the compare operation begins. 651 * @param length The number of code units from this string to compare. 652 * @param srcText Another string to compare this one to. 653 * @param options A bit set of options: 654 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 655 * Comparison in code unit order with default case folding. 656 * 657 * - U_COMPARE_CODE_POINT_ORDER 658 * Set to choose code point order instead of code unit order 659 * (see u_strCompare for details). 660 * 661 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 662 * 663 * @return A negative, zero, or positive integer indicating the comparison result. 664 * @stable ICU 2.0 665 */ 666 inline int8_t caseCompare(int32_t start, 667 int32_t length, 668 const UnicodeString& srcText, 669 uint32_t options) const; 670 671 /** 672 * Compare two strings case-insensitively using full case folding. 673 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)), except that only the ranges [<code>start</code>, <code>start + length</code>) of this string and [<code>srcStart</code>, <code>srcStart + srcLength</code>) of <code>srcText</code> take part in the comparison. 674 * 675 * @param start The start offset in this string at which the compare operation begins. 676 * @param length The number of code units from this string to compare.
677 * @param srcText Another string to compare this one to. 678 * @param srcStart The start offset in that string at which the compare operation begins. 679 * @param srcLength The number of code units from that string to compare. 680 * @param options A bit set of options: 681 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 682 * Comparison in code unit order with default case folding. 683 * 684 * - U_COMPARE_CODE_POINT_ORDER 685 * Set to choose code point order instead of code unit order 686 * (see u_strCompare for details). 687 * 688 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 689 * 690 * @return A negative, zero, or positive integer indicating the comparison result. 691 * @stable ICU 2.0 692 */ 693 inline int8_t caseCompare(int32_t start, 694 int32_t length, 695 const UnicodeString& srcText, 696 int32_t srcStart, 697 int32_t srcLength, 698 uint32_t options) const; 699 700 /** 701 * Compare two strings case-insensitively using full case folding. 702 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)), where the foldCase() call on <code>srcChars</code> is notation only (<code>srcChars</code> is a raw <code>const UChar *</code>) and stands for case-folding its first <code>srcLength</code> code units. 703 * 704 * @param srcChars A pointer to another string to compare this one to. 705 * @param srcLength The number of code units from that string to compare. 706 * @param options A bit set of options: 707 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 708 * Comparison in code unit order with default case folding. 709 * 710 * - U_COMPARE_CODE_POINT_ORDER 711 * Set to choose code point order instead of code unit order 712 * (see u_strCompare for details). 713 * 714 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 715 * 716 * @return A negative, zero, or positive integer indicating the comparison result. 717 * @stable ICU 2.0 718 */ 719 inline int8_t caseCompare(const UChar *srcChars, 720 int32_t srcLength, 721 uint32_t options) const; 722 723 /** 724 * Compare two strings case-insensitively using full case folding. 725 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)), where the foldCase() call on <code>srcChars</code> is notation only (<code>srcChars</code> is a raw <code>const UChar *</code>), and only the range [<code>start</code>, <code>start + length</code>) of this string takes part in the comparison.
726 * 727 * @param start The start offset in this string at which the compare operation begins. 728 * @param length The number of code units from this string to compare. 729 * @param srcChars A pointer to another string to compare this one to. 730 * @param options A bit set of options: 731 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 732 * Comparison in code unit order with default case folding. 733 * 734 * - U_COMPARE_CODE_POINT_ORDER 735 * Set to choose code point order instead of code unit order 736 * (see u_strCompare for details). 737 * 738 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 739 * 740 * @return A negative, zero, or positive integer indicating the comparison result. 741 * @stable ICU 2.0 742 */ 743 inline int8_t caseCompare(int32_t start, 744 int32_t length, 745 const UChar *srcChars, 746 uint32_t options) const; 747 748 /** 749 * Compare two strings case-insensitively using full case folding. 750 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 751 * 752 * @param start The start offset in this string at which the compare operation begins. 753 * @param length The number of code units from this string to compare. 754 * @param srcChars A pointer to another string to compare this one to. 755 * @param srcStart The start offset in that string at which the compare operation begins. 756 * @param srcLength The number of code units from that string to compare. 757 * @param options A bit set of options: 758 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 759 * Comparison in code unit order with default case folding. 760 * 761 * - U_COMPARE_CODE_POINT_ORDER 762 * Set to choose code point order instead of code unit order 763 * (see u_strCompare for details). 764 * 765 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 766 * 767 * @return A negative, zero, or positive integer indicating the comparison result.
768 * @stable ICU 2.0 769 */ 770 inline int8_t caseCompare(int32_t start, 771 int32_t length, 772 const UChar *srcChars, 773 int32_t srcStart, 774 int32_t srcLength, 775 uint32_t options) const; 776 777 /** 778 * Compare two strings case-insensitively using full case folding. 779 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)), except that only the ranges [<code>start</code>, <code>limit</code>) of this string and [<code>srcStart</code>, <code>srcLimit</code>) of <code>srcText</code> take part in the comparison. 780 * 781 * @param start The start offset in this string at which the compare operation begins. 782 * @param limit The offset after the last code unit from this string to compare. 783 * @param srcText Another string to compare this one to. 784 * @param srcStart The start offset in that string at which the compare operation begins. 785 * @param srcLimit The offset after the last code unit from that string to compare. 786 * @param options A bit set of options: 787 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 788 * Comparison in code unit order with default case folding. 789 * 790 * - U_COMPARE_CODE_POINT_ORDER 791 * Set to choose code point order instead of code unit order 792 * (see u_strCompare for details). 793 * 794 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 795 * 796 * @return A negative, zero, or positive integer indicating the comparison result. 797 * @stable ICU 2.0 798 */ 799 inline int8_t caseCompareBetween(int32_t start, 800 int32_t limit, 801 const UnicodeString& srcText, 802 int32_t srcStart, 803 int32_t srcLimit, 804 uint32_t options) const; 805 806 /** 807 * Determine if this starts with the characters in <TT>text</TT> 808 * @param text The text to match. 809 * @return TRUE if this starts with the characters in <TT>text</TT>, 810 * FALSE otherwise 811 * @stable ICU 2.0 812 */ 813 inline UBool startsWith(const UnicodeString& text) const; 814 815 /** 816 * Determine if this starts with the characters in <TT>srcText</TT> 817 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 818 * @param srcText The text to match.
819 * @param srcStart the offset into <TT>srcText</TT> to start matching 820 * @param srcLength the number of characters in <TT>srcText</TT> to match 821 * @return TRUE if this starts with the characters in <TT>srcText</TT>, 822 * FALSE otherwise 823 * @stable ICU 2.0 824 */ 825 inline UBool startsWith(const UnicodeString& srcText, 826 int32_t srcStart, 827 int32_t srcLength) const; 828 829 /** 830 * Determine if this starts with the characters in <TT>srcChars</TT> 831 * @param srcChars The characters to match. 832 * @param srcLength the number of characters in <TT>srcChars</TT> 833 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, 834 * FALSE otherwise 835 * @stable ICU 2.0 836 */ 837 inline UBool startsWith(const UChar *srcChars, 838 int32_t srcLength) const; 839 840 /** 841 * Determine if this starts with the characters in <TT>srcChars</TT> 842 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 843 * @param srcChars The characters to match. 844 * @param srcStart the offset into <TT>srcChars</TT> to start matching 845 * @param srcLength the number of characters in <TT>srcChars</TT> to match 846 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise 847 * @stable ICU 2.0 848 */ 849 inline UBool startsWith(const UChar *srcChars, 850 int32_t srcStart, 851 int32_t srcLength) const; 852 853 /** 854 * Determine if this ends with the characters in <TT>text</TT> 855 * @param text The text to match. 856 * @return TRUE if this ends with the characters in <TT>text</TT>, 857 * FALSE otherwise 858 * @stable ICU 2.0 859 */ 860 inline UBool endsWith(const UnicodeString& text) const; 861 862 /** 863 * Determine if this ends with the characters in <TT>srcText</TT> 864 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 865 * @param srcText The text to match.
866 * @param srcStart the offset into <TT>srcText</TT> to start matching 867 * @param srcLength the number of characters in <TT>srcText</TT> to match 868 * @return TRUE if this ends with the characters in <TT>srcText</TT>, 869 * FALSE otherwise 870 * @stable ICU 2.0 871 */ 872 inline UBool endsWith(const UnicodeString& srcText, 873 int32_t srcStart, 874 int32_t srcLength) const; 875 876 /** 877 * Determine if this ends with the characters in <TT>srcChars</TT> 878 * @param srcChars The characters to match. 879 * @param srcLength the number of characters in <TT>srcChars</TT> 880 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 881 * FALSE otherwise 882 * @stable ICU 2.0 883 */ 884 inline UBool endsWith(const UChar *srcChars, 885 int32_t srcLength) const; 886 887 /** 888 * Determine if this ends with the characters in <TT>srcChars</TT> 889 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 890 * @param srcChars The characters to match. 891 * @param srcStart the offset into <TT>srcChars</TT> to start matching 892 * @param srcLength the number of characters in <TT>srcChars</TT> to match 893 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 894 * FALSE otherwise 895 * @stable ICU 2.0 896 */ 897 inline UBool endsWith(const UChar *srcChars, 898 int32_t srcStart, 899 int32_t srcLength) const; 900 901 902 /* Searching - bitwise only */ 903 904 /** 905 * Locate in this the first occurrence of the characters in <TT>text</TT>, 906 * using bitwise comparison. 907 * @param text The text to search for. 908 * @return The offset into this of the start of <TT>text</TT>, 909 * or -1 if not found. 910 * @stable ICU 2.0 911 */ 912 inline int32_t indexOf(const UnicodeString& text) const; 913 914 /** 915 * Locate in this the first occurrence of the characters in <TT>text</TT> 916 * starting at offset <TT>start</TT>, using bitwise comparison. 917 * @param text The text to search for.
918 * @param start The offset at which searching will start. 919 * @return The offset into this of the start of <TT>text</TT>, 920 * or -1 if not found. 921 * @stable ICU 2.0 922 */ 923 inline int32_t indexOf(const UnicodeString& text, 924 int32_t start) const; 925 926 /** 927 * Locate in this the first occurrence in the range 928 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 929 * in <TT>text</TT>, using bitwise comparison. 930 * @param text The text to search for. 931 * @param start The offset at which searching will start. 932 * @param length The number of characters to search 933 * @return The offset into this of the start of <TT>text</TT>, 934 * or -1 if not found. 935 * @stable ICU 2.0 936 */ 937 inline int32_t indexOf(const UnicodeString& text, 938 int32_t start, 939 int32_t length) const; 940 941 /** 942 * Locate in this the first occurrence in the range 943 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 944 * in <TT>srcText</TT> in the range 945 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 946 * using bitwise comparison. 947 * @param srcText The text to search for. 948 * @param srcStart the offset into <TT>srcText</TT> at which 949 * to start matching 950 * @param srcLength the number of characters in <TT>srcText</TT> to match 951 * @param start the offset into this at which to start matching 952 * @param length the number of characters in this to search 953 * @return The offset into this of the start of the matched range of <TT>srcText</TT>, 954 * or -1 if not found. 955 * @stable ICU 2.0 956 */ 957 inline int32_t indexOf(const UnicodeString& srcText, 958 int32_t srcStart, 959 int32_t srcLength, 960 int32_t start, 961 int32_t length) const; 962 963 /** 964 * Locate in this the first occurrence of the characters in 965 * <TT>srcChars</TT> 966 * starting at offset <TT>start</TT>, using bitwise comparison. 967 * @param srcChars The text to search for.
968 * @param srcLength the number of characters in <TT>srcChars</TT> to match 969 * @param start the offset into this at which to start matching 970 * @return The offset into this of the start of <TT>srcChars</TT>, 971 * or -1 if not found. 972 * @stable ICU 2.0 973 */ 974 inline int32_t indexOf(const UChar *srcChars, 975 int32_t srcLength, 976 int32_t start) const; 977 978 /** 979 * Locate in this the first occurrence in the range 980 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 981 * in <TT>srcChars</TT>, using bitwise comparison. 982 * @param srcChars The text to search for. 983 * @param srcLength the number of characters in <TT>srcChars</TT> 984 * @param start The offset at which searching will start. 985 * @param length The number of characters to search 986 * @return The offset into this of the start of <TT>srcChars</TT>, 987 * or -1 if not found. 988 * @stable ICU 2.0 989 */ 990 inline int32_t indexOf(const UChar *srcChars, 991 int32_t srcLength, 992 int32_t start, 993 int32_t length) const; 994 995 /** 996 * Locate in this the first occurrence in the range 997 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 998 * in <TT>srcChars</TT> in the range 999 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1000 * using bitwise comparison. 1001 * @param srcChars The text to search for. 1002 * @param srcStart the offset into <TT>srcChars</TT> at which 1003 * to start matching 1004 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1005 * @param start the offset into this at which to start matching 1006 * @param length the number of characters in this to search 1007 * @return The offset into this of the start of the matched range of <TT>srcChars</TT>, 1008 * or -1 if not found.
1009 * @stable ICU 2.0 1010 */ 1011 int32_t indexOf(const UChar *srcChars, 1012 int32_t srcStart, 1013 int32_t srcLength, 1014 int32_t start, 1015 int32_t length) const; 1016 1017 /** 1018 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1019 * using bitwise comparison. 1020 * @param c The code unit to search for. 1021 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1022 * @stable ICU 2.0 1023 */ 1024 inline int32_t indexOf(UChar c) const; 1025 1026 /** 1027 * Locate in this the first occurrence of the code point <TT>c</TT>, 1028 * using bitwise comparison. 1029 * 1030 * @param c The code point to search for. 1031 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1032 * @stable ICU 2.0 1033 */ 1034 inline int32_t indexOf(UChar32 c) const; 1035 1036 /** 1037 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1038 * starting at offset <TT>start</TT>, using bitwise comparison. 1039 * @param c The code unit to search for. 1040 * @param start The offset at which searching will start. 1041 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1042 * @stable ICU 2.0 1043 */ 1044 inline int32_t indexOf(UChar c, 1045 int32_t start) const; 1046 1047 /** 1048 * Locate in this the first occurrence of the code point <TT>c</TT> 1049 * starting at offset <TT>start</TT>, using bitwise comparison. 1050 * 1051 * @param c The code point to search for. 1052 * @param start The offset at which searching will start. 1053 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1054 * @stable ICU 2.0 1055 */ 1056 inline int32_t indexOf(UChar32 c, 1057 int32_t start) const; 1058 1059 /** 1060 * Locate in this the first occurrence of the BMP code point <code>c</code> 1061 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1062 * using bitwise comparison. 1063 * @param c The code unit to search for.
1064 * @param start the offset into this at which to start matching 1065 * @param length the number of characters in this to search 1066 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1067 * @stable ICU 2.0 1068 */ 1069 inline int32_t indexOf(UChar c, 1070 int32_t start, 1071 int32_t length) const; 1072 1073 /** 1074 * Locate in this the first occurrence of the code point <TT>c</TT> 1075 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1076 * using bitwise comparison. 1077 * 1078 * @param c The code point to search for. 1079 * @param start the offset into this at which to start matching 1080 * @param length the number of characters in this to search 1081 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1082 * @stable ICU 2.0 1083 */ 1084 inline int32_t indexOf(UChar32 c, 1085 int32_t start, 1086 int32_t length) const; 1087 1088 /** 1089 * Locate in this the last occurrence of the characters in <TT>text</TT>, 1090 * using bitwise comparison. 1091 * @param text The text to search for. 1092 * @return The offset into this of the start of <TT>text</TT>, 1093 * or -1 if not found. 1094 * @stable ICU 2.0 1095 */ 1096 inline int32_t lastIndexOf(const UnicodeString& text) const; 1097 1098 /** 1099 * Locate in this the last occurrence of the characters in <TT>text</TT> 1100 * starting at offset <TT>start</TT>, using bitwise comparison. 1101 * @param text The text to search for. 1102 * @param start The offset at which searching will start. 1103 * @return The offset into this of the start of <TT>text</TT>, 1104 * or -1 if not found. 1105 * @stable ICU 2.0 1106 */ 1107 inline int32_t lastIndexOf(const UnicodeString& text, 1108 int32_t start) const; 1109 1110 /** 1111 * Locate in this the last occurrence in the range 1112 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1113 * in <TT>text</TT>, using bitwise comparison. 1114 * @param text The text to search for.
1115 * @param start The offset at which searching will start. 1116 * @param length The number of characters to search 1117 * @return The offset into this of the start of <TT>text</TT>, 1118 * or -1 if not found. 1119 * @stable ICU 2.0 1120 */ 1121 inline int32_t lastIndexOf(const UnicodeString& text, 1122 int32_t start, 1123 int32_t length) const; 1124 1125 /** 1126 * Locate in this the last occurrence in the range 1127 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1128 * in <TT>srcText</TT> in the range 1129 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1130 * using bitwise comparison. 1131 * @param srcText The text to search for. 1132 * @param srcStart the offset into <TT>srcText</TT> at which 1133 * to start matching 1134 * @param srcLength the number of characters in <TT>srcText</TT> to match 1135 * @param start the offset into this at which to start matching 1136 * @param length the number of characters in this to search 1137 * @return The offset into this of the start of the matched range of <TT>srcText</TT>, 1138 * or -1 if not found. 1139 * @stable ICU 2.0 1140 */ 1141 inline int32_t lastIndexOf(const UnicodeString& srcText, 1142 int32_t srcStart, 1143 int32_t srcLength, 1144 int32_t start, 1145 int32_t length) const; 1146 1147 /** 1148 * Locate in this the last occurrence of the characters in <TT>srcChars</TT> 1149 * starting at offset <TT>start</TT>, using bitwise comparison. 1150 * @param srcChars The text to search for. 1151 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1152 * @param start the offset into this at which to start matching 1153 * @return The offset into this of the start of <TT>srcChars</TT>, 1154 * or -1 if not found.
1155 * @stable ICU 2.0 1156 */ 1157 inline int32_t lastIndexOf(const UChar *srcChars, 1158 int32_t srcLength, 1159 int32_t start) const; 1160 1161 /** 1162 * Locate in this the last occurrence in the range 1163 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1164 * in <TT>srcChars</TT>, using bitwise comparison. 1165 * @param srcChars The text to search for. 1166 * @param srcLength the number of characters in <TT>srcChars</TT> 1167 * @param start The offset at which searching will start. 1168 * @param length The number of characters to search 1169 * @return The offset into this of the start of <TT>srcChars</TT>, 1170 * or -1 if not found. 1171 * @stable ICU 2.0 1172 */ 1173 inline int32_t lastIndexOf(const UChar *srcChars, 1174 int32_t srcLength, 1175 int32_t start, 1176 int32_t length) const; 1177 1178 /** 1179 * Locate in this the last occurrence in the range 1180 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1181 * in <TT>srcChars</TT> in the range 1182 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1183 * using bitwise comparison. 1184 * @param srcChars The text to search for. 1185 * @param srcStart the offset into <TT>srcChars</TT> at which 1186 * to start matching 1187 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1188 * @param start the offset into this at which to start matching 1189 * @param length the number of characters in this to search 1190 * @return The offset into this of the start of the matched range of <TT>srcChars</TT>, 1191 * or -1 if not found. 1192 * @stable ICU 2.0 1193 */ 1194 int32_t lastIndexOf(const UChar *srcChars, 1195 int32_t srcStart, 1196 int32_t srcLength, 1197 int32_t start, 1198 int32_t length) const; 1199 1200 /** 1201 * Locate in this the last occurrence of the BMP code point <code>c</code>, 1202 * using bitwise comparison. 1203 * @param c The code unit to search for. 1204 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1205 * @stable ICU 2.0 1206 */ 1207 inline int32_t lastIndexOf(UChar c) const; 1208 1209 /** 1210 * Locate in this the last occurrence of the code point <TT>c</TT>, 1211 * using bitwise comparison. 1212 * 1213 * @param c The code point to search for. 1214 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1215 * @stable ICU 2.0 1216 */ 1217 inline int32_t lastIndexOf(UChar32 c) const; 1218 1219 /** 1220 * Locate in this the last occurrence of the BMP code point <code>c</code> 1221 * starting at offset <TT>start</TT>, using bitwise comparison. 1222 * @param c The code unit to search for. 1223 * @param start The offset at which searching will start. 1224 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1225 * @stable ICU 2.0 1226 */ 1227 inline int32_t lastIndexOf(UChar c, 1228 int32_t start) const; 1229 1230 /** 1231 * Locate in this the last occurrence of the code point <TT>c</TT> 1232 * starting at offset <TT>start</TT>, using bitwise comparison. 1233 * 1234 * @param c The code point to search for. 1235 * @param start The offset at which searching will start. 1236 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1237 * @stable ICU 2.0 1238 */ 1239 inline int32_t lastIndexOf(UChar32 c, 1240 int32_t start) const; 1241 1242 /** 1243 * Locate in this the last occurrence of the BMP code point <code>c</code> 1244 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1245 * using bitwise comparison. 1246 * @param c The code unit to search for. 1247 * @param start the offset into this at which to start matching 1248 * @param length the number of characters in this to search 1249 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1250 * @stable ICU 2.0 1251 */ 1252 inline int32_t lastIndexOf(UChar c, 1253 int32_t start, 1254 int32_t length) const; 1255 1256 /** 1257 * Locate in this the last occurrence of the code point <TT>c</TT> 1258 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1259 * using bitwise comparison. 1260 * 1261 * @param c The code point to search for. 1262 * @param start the offset into this at which to start matching 1263 * @param length the number of characters in this to search 1264 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1265 * @stable ICU 2.0 1266 */ 1267 inline int32_t lastIndexOf(UChar32 c, 1268 int32_t start, 1269 int32_t length) const; 1270 1271 1272 /* Character access */ 1273 1274 /** 1275 * Return the code unit at offset <tt>offset</tt>. 1276 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1277 * @param offset a valid offset into the text 1278 * @return the code unit at offset <tt>offset</tt> 1279 * or 0xffff if the offset is not valid for this string 1280 * @stable ICU 2.0 1281 */ 1282 inline UChar charAt(int32_t offset) const; 1283 1284 /** 1285 * Return the code unit at offset <tt>offset</tt>. 1286 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1287 * @param offset a valid offset into the text 1288 * @return the code unit at offset <tt>offset</tt> or 0xffff if the offset is not valid for this string 1289 * @stable ICU 2.0 1290 */ 1291 inline UChar operator[] (int32_t offset) const; 1292 1293 /** 1294 * Return the code point that contains the code unit 1295 * at offset <tt>offset</tt>. 1296 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1297 * @param offset a valid offset into the text 1298 * that indicates the text offset of any of the code units 1299 * that will be assembled into a code point (21-bit value) and returned 1300 * @return the code point of text at <tt>offset</tt> 1301 * or 0xffff if the offset is not valid for this string 1302 * @stable ICU 2.0 1303 */ 1304 UChar32 char32At(int32_t offset) const; 1305 1306 /** 1307 * Adjust a random-access offset so that 1308 * it points to the beginning of a Unicode character. 1309 * The offset that is passed in points to 1310 * any code unit of a code point, 1311 * while the returned offset will point to the first code unit 1312 * of the same code point. 1313 * In UTF-16, if the input offset points to a second surrogate 1314 * of a surrogate pair, then the returned offset will point 1315 * to the first surrogate. 1316 * @param offset a valid offset into one code point of the text 1317 * @return offset of the first code unit of the same code point 1318 * @see U16_SET_CP_START 1319 * @stable ICU 2.0 1320 */ 1321 int32_t getChar32Start(int32_t offset) const; 1322 1323 /** 1324 * Adjust a random-access offset so that 1325 * it points behind a Unicode character. 1326 * The offset that is passed in points behind 1327 * any code unit of a code point, 1328 * while the returned offset will point behind the last code unit 1329 * of the same code point. 1330 * In UTF-16, if the input offset points behind the first surrogate 1331 * (i.e., to the second surrogate) 1332 * of a surrogate pair, then the returned offset will point 1333 * behind the second surrogate (i.e., to the first code unit after the surrogate pair). 1334 * @param offset a valid offset after any code unit of a code point of the text 1335 * @return offset of the first code unit after the same code point 1336 * @see U16_SET_CP_LIMIT 1337 * @stable ICU 2.0 1338 */ 1339 int32_t getChar32Limit(int32_t offset) const; 1340 1341 /** 1342 * Move the code unit index along the string by delta code points.
1343 * Interpret the input index as a code unit-based offset into the string, 1344 * move the index forward or backward by delta code points, and 1345 * return the resulting index. 1346 * The input index should point to the first code unit of a code point, 1347 * if there is more than one. 1348 * 1349 * Both input and output indexes are code unit-based as for all 1350 * string indexes/offsets in ICU (and other libraries, like MBCS char*). 1351 * If delta<0 then the index is moved backward (toward the start of the string). 1352 * If delta>0 then the index is moved forward (toward the end of the string). 1353 * 1354 * This behaves like CharacterIterator::move32(delta, kCurrent). 1355 * 1356 * Behavior for out-of-bounds indexes: 1357 * <code>moveIndex32</code> pins the input index to 0..length(), i.e., 1358 * if the input index<0 then it is pinned to 0; 1359 * if the input index>length() then it is pinned to length(). 1360 * Afterwards, the index is moved by <code>delta</code> code points 1361 * forward or backward, 1362 * but no further backward than to 0 and no further forward than to length(). 1363 * The returned index will be between 0 and length(), inclusive.
1364 * 1365 * Examples: 1366 * <pre> 1367 * // s has code points 'a' U+10000 'b' U+10ffff U+2029 1368 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); 1369 * 1370 * // initial index: position of U+10000 1371 * int32_t index=1; 1372 * 1373 * // the following examples will all result in index==4, position of U+10ffff 1374 * 1375 * // skip 2 code points from some position in the string 1376 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' 1377 * 1378 * // go to the 3rd code point from the start of s (0-based) 1379 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' 1380 * 1381 * // go to the next-to-last code point of s 1382 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff 1383 * </pre> 1384 * 1385 * @param index input code unit index 1386 * @param delta (signed) code point count to move the index forward or backward 1387 * in the string 1388 * @return the resulting code unit index 1389 * @stable ICU 2.0 1390 */ 1391 int32_t moveIndex32(int32_t index, int32_t delta) const; 1392 1393 /* Substring extraction */ 1394 1395 /** 1396 * Copy the characters in the range 1397 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, 1398 * beginning at <tt>dstStart</tt>. 1399 * If the string aliases to <code>dst</code> itself as an external buffer, 1400 * then extract() will not copy the contents. 1401 * 1402 * @param start offset of first character which will be copied into the array 1403 * @param length the number of characters to extract 1404 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1405 * must be at least (<tt>dstStart + length</tt>). 1406 * @param dstStart the offset in <TT>dst</TT> where the first character 1407 * will be extracted 1408 * @stable ICU 2.0 1409 */ 1410 inline void extract(int32_t start, 1411 int32_t length, 1412 UChar *dst, 1413 int32_t dstStart = 0) const; 1414 1415 /** 1416 * Copy the contents of the string into dest.
1417 * This is a convenience function that 1418 * checks if there is enough space in dest, 1419 * extracts the entire string if possible, 1420 * and NUL-terminates dest if possible. 1421 * 1422 * If the string fits into dest but cannot be NUL-terminated 1423 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. 1424 * If the string itself does not fit into dest 1425 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. 1426 * 1427 * If the string aliases to <code>dest</code> itself as an external buffer, 1428 * then extract() will not copy the contents. 1429 * 1430 * @param dest Destination string buffer. 1431 * @param destCapacity Number of UChars available at dest. 1432 * @param errorCode ICU error code. 1433 * @return length() 1434 * @stable ICU 2.0 1435 */ 1436 int32_t 1437 extract(UChar *dest, int32_t destCapacity, 1438 UErrorCode &errorCode) const; 1439 1440 /** 1441 * Copy the characters in the range 1442 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString 1443 * <tt>target</tt>. 1444 * This function is declared <code>void</code>: the result is delivered in <tt>target</tt>, nothing is returned. 1445 * @param start offset of first character which will be copied 1446 * @param length the number of characters to extract 1447 * @param target UnicodeString into which to copy characters. 1448 * @stable ICU 2.0 1449 */ 1450 inline void extract(int32_t start, 1451 int32_t length, 1452 UnicodeString& target) const; 1453 1454 /** 1455 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1456 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. 1457 * @param start offset of first character which will be copied into the array 1458 * @param limit offset immediately following the last character to be copied 1459 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1460 * must be at least (<tt>dstStart + (limit - start)</tt>).
1461 * @param dstStart the offset in <TT>dst</TT> where the first character 1462 * will be extracted 1463 * @stable ICU 2.0 1464 */ 1465 inline void extractBetween(int32_t start, 1466 int32_t limit, 1467 UChar *dst, 1468 int32_t dstStart = 0) const; 1469 1470 /** 1471 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1472 * into the UnicodeString <tt>target</tt>. Replaceable API. 1473 * This function is declared <code>void</code>: the result is delivered in <tt>target</tt>, nothing is returned. 1474 * @param start offset of first character which will be copied 1475 * @param limit offset immediately following the last character to be copied 1476 * @param target UnicodeString into which to copy characters. 1477 * @stable ICU 2.0 1478 */ 1479 virtual void extractBetween(int32_t start, 1480 int32_t limit, 1481 UnicodeString& target) const; 1482 1483 /** 1484 * Copy the characters in the range 1485 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters. 1486 * All characters must be invariant (see utypes.h). 1487 * Use US_INV as the last, signature-distinguishing parameter. 1488 * 1489 * This function does not write any more than <code>targetCapacity</code> 1490 * characters but returns the length of the entire output string 1491 * so that one can allocate a larger buffer and call the function again 1492 * if necessary. 1493 * The output string is NUL-terminated if possible. 1494 * 1495 * @param start offset of first character which will be copied 1496 * @param startLength the number of characters to extract 1497 * @param target the target buffer for extraction, can be NULL 1498 * if targetCapacity is 0 1499 * @param targetCapacity the length of the target buffer 1500 * @param inv Signature-distinguishing parameter, use US_INV.
1501 * @return the output string length, not including the terminating NUL 1502 * @stable ICU 3.2 1503 */ 1504 int32_t extract(int32_t start, 1505 int32_t startLength, 1506 char *target, 1507 int32_t targetCapacity, 1508 enum EInvariant inv) const; 1509 1510 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 1511 1512 /** 1513 * Copy the characters in the range 1514 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters 1515 * in the platform's default codepage. 1516 * This function does not write any more than <code>targetLength</code> 1517 * characters but returns the length of the entire output string 1518 * so that one can allocate a larger buffer and call the function again 1519 * if necessary. 1520 * The output string is NUL-terminated if possible. 1521 * 1522 * @param start offset of first character which will be copied 1523 * @param startLength the number of characters to extract 1524 * @param target the target buffer for extraction 1525 * @param targetLength the length of the target buffer 1526 * If <TT>target</TT> is NULL, then the number of bytes required for 1527 * <TT>target</TT> is returned. 1528 * @return the output string length, not including the terminating NUL 1529 * @stable ICU 2.0 1530 */ 1531 int32_t extract(int32_t start, 1532 int32_t startLength, 1533 char *target, 1534 uint32_t targetLength) const; 1535 1536 #endif 1537 1538 #if !UCONFIG_NO_CONVERSION 1539 1540 /** 1541 * Copy the characters in the range 1542 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters 1543 * in a specified codepage. 1544 * The output string is NUL-terminated. 1545 * 1546 * Recommendation: For invariant-character strings use 1547 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1548 * because it avoids object code dependencies of UnicodeString on 1549 * the conversion code.
1550 * 1551 * @param start offset of first character which will be copied 1552 * @param startLength the number of characters to extract 1553 * @param target the target buffer for extraction 1554 * @param codepage the desired codepage for the characters. 0 has 1555 * the special meaning of the default codepage 1556 * If <code>codepage</code> is an empty string (<code>""</code>), 1557 * then a simple conversion is performed on the codepage-invariant 1558 * subset ("invariant characters") of the platform encoding. See utypes.h. 1559 * If <TT>target</TT> is NULL, then the number of bytes required for 1560 * <TT>target</TT> is returned. It is assumed that the target is big enough 1561 * to fit all of the characters. 1562 * @return the output string length, not including the terminating NUL 1563 * @stable ICU 2.0 1564 */ 1565 inline int32_t extract(int32_t start, 1566 int32_t startLength, 1567 char *target, 1568 const char *codepage = 0) const; 1569 1570 /** 1571 * Copy the characters in the range 1572 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters 1573 * in a specified codepage. 1574 * This function does not write any more than <code>targetLength</code> 1575 * characters but returns the length of the entire output string 1576 * so that one can allocate a larger buffer and call the function again 1577 * if necessary. 1578 * The output string is NUL-terminated if possible. 1579 * 1580 * Recommendation: For invariant-character strings use 1581 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1582 * because it avoids object code dependencies of UnicodeString on 1583 * the conversion code.
1584 * 1585 * @param start offset of first character which will be copied 1586 * @param startLength the number of characters to extract 1587 * @param target the target buffer for extraction 1588 * @param targetLength the length of the target buffer 1589 * @param codepage the desired codepage for the characters. 0 has 1590 * the special meaning of the default codepage 1591 * If <code>codepage</code> is an empty string (<code>""</code>), 1592 * then a simple conversion is performed on the codepage-invariant 1593 * subset ("invariant characters") of the platform encoding. See utypes.h. 1594 * If <TT>target</TT> is NULL, then the number of bytes required for 1595 * <TT>target</TT> is returned. 1596 * @return the output string length, not including the terminating NUL 1597 * @stable ICU 2.0 1598 */ 1599 int32_t extract(int32_t start, 1600 int32_t startLength, 1601 char *target, 1602 uint32_t targetLength, 1603 const char *codepage) const; 1604 1605 /** 1606 * Convert the UnicodeString into a codepage string using an existing UConverter. 1607 * The output string is NUL-terminated if possible. 1608 * 1609 * This function avoids the overhead of opening and closing a converter if 1610 * multiple strings are extracted.
1611 * 1612 * @param dest destination string buffer, can be NULL if destCapacity==0 1613 * @param destCapacity the number of chars available at dest 1614 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), 1615 * or NULL for the default converter 1616 * @param errorCode normal ICU error code 1617 * @return the length of the output string, not counting the terminating NUL; 1618 * if the length is greater than destCapacity, then the string will not fit 1619 * and a buffer of the indicated length would need to be passed in 1620 * @stable ICU 2.0 1621 */ 1622 int32_t extract(char *dest, int32_t destCapacity, 1623 UConverter *cnv, 1624 UErrorCode &errorCode) const; 1625 1626 #endif 1627 1628 /** 1629 * Create a temporary substring for the specified range. 1630 * Unlike the substring constructor and setTo() functions, 1631 * the object returned here will be a read-only alias (using getBuffer()) 1632 * rather than copying the text. 1633 * As a result, this substring operation is much faster but requires 1634 * that the original string not be modified or deleted during the lifetime 1635 * of the returned substring object. 1636 * @param start offset of the first character visible in the substring 1637 * @param length length of the substring 1638 * @return a read-only alias UnicodeString object for the substring 1639 * @stable ICU 4.4 1640 */ 1641 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 1642 1643 /** 1644 * Create a temporary substring for the specified range. 1645 * Same as tempSubString(start, length) except that the substring range 1646 * is specified as a (start, limit) pair (with an exclusive limit index) 1647 * rather than a (start, length) pair. 
1648 * @param start offset of the first character visible in the substring 1649 * @param limit offset immediately following the last character visible in the substring 1650 * @return a read-only alias UnicodeString object for the substring 1651 * @stable ICU 4.4 1652 */ 1653 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 1654 1655 /** 1656 * Convert the UnicodeString to UTF-8 and write the result 1657 * to a ByteSink. This is called by toUTF8String(). 1658 * Unpaired surrogates are replaced with U+FFFD. 1659 * Calls u_strToUTF8WithSub(). 1660 * 1661 * @param sink A ByteSink to which the UTF-8 version of the string is written. 1662 * sink.Flush() is called at the end. 1663 * @stable ICU 4.2 1664 * @see toUTF8String 1665 */ 1666 void toUTF8(ByteSink &sink) const; 1667 1668 #if U_HAVE_STD_STRING 1669 1670 /** 1671 * Convert the UnicodeString to UTF-8 and append the result 1672 * to a standard string. 1673 * Unpaired surrogates are replaced with U+FFFD. 1674 * Calls toUTF8(). 1675 * 1676 * @param result A standard string (or a compatible object) 1677 * to which the UTF-8 version of the string is appended. 1678 * @return The string object. 1679 * @stable ICU 4.2 1680 * @see toUTF8 1681 */ 1682 template<typename StringClass> 1683 StringClass &toUTF8String(StringClass &result) const { 1684 StringByteSink<StringClass> sbs(&result); 1685 toUTF8(sbs); 1686 return result; 1687 } 1688 1689 #endif 1690 1691 /** 1692 * Convert the UnicodeString to UTF-32. 1693 * Unpaired surrogates are replaced with U+FFFD. 1694 * Calls u_strToUTF32WithSub(). 1695 * 1696 * @param utf32 destination string buffer, can be NULL if capacity==0 1697 * @param capacity the number of UChar32s available at utf32 1698 * @param errorCode Standard ICU error code. Its input value must 1699 * pass the U_SUCCESS() test, or else the function returns 1700 * immediately. Check for U_FAILURE() on output or use with 1701 * function chaining. (See User Guide for details.) 
1702 * @return The length of the UTF-32 string. 1703 * @see fromUTF32 1704 * @stable ICU 4.2 1705 */ 1706 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 1707 1708 /* Length operations */ 1709 1710 /** 1711 * Return the length of the UnicodeString object. 1712 * The length is the number of UChar code units that are in the UnicodeString. 1713 * If you want the number of code points, please use countChar32(). 1714 * @return the length of the UnicodeString object 1715 * @see countChar32 1716 * @stable ICU 2.0 1717 */ 1718 inline int32_t length(void) const; 1719 1720 /** 1721 * Count Unicode code points in the length UChar code units of the string. 1722 * A code point may occupy either one or two UChar code units. 1723 * Counting code points involves reading all code units. 1724 * 1725 * This function is basically the inverse of moveIndex32(). 1726 * 1727 * @param start the index of the first code unit to check 1728 * @param length the number of UChar code units to check 1729 * @return the number of code points in the specified code units 1730 * @see length 1731 * @stable ICU 2.0 1732 */ 1733 int32_t 1734 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 1735 1736 /** 1737 * Check if the length UChar code units of the string 1738 * contain more Unicode code points than a certain number. 1739 * This is more efficient than counting all code points in this part of the string 1740 * and comparing that number with a threshold. 1741 * This function may not need to scan the string at all if the length 1742 * falls within a certain range, and 1743 * never needs to count more than 'number+1' code points. 1744 * Logically equivalent to (countChar32(start, length)>number). 1745 * A Unicode code point may occupy either one or two UChar code units.
1746 * 1747 * @param start the index of the first code unit to check (0 for the entire string) 1748 * @param length the number of UChar code units to check 1749 * (use INT32_MAX for the entire string; remember that start/length 1750 * values are pinned) 1751 * @param number The number of code points in the (sub)string is compared against 1752 * the 'number' parameter. 1753 * @return Boolean value for whether the string contains more Unicode code points 1754 * than 'number'. Same as (u_countChar32(s, length)>number). 1755 * @see countChar32 1756 * @see u_strHasMoreChar32Than 1757 * @stable ICU 2.4 1758 */ 1759 UBool 1760 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 1761 1762 /** 1763 * Determine if this string is empty. 1764 * @return TRUE if this string contains 0 characters, FALSE otherwise. 1765 * @stable ICU 2.0 1766 */ 1767 inline UBool isEmpty(void) const; 1768 1769 /** 1770 * Return the capacity of the internal buffer of the UnicodeString object. 1771 * This is useful together with the getBuffer functions. 1772 * See there for details. 1773 * 1774 * @return the number of UChars available in the internal buffer 1775 * @see getBuffer 1776 * @stable ICU 2.0 1777 */ 1778 inline int32_t getCapacity(void) const; 1779 1780 /* Other operations */ 1781 1782 /** 1783 * Generate a hash code for this object. 1784 * @return The hash code of this UnicodeString. 1785 * @stable ICU 2.0 1786 */ 1787 inline int32_t hashCode(void) const; 1788 1789 /** 1790 * Determine if this object contains a valid string. 1791 * A bogus string has no value. It is different from an empty string, 1792 * although in both cases isEmpty() returns TRUE and length() returns 0. 1793 * setToBogus() and isBogus() can be used to indicate that no string value is available. 1794 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and 1795 * length() returns 0. 
1796 * 1797 * @return TRUE if the string is bogus/invalid, FALSE otherwise 1798 * @see setToBogus() 1799 * @stable ICU 2.0 1800 */ 1801 inline UBool isBogus(void) const; 1802 1803 1804 //======================================== 1805 // Write operations 1806 //======================================== 1807 1808 /* Assignment operations */ 1809 1810 /** 1811 * Assignment operator. Replace the characters in this UnicodeString 1812 * with the characters from <TT>srcText</TT>. 1813 * @param srcText The text containing the characters to replace 1814 * @return a reference to this 1815 * @stable ICU 2.0 1816 */ 1817 UnicodeString &operator=(const UnicodeString &srcText); 1818 1819 /** 1820 * Almost the same as the assignment operator. 1821 * Replace the characters in this UnicodeString 1822 * with the characters from <code>srcText</code>. 1823 * 1824 * This function works the same as the assignment operator 1825 * for all strings except for ones that are readonly aliases. 1826 * 1827 * Starting with ICU 2.4, the assignment operator and the copy constructor 1828 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1829 * This function implements the old, more efficient but less safe behavior 1830 * of making this string also a readonly alias to the same buffer. 1831 * 1832 * The fastCopyFrom function must be used only if it is known that the lifetime of 1833 * this UnicodeString does not exceed the lifetime of the aliased buffer 1834 * including its contents, for example for strings from resource bundles 1835 * or aliases to string constants. 1836 * 1837 * @param src The text containing the characters to replace. 1838 * @return a reference to this 1839 * @stable ICU 2.4 1840 */ 1841 UnicodeString &fastCopyFrom(const UnicodeString &src); 1842 1843 /** 1844 * Assignment operator. Replace the characters in this UnicodeString 1845 * with the code unit <TT>ch</TT>. 
1846 * @param ch the code unit to replace 1847 * @return a reference to this 1848 * @stable ICU 2.0 1849 */ 1850 inline UnicodeString& operator= (UChar ch); 1851 1852 /** 1853 * Assignment operator. Replace the characters in this UnicodeString 1854 * with the code point <TT>ch</TT>. 1855 * @param ch the code point to replace 1856 * @return a reference to this 1857 * @stable ICU 2.0 1858 */ 1859 inline UnicodeString& operator= (UChar32 ch); 1860 1861 /** 1862 * Set the text in the UnicodeString object to the characters 1863 * in <TT>srcText</TT> in the range 1864 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). 1865 * <TT>srcText</TT> is not modified. 1866 * @param srcText the source for the new characters 1867 * @param srcStart the offset into <TT>srcText</TT> where new characters 1868 * will be obtained 1869 * @return a reference to this 1870 * @stable ICU 2.2 1871 */ 1872 inline UnicodeString& setTo(const UnicodeString& srcText, 1873 int32_t srcStart); 1874 1875 /** 1876 * Set the text in the UnicodeString object to the characters 1877 * in <TT>srcText</TT> in the range 1878 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1879 * <TT>srcText</TT> is not modified. 1880 * @param srcText the source for the new characters 1881 * @param srcStart the offset into <TT>srcText</TT> where new characters 1882 * will be obtained 1883 * @param srcLength the number of characters in <TT>srcText</TT> in the 1884 * replace string. 1885 * @return a reference to this 1886 * @stable ICU 2.0 1887 */ 1888 inline UnicodeString& setTo(const UnicodeString& srcText, 1889 int32_t srcStart, 1890 int32_t srcLength); 1891 1892 /** 1893 * Set the text in the UnicodeString object to the characters in 1894 * <TT>srcText</TT>. 1895 * <TT>srcText</TT> is not modified. 
1896 * @param srcText the source for the new characters 1897 * @return a reference to this 1898 * @stable ICU 2.0 1899 */ 1900 inline UnicodeString& setTo(const UnicodeString& srcText); 1901 1902 /** 1903 * Set the characters in the UnicodeString object to the characters 1904 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 1905 * @param srcChars the source for the new characters 1906 * @param srcLength the number of Unicode characters in srcChars. 1907 * @return a reference to this 1908 * @stable ICU 2.0 1909 */ 1910 inline UnicodeString& setTo(const UChar *srcChars, 1911 int32_t srcLength); 1912 1913 /** 1914 * Set the characters in the UnicodeString object to the code unit 1915 * <TT>srcChar</TT>. 1916 * @param srcChar the code unit which becomes the UnicodeString's character 1917 * content 1918 * @return a reference to this 1919 * @stable ICU 2.0 1920 */ 1921 UnicodeString& setTo(UChar srcChar); 1922 1923 /** 1924 * Set the characters in the UnicodeString object to the code point 1925 * <TT>srcChar</TT>. 1926 * @param srcChar the code point which becomes the UnicodeString's character 1927 * content 1928 * @return a reference to this 1929 * @stable ICU 2.0 1930 */ 1931 UnicodeString& setTo(UChar32 srcChar); 1932 1933 /** 1934 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. 1935 * The text will be used for the UnicodeString object, but 1936 * it will not be released when the UnicodeString is destroyed. 1937 * This has copy-on-write semantics: 1938 * When the string is modified, then the buffer is first copied into 1939 * newly allocated memory. 1940 * The aliased buffer is never modified. 1941 * 1942 * In an assignment to another UnicodeString, when using the copy constructor 1943 * or the assignment operator, the text will be copied. 1944 * When using fastCopyFrom(), the text will be aliased again, 1945 * so that both strings then alias the same readonly-text. 
1946 * 1947 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 1948 * This must be true if <code>textLength==-1</code>. 1949 * @param text The characters to alias for the UnicodeString. 1950 * @param textLength The number of Unicode characters in <code>text</code> to alias. 1951 * If -1, then this function will determine the length 1952 * by calling <code>u_strlen()</code>. 1953 * @return a reference to this 1954 * @stable ICU 2.0 1955 */ 1956 UnicodeString &setTo(UBool isTerminated, 1957 const UChar *text, 1958 int32_t textLength); 1959 1960 /** 1961 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. 1962 * The text will be used for the UnicodeString object, but 1963 * it will not be released when the UnicodeString is destroyed. 1964 * This has write-through semantics: 1965 * For as long as the capacity of the buffer is sufficient, write operations 1966 * will directly affect the buffer. When more capacity is necessary, then 1967 * a new buffer will be allocated and the contents copied as with regularly 1968 * constructed strings. 1969 * In an assignment to another UnicodeString, the buffer will be copied. 1970 * The extract(UChar *dst) function detects whether the dst pointer is the same 1971 * as the string buffer itself and will in this case not copy the contents. 1972 * 1973 * @param buffer The characters to alias for the UnicodeString. 1974 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 1975 * @param buffCapacity The size of <code>buffer</code> in UChars. 1976 * @return a reference to this 1977 * @stable ICU 2.0 1978 */ 1979 UnicodeString &setTo(UChar *buffer, 1980 int32_t buffLength, 1981 int32_t buffCapacity); 1982 1983 /** 1984 * Make this UnicodeString object invalid. 1985 * The string will test TRUE with isBogus(). 1986 * 1987 * A bogus string has no value. It is different from an empty string.
1988 * It can be used to indicate that no string value is available. 1989 * getBuffer() and getTerminatedBuffer() return NULL, and 1990 * length() returns 0. 1991 * 1992 * This utility function is used throughout the UnicodeString 1993 * implementation to indicate that a UnicodeString operation failed, 1994 * and may be used in other functions, 1995 * especially but not exclusively when such functions do not 1996 * take a UErrorCode for simplicity. 1997 * 1998 * The following methods, and no others, will clear a string object's bogus flag: 1999 * - remove() 2000 * - remove(0, INT32_MAX) 2001 * - truncate(0) 2002 * - operator=() (assignment operator) 2003 * - setTo(...) 2004 * 2005 * The simplest way to turn a bogus string into an empty one 2006 * is to use the remove() function. 2007 * Examples for other functions that are equivalent to "set to empty string": 2008 * \code 2009 * if(s.isBogus()) { 2010 * s.remove(); // set to an empty string (remove all), or 2011 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or 2012 * s.truncate(0); // set to an empty string (complete truncation), or 2013 * s=UnicodeString(); // assign an empty string, or 2014 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or 2015 * static const UChar nul=0; 2016 * s.setTo(&nul, 0); // set to an empty C Unicode string 2017 * } 2018 * \endcode 2019 * 2020 * @see isBogus() 2021 * @stable ICU 2.0 2022 */ 2023 void setToBogus(); 2024 2025 /** 2026 * Set the character at the specified offset to the specified character. 2027 * @param offset A valid offset into the text of the character to set 2028 * @param ch The new character 2029 * @return A reference to this 2030 * @stable ICU 2.0 2031 */ 2032 UnicodeString& setCharAt(int32_t offset, 2033 UChar ch); 2034 2035 2036 /* Append operations */ 2037 2038 /** 2039 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString 2040 * object.
2041 * @param ch the code unit to be appended 2042 * @return a reference to this 2043 * @stable ICU 2.0 2044 */ 2045 inline UnicodeString& operator+= (UChar ch); 2046 2047 /** 2048 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString 2049 * object. 2050 * @param ch the code point to be appended 2051 * @return a reference to this 2052 * @stable ICU 2.0 2053 */ 2054 inline UnicodeString& operator+= (UChar32 ch); 2055 2056 /** 2057 * Append operator. Append the characters in <TT>srcText</TT> to the 2058 * UnicodeString object. <TT>srcText</TT> is not modified. 2059 * @param srcText the source for the new characters 2060 * @return a reference to this 2061 * @stable ICU 2.0 2062 */ 2063 inline UnicodeString& operator+= (const UnicodeString& srcText); 2064 2065 /** 2066 * Append the characters 2067 * in <TT>srcText</TT> in the range 2068 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the 2069 * UnicodeString object. <TT>srcText</TT> 2070 * is not modified. 2071 * @param srcText the source for the new characters 2072 * @param srcStart the offset into <TT>srcText</TT> where new characters 2073 * will be obtained 2074 * @param srcLength the number of characters in <TT>srcText</TT> in 2075 * the append string 2076 * @return a reference to this 2077 * @stable ICU 2.0 2078 */ 2079 inline UnicodeString& append(const UnicodeString& srcText, 2080 int32_t srcStart, 2081 int32_t srcLength); 2082 2083 /** 2084 * Append the characters in <TT>srcText</TT> to the UnicodeString object. 2085 * <TT>srcText</TT> is not modified. 2086 * @param srcText the source for the new characters 2087 * @return a reference to this 2088 * @stable ICU 2.0 2089 */ 2090 inline UnicodeString& append(const UnicodeString& srcText); 2091 2092 /** 2093 * Append the characters in <TT>srcChars</TT> in the range 2094 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString 2095 * object. 2096 *
<TT>srcChars</TT> is not modified. 2097 * @param srcChars the source for the new characters 2098 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2099 * will be obtained 2100 * @param srcLength the number of characters in <TT>srcChars</TT> in 2101 * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated 2102 * @return a reference to this 2103 * @stable ICU 2.0 2104 */ 2105 inline UnicodeString& append(const UChar *srcChars, 2106 int32_t srcStart, 2107 int32_t srcLength); 2108 2109 /** 2110 * Append the characters in <TT>srcChars</TT> to the UnicodeString object. 2111 * <TT>srcChars</TT> is not modified. 2112 * @param srcChars the source for the new characters 2113 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; 2114 * can be -1 if <TT>srcChars</TT> is NUL-terminated 2115 * @return a reference to this 2116 * @stable ICU 2.0 2117 */ 2118 inline UnicodeString& append(const UChar *srcChars, 2119 int32_t srcLength); 2120 2121 /** 2122 * Append the code unit <TT>srcChar</TT> to the UnicodeString object. 2123 * @param srcChar the code unit to append 2124 * @return a reference to this 2125 * @stable ICU 2.0 2126 */ 2127 inline UnicodeString& append(UChar srcChar); 2128 2129 /** 2130 * Append the code point <TT>srcChar</TT> to the UnicodeString object. 2131 * @param srcChar the code point to append 2132 * @return a reference to this 2133 * @stable ICU 2.0 2134 */ 2135 UnicodeString& append(UChar32 srcChar); 2136 2137 2138 /* Insert operations */ 2139 2140 /** 2141 * Insert the characters in <TT>srcText</TT> in the range 2142 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2143 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2144 * @param start the offset where the insertion begins 2145 * @param srcText the source for the new characters 2146 * @param srcStart the offset into <TT>srcText</TT> where new characters 2147 * will be obtained 2148 * @param srcLength the number of characters in <TT>srcText</TT> in 2149 * the insert string 2150 * @return a reference to this 2151 * @stable ICU 2.0 2152 */ 2153 inline UnicodeString& insert(int32_t start, 2154 const UnicodeString& srcText, 2155 int32_t srcStart, 2156 int32_t srcLength); 2157 2158 /** 2159 * Insert the characters in <TT>srcText</TT> into the UnicodeString object 2160 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 2161 * @param start the offset where the insertion begins 2162 * @param srcText the source for the new characters 2163 * @return a reference to this 2164 * @stable ICU 2.0 2165 */ 2166 inline UnicodeString& insert(int32_t start, 2167 const UnicodeString& srcText); 2168 2169 /** 2170 * Insert the characters in <TT>srcChars</TT> in the range 2171 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2172 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 2173 * @param start the offset at which the insertion begins 2174 * @param srcChars the source for the new characters 2175 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2176 * will be obtained 2177 * @param srcLength the number of characters in <TT>srcChars</TT> 2178 * in the insert string 2179 * @return a reference to this 2180 * @stable ICU 2.0 2181 */ 2182 inline UnicodeString& insert(int32_t start, 2183 const UChar *srcChars, 2184 int32_t srcStart, 2185 int32_t srcLength); 2186 2187 /** 2188 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object 2189 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 
2190 * @param start the offset where the insertion begins 2191 * @param srcChars the source for the new characters 2192 * @param srcLength the number of Unicode characters in srcChars. 2193 * @return a reference to this 2194 * @stable ICU 2.0 2195 */ 2196 inline UnicodeString& insert(int32_t start, 2197 const UChar *srcChars, 2198 int32_t srcLength); 2199 2200 /** 2201 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at 2202 * offset <TT>start</TT>. 2203 * @param start the offset at which the insertion occurs 2204 * @param srcChar the code unit to insert 2205 * @return a reference to this 2206 * @stable ICU 2.0 2207 */ 2208 inline UnicodeString& insert(int32_t start, 2209 UChar srcChar); 2210 2211 /** 2212 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at 2213 * offset <TT>start</TT>. 2214 * @param start the offset at which the insertion occurs 2215 * @param srcChar the code point to insert 2216 * @return a reference to this 2217 * @stable ICU 2.0 2218 */ 2219 inline UnicodeString& insert(int32_t start, 2220 UChar32 srcChar); 2221 2222 2223 /* Replace operations */ 2224 2225 /** 2226 * Replace the characters in the range 2227 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2228 * <TT>srcText</TT> in the range 2229 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 2230 * <TT>srcText</TT> is not modified. 2231 * @param start the offset at which the replace operation begins 2232 * @param length the number of characters to replace. The character at 2233 * <TT>start + length</TT> is not modified. 
2234 * @param srcText the source for the new characters 2235 * @param srcStart the offset into <TT>srcText</TT> where new characters 2236 * will be obtained 2237 * @param srcLength the number of characters in <TT>srcText</TT> in 2238 * the replace string 2239 * @return a reference to this 2240 * @stable ICU 2.0 2241 */ 2242 UnicodeString& replace(int32_t start, 2243 int32_t length, 2244 const UnicodeString& srcText, 2245 int32_t srcStart, 2246 int32_t srcLength); 2247 2248 /** 2249 * Replace the characters in the range 2250 * [<TT>start</TT>, <TT>start + length</TT>) 2251 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is 2252 * not modified. 2253 * @param start the offset at which the replace operation begins 2254 * @param length the number of characters to replace. The character at 2255 * <TT>start + length</TT> is not modified. 2256 * @param srcText the source for the new characters 2257 * @return a reference to this 2258 * @stable ICU 2.0 2259 */ 2260 UnicodeString& replace(int32_t start, 2261 int32_t length, 2262 const UnicodeString& srcText); 2263 2264 /** 2265 * Replace the characters in the range 2266 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2267 * <TT>srcChars</TT> in the range 2268 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> 2269 * is not modified. 2270 * @param start the offset at which the replace operation begins 2271 * @param length the number of characters to replace. The character at 2272 * <TT>start + length</TT> is not modified. 
2273 * @param srcChars the source for the new characters 2274 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2275 * will be obtained 2276 * @param srcLength the number of characters in <TT>srcChars</TT> 2277 * in the replace string 2278 * @return a reference to this 2279 * @stable ICU 2.0 2280 */ 2281 UnicodeString& replace(int32_t start, 2282 int32_t length, 2283 const UChar *srcChars, 2284 int32_t srcStart, 2285 int32_t srcLength); 2286 2287 /** 2288 * Replace the characters in the range 2289 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2290 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 2291 * @param start the offset at which the replace operation begins 2292 * @param length number of characters to replace. The character at 2293 * <TT>start + length</TT> is not modified. 2294 * @param srcChars the source for the new characters 2295 * @param srcLength the number of Unicode characters in srcChars 2296 * @return a reference to this 2297 * @stable ICU 2.0 2298 */ 2299 inline UnicodeString& replace(int32_t start, 2300 int32_t length, 2301 const UChar *srcChars, 2302 int32_t srcLength); 2303 2304 /** 2305 * Replace the characters in the range 2306 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit 2307 * <TT>srcChar</TT>. 2308 * @param start the offset at which the replace operation begins 2309 * @param length the number of characters to replace. The character at 2310 * <TT>start + length</TT> is not modified. 2311 * @param srcChar the new code unit 2312 * @return a reference to this 2313 * @stable ICU 2.0 2314 */ 2315 inline UnicodeString& replace(int32_t start, 2316 int32_t length, 2317 UChar srcChar); 2318 2319 /** 2320 * Replace the characters in the range 2321 * [<TT>start</TT>, <TT>start + length</TT>) with the code point 2322 * <TT>srcChar</TT>. 2323 * @param start the offset at which the replace operation begins 2324 * @param length the number of characters to replace. 
The character at 2325 * <TT>start + length</TT> is not modified. 2326 * @param srcChar the new code point 2327 * @return a reference to this 2328 * @stable ICU 2.0 2329 */ 2330 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 2331 2332 /** 2333 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2334 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. 2335 * @param start the offset at which the replace operation begins 2336 * @param limit the offset immediately following the replace range 2337 * @param srcText the source for the new characters 2338 * @return a reference to this 2339 * @stable ICU 2.0 2340 */ 2341 inline UnicodeString& replaceBetween(int32_t start, 2342 int32_t limit, 2343 const UnicodeString& srcText); 2344 2345 /** 2346 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2347 * with the characters in <TT>srcText</TT> in the range 2348 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified. 2349 * @param start the offset at which the replace operation begins 2350 * @param limit the offset immediately following the replace range 2351 * @param srcText the source for the new characters 2352 * @param srcStart the offset into <TT>srcText</TT> where new characters 2353 * will be obtained 2354 * @param srcLimit the offset immediately following the range to copy 2355 * in <TT>srcText</TT> 2356 * @return a reference to this 2357 * @stable ICU 2.0 2358 */ 2359 inline UnicodeString& replaceBetween(int32_t start, 2360 int32_t limit, 2361 const UnicodeString& srcText, 2362 int32_t srcStart, 2363 int32_t srcLimit); 2364 2365 /** 2366 * Replace a substring of this object with the given text. 2367 * @param start the beginning index, inclusive; <code>0 <= start 2368 * <= limit</code>. 2369 * @param limit the ending index, exclusive; <code>start <= limit 2370 * <= length()</code>.
2371 * @param text the text to replace characters <code>start</code> 2372 * to <code>limit - 1</code> 2373 * @stable ICU 2.0 2374 */ 2375 virtual void handleReplaceBetween(int32_t start, 2376 int32_t limit, 2377 const UnicodeString& text); 2378 2379 /** 2380 * Replaceable API 2381 * @return TRUE if it has MetaData 2382 * @stable ICU 2.4 2383 */ 2384 virtual UBool hasMetaData() const; 2385 2386 /** 2387 * Copy a substring of this object, retaining attribute (out-of-band) 2388 * information. This method is used to duplicate or reorder substrings. 2389 * The destination index must not overlap the source range. 2390 * 2391 * @param start the beginning index, inclusive; <code>0 <= start <= 2392 * limit</code>. 2393 * @param limit the ending index, exclusive; <code>start <= limit <= 2394 * length()</code>. 2395 * @param dest the destination index. The characters from 2396 * <code>start..limit-1</code> will be copied to <code>dest</code>. 2397 * Implementations of this method may assume that <code>dest <= start || 2398 * dest >= limit</code>. 2399 * @stable ICU 2.0 2400 */ 2401 virtual void copy(int32_t start, int32_t limit, int32_t dest); 2402 2403 /* Search and replace operations */ 2404 2405 /** 2406 * Replace all occurrences of characters in oldText with the characters 2407 * in newText 2408 * @param oldText the text containing the search text 2409 * @param newText the text containing the replacement text 2410 * @return a reference to this 2411 * @stable ICU 2.0 2412 */ 2413 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 2414 const UnicodeString& newText); 2415 2416 /** 2417 * Replace all occurrences of characters in oldText with characters 2418 * in newText 2419 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2420 * @param start the start of the range in which replace will be performed 2421 * @param length the length of the range in which replace will be performed 2422 * @param oldText the text containing the search text 2423 * @param newText the text containing the replacement text 2424 * @return a reference to this 2425 * @stable ICU 2.0 2426 */ 2427 inline UnicodeString& findAndReplace(int32_t start, 2428 int32_t length, 2429 const UnicodeString& oldText, 2430 const UnicodeString& newText); 2431 2432 /** 2433 * Replace all occurrences of characters in oldText in the range 2434 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters 2435 * in newText in the range 2436 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) 2437 * in the range [<TT>start</TT>, <TT>start + length</TT>). 2438 * @param start the start of the range in which replace will be performed 2439 * @param length the length of the range in which replace will be performed 2440 * @param oldText the text containing the search text 2441 * @param oldStart the start of the search range in <TT>oldText</TT> 2442 * @param oldLength the length of the search range in <TT>oldText</TT> 2443 * @param newText the text containing the replacement text 2444 * @param newStart the start of the replacement range in <TT>newText</TT> 2445 * @param newLength the length of the replacement range in <TT>newText</TT> 2446 * @return a reference to this 2447 * @stable ICU 2.0 2448 */ 2449 UnicodeString& findAndReplace(int32_t start, 2450 int32_t length, 2451 const UnicodeString& oldText, 2452 int32_t oldStart, 2453 int32_t oldLength, 2454 const UnicodeString& newText, 2455 int32_t newStart, 2456 int32_t newLength); 2457 2458 2459 /* Remove operations */ 2460 2461 /** 2462 * Remove all characters from the UnicodeString object.
2463 * @return a reference to this 2464 * @stable ICU 2.0 2465 */ 2466 inline UnicodeString& remove(void); 2467 2468 /** 2469 * Remove the characters in the range 2470 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. 2471 * @param start the offset of the first character to remove 2472 * @param length the number of characters to remove 2473 * @return a reference to this 2474 * @stable ICU 2.0 2475 */ 2476 inline UnicodeString& remove(int32_t start, 2477 int32_t length = (int32_t)INT32_MAX); 2478 2479 /** 2480 * Remove the characters in the range 2481 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object. 2482 * @param start the offset of the first character to remove 2483 * @param limit the offset immediately following the range to remove 2484 * @return a reference to this 2485 * @stable ICU 2.0 2486 */ 2487 inline UnicodeString& removeBetween(int32_t start, 2488 int32_t limit = (int32_t)INT32_MAX); 2489 2490 /** 2491 * Retain only the characters in the range 2492 * [<code>start</code>, <code>limit</code>) from the UnicodeString object. 2493 * Removes characters before <code>start</code> and at and after <code>limit</code>. 2494 * @param start the offset of the first character to retain 2495 * @param limit the offset immediately following the range to retain 2496 * @return a reference to this 2497 * @stable ICU 4.4 2498 */ 2499 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 2500 2501 /* Length operations */ 2502 2503 /** 2504 * Pad the start of this UnicodeString with the character <TT>padChar</TT>. 2505 * If the length of this UnicodeString is less than targetLength, 2506 * targetLength - length() copies of padChar will be added to the 2507 * beginning of this UnicodeString. 2508 * @param targetLength the desired length of the string 2509 * @param padChar the character to use for padding. Defaults to 2510 * space (U+0020) 2511 * @return TRUE if the text was padded, FALSE otherwise.
2512 * @stable ICU 2.0 2513 */ 2514 UBool padLeading(int32_t targetLength, 2515 UChar padChar = 0x0020); 2516 2517 /** 2518 * Pad the end of this UnicodeString with the character <TT>padChar</TT>. 2519 * If the length of this UnicodeString is less than targetLength, 2520 * targetLength - length() copies of padChar will be added to the 2521 * end of this UnicodeString. 2522 * @param targetLength the desired length of the string 2523 * @param padChar the character to use for padding. Defaults to 2524 * space (U+0020) 2525 * @return TRUE if the text was padded, FALSE otherwise. 2526 * @stable ICU 2.0 2527 */ 2528 UBool padTrailing(int32_t targetLength, 2529 UChar padChar = 0x0020); 2530 2531 /** 2532 * Truncate this UnicodeString to the <TT>targetLength</TT>. 2533 * @param targetLength the desired length of this UnicodeString. 2534 * @return TRUE if the text was truncated, FALSE otherwise 2535 * @stable ICU 2.0 2536 */ 2537 inline UBool truncate(int32_t targetLength); 2538 2539 /** 2540 * Trims leading and trailing whitespace from this UnicodeString. 2541 * @return a reference to this 2542 * @stable ICU 2.0 2543 */ 2544 UnicodeString& trim(void); 2545 2546 2547 /* Miscellaneous operations */ 2548 2549 /** 2550 * Reverse this UnicodeString in place. 2551 * @return a reference to this 2552 * @stable ICU 2.0 2553 */ 2554 inline UnicodeString& reverse(void); 2555 2556 /** 2557 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in 2558 * this UnicodeString. 2559 * @param start the start of the range to reverse 2560 * @param length the number of characters to reverse 2561 * @return a reference to this 2562 * @stable ICU 2.0 2563 */ 2564 inline UnicodeString& reverse(int32_t start, 2565 int32_t length); 2566 2567 /** 2568 * Convert the characters in this to UPPER CASE following the conventions of 2569 * the default locale. 2570 * @return A reference to this.
2571 * @stable ICU 2.0 2572 */ 2573 UnicodeString& toUpper(void); 2574 2575 /** 2576 * Convert the characters in this to UPPER CASE following the conventions of 2577 * a specific locale. 2578 * @param locale The locale containing the conventions to use. 2579 * @return A reference to this. 2580 * @stable ICU 2.0 2581 */ 2582 UnicodeString& toUpper(const Locale& locale); 2583 2584 /** 2585 * Convert the characters in this to lower case following the conventions of 2586 * the default locale. 2587 * @return A reference to this. 2588 * @stable ICU 2.0 2589 */ 2590 UnicodeString& toLower(void); 2591 2592 /** 2593 * Convert the characters in this to lower case following the conventions of 2594 * a specific locale. 2595 * @param locale The locale containing the conventions to use. 2596 * @return A reference to this. 2597 * @stable ICU 2.0 2598 */ 2599 UnicodeString& toLower(const Locale& locale); 2600 2601 #if !UCONFIG_NO_BREAK_ITERATION 2602 2603 /** 2604 * Titlecase this string, convenience function using the default locale. 2605 * 2606 * Casing is locale-dependent and context-sensitive. 2607 * Titlecasing uses a break iterator to find the first characters of words 2608 * that are to be titlecased. It titlecases those characters and lowercases 2609 * all others. 2610 * 2611 * The titlecase break iterator can be provided to customize for arbitrary 2612 * styles, using rules and dictionaries beyond the standard iterators. 2613 * It may be more efficient to always provide an iterator to avoid 2614 * opening and closing one for each string. 2615 * The standard titlecase iterator for the root locale implements the 2616 * algorithm of Unicode TR 21. 2617 * 2618 * This function uses only the setText(), first() and next() methods of the 2619 * provided break iterator. 2620 * 2621 * @param titleIter A break iterator to find the first characters of words 2622 * that are to be titlecased. 2623 * If none is provided (0), then a standard titlecase 2624 * break iterator is opened. 
2625 * Otherwise the provided iterator is set to the string's text. 2626 * @return A reference to this. 2627 * @stable ICU 2.1 2628 */ 2629 UnicodeString &toTitle(BreakIterator *titleIter); 2630 2631 /** 2632 * Titlecase this string. 2633 * 2634 * Casing is locale-dependent and context-sensitive. 2635 * Titlecasing uses a break iterator to find the first characters of words 2636 * that are to be titlecased. It titlecases those characters and lowercases 2637 * all others. 2638 * 2639 * The titlecase break iterator can be provided to customize for arbitrary 2640 * styles, using rules and dictionaries beyond the standard iterators. 2641 * It may be more efficient to always provide an iterator to avoid 2642 * opening and closing one for each string. 2643 * The standard titlecase iterator for the root locale implements the 2644 * algorithm of Unicode TR 21. 2645 * 2646 * This function uses only the setText(), first() and next() methods of the 2647 * provided break iterator. 2648 * 2649 * @param titleIter A break iterator to find the first characters of words 2650 * that are to be titlecased. 2651 * If none is provided (0), then a standard titlecase 2652 * break iterator is opened. 2653 * Otherwise the provided iterator is set to the string's text. 2654 * @param locale The locale to consider. 2655 * @return A reference to this. 2656 * @stable ICU 2.1 2657 */ 2658 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 2659 2660 /** 2661 * Titlecase this string, with options. 2662 * 2663 * Casing is locale-dependent and context-sensitive. 2664 * Titlecasing uses a break iterator to find the first characters of words 2665 * that are to be titlecased. It titlecases those characters and lowercases 2666 * all others. (This can be modified with options.) 2667 * 2668 * The titlecase break iterator can be provided to customize for arbitrary 2669 * styles, using rules and dictionaries beyond the standard iterators. 
2670 * It may be more efficient to always provide an iterator to avoid 2671 * opening and closing one for each string. 2672 * The standard titlecase iterator for the root locale implements the 2673 * algorithm of Unicode TR 21. 2674 * 2675 * This function uses only the setText(), first() and next() methods of the 2676 * provided break iterator. 2677 * 2678 * @param titleIter A break iterator to find the first characters of words 2679 * that are to be titlecased. 2680 * If none is provided (0), then a standard titlecase 2681 * break iterator is opened. 2682 * Otherwise the provided iterator is set to the string's text. 2683 * @param locale The locale to consider. 2684 * @param options Options bit set, see ucasemap_open(). 2685 * @return A reference to this. 2686 * @see U_TITLECASE_NO_LOWERCASE 2687 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 2688 * @see ucasemap_open 2689 * @stable ICU 3.8 2690 */ 2691 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 2692 2693 #endif 2694 2695 /** 2696 * Case-folds the characters in this string. 2697 * 2698 * Case-folding is locale-independent and not context-sensitive, 2699 * but there is an option for whether to include or exclude mappings for dotted I 2700 * and dotless i that are marked with 'T' in CaseFolding.txt. 2701 * 2702 * The result may be longer or shorter than the original. 2703 * 2704 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 2705 * @return A reference to this. 2706 * @stable ICU 2.0 2707 */ 2708 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 2709 2710 //======================================== 2711 // Access to the internal buffer 2712 //======================================== 2713 2714 /** 2715 * Get a read/write pointer to the internal buffer. 2716 * The buffer is guaranteed to be large enough for at least minCapacity UChars, 2717 * writable, and is still owned by the UnicodeString object. 
2718 * Calls to getBuffer(minCapacity) must not be nested, and 2719 * must be matched with calls to releaseBuffer(newLength). 2720 * If the string buffer was read-only or shared, 2721 * then it will be reallocated and copied. 2722 * 2723 * An attempted nested call will return 0, and will not further modify the 2724 * state of the UnicodeString object. 2725 * It also returns 0 if the string is bogus. 2726 * 2727 * The actual capacity of the string buffer may be larger than minCapacity. 2728 * getCapacity() returns the actual capacity. 2729 * For many operations, the full capacity should be used to avoid reallocations. 2730 * 2731 * While the buffer is "open" between getBuffer(minCapacity) 2732 * and releaseBuffer(newLength), the following applies: 2733 * - The string length is set to 0. 2734 * - Any read API call on the UnicodeString object will behave like on a 0-length string. 2735 * - Any write API call on the UnicodeString object is disallowed and will have no effect. 2736 * - You can read from and write to the returned buffer. 2737 * - The previous string contents will still be in the buffer; 2738 * if you want to use it, then you need to call length() before getBuffer(minCapacity). 2739 * If the length() was greater than minCapacity, then any contents after minCapacity 2740 * may be lost. 2741 * The buffer contents is not NUL-terminated by getBuffer(). 2742 * If length()<getCapacity() then you can terminate it by writing a NUL 2743 * at index length(). 2744 * - You must call releaseBuffer(newLength) before and in order to 2745 * return to normal UnicodeString operation. 
2746 * 2747 * @param minCapacity the minimum number of UChars that are to be available 2748 * in the buffer, starting at the returned pointer; 2749 * default to the current string capacity if minCapacity==-1 2750 * @return a writable pointer to the internal string buffer, 2751 * or 0 if an error occurs (nested calls, out of memory) 2752 * 2753 * @see releaseBuffer 2754 * @see getTerminatedBuffer() 2755 * @stable ICU 2.0 2756 */ 2757 UChar *getBuffer(int32_t minCapacity); 2758 2759 /** 2760 * Release a read/write buffer on a UnicodeString object with an 2761 * "open" getBuffer(minCapacity). 2762 * This function must be called in a matched pair with getBuffer(minCapacity). 2763 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". 2764 * 2765 * It will set the string length to newLength, at most to the current capacity. 2766 * If newLength==-1 then it will set the length according to the 2767 * first NUL in the buffer, or to the capacity if there is no NUL. 2768 * 2769 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. 2770 * 2771 * @param newLength the new length of the UnicodeString object; 2772 * defaults to the current capacity if newLength is greater than that; 2773 * if newLength==-1, it defaults to u_strlen(buffer) but not more than 2774 * the current capacity of the string 2775 * 2776 * @see getBuffer(int32_t minCapacity) 2777 * @stable ICU 2.0 2778 */ 2779 void releaseBuffer(int32_t newLength=-1); 2780 2781 /** 2782 * Get a read-only pointer to the internal buffer. 2783 * This can be called at any time on a valid UnicodeString. 2784 * 2785 * It returns 0 if the string is bogus, or 2786 * during an "open" getBuffer(minCapacity). 2787 * 2788 * It can be called as many times as desired. 2789 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2790 * at which time the pointer is semantically invalidated and must not be used any more. 
2791 * 2792 * The capacity of the buffer can be determined with getCapacity(). 2793 * The part after length() may or may not be initialized and valid, 2794 * depending on the history of the UnicodeString object. 2795 * 2796 * The buffer contents is (probably) not NUL-terminated. 2797 * You can check if it is with 2798 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. 2799 * (See getTerminatedBuffer().) 2800 * 2801 * The buffer may reside in read-only memory. Its contents must not 2802 * be modified. 2803 * 2804 * @return a read-only pointer to the internal string buffer, 2805 * or 0 if the string is empty or bogus 2806 * 2807 * @see getBuffer(int32_t minCapacity) 2808 * @see getTerminatedBuffer() 2809 * @stable ICU 2.0 2810 */ 2811 inline const UChar *getBuffer() const; 2812 2813 /** 2814 * Get a read-only pointer to the internal buffer, 2815 * making sure that it is NUL-terminated. 2816 * This can be called at any time on a valid UnicodeString. 2817 * 2818 * It returns 0 if the string is bogus, or 2819 * during an "open" getBuffer(minCapacity), or if the buffer cannot 2820 * be NUL-terminated (because memory allocation failed). 2821 * 2822 * It can be called as many times as desired. 2823 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2824 * at which time the pointer is semantically invalidated and must not be used any more. 2825 * 2826 * The capacity of the buffer can be determined with getCapacity(). 2827 * The part after length()+1 may or may not be initialized and valid, 2828 * depending on the history of the UnicodeString object. 2829 * 2830 * The buffer contents is guaranteed to be NUL-terminated. 2831 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL 2832 * is written. 2833 * For this reason, this function is not const, unlike getBuffer(). 2834 * Note that a UnicodeString may also contain NUL characters as part of its contents. 
2835 * 2836 * The buffer may reside in read-only memory. Its contents must not 2837 * be modified. 2838 * 2839 * @return a read-only pointer to the internal string buffer, 2840 * or 0 if the string is empty or bogus 2841 * 2842 * @see getBuffer(int32_t minCapacity) 2843 * @see getBuffer() 2844 * @stable ICU 2.2 2845 */ 2846 const UChar *getTerminatedBuffer(); 2847 2848 //======================================== 2849 // Constructors 2850 //======================================== 2851 2852 /** Construct an empty UnicodeString. 2853 * @stable ICU 2.0 2854 */ 2855 inline UnicodeString(); 2856 2857 /** 2858 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars 2859 * @param capacity the number of UChars this UnicodeString should hold 2860 * before a resize is necessary; if count is greater than 0 and count 2861 * code points c take up more space than capacity, then capacity is adjusted 2862 * accordingly. 2863 * @param c is used to initially fill the string 2864 * @param count specifies how many code points c are to be written in the 2865 * string 2866 * @stable ICU 2.0 2867 */ 2868 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 2869 2870 /** 2871 * Single UChar (code unit) constructor. 2872 * 2873 * It is recommended to mark this constructor "explicit" by 2874 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2875 * on the compiler command line or similar. 2876 * @param ch the character to place in the UnicodeString 2877 * @stable ICU 2.0 2878 */ 2879 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); 2880 2881 /** 2882 * Single UChar32 (code point) constructor. 2883 * 2884 * It is recommended to mark this constructor "explicit" by 2885 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2886 * on the compiler command line or similar. 2887 * @param ch the character to place in the UnicodeString 2888 * @stable ICU 2.0 2889 */ 2890 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 2891 2892 /** 2893 * UChar* constructor. 
2894 * 2895 * It is recommended to mark this constructor "explicit" by 2896 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 2897 * on the compiler command line or similar. 2898 * @param text The characters to place in the UnicodeString. <TT>text</TT> 2899 * must be NULL (U+0000) terminated. 2900 * @stable ICU 2.0 2901 */ 2902 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); 2903 2904 /** 2905 * UChar* constructor. 2906 * @param text The characters to place in the UnicodeString. 2907 * @param textLength The number of Unicode characters in <TT>text</TT> 2908 * to copy. 2909 * @stable ICU 2.0 2910 */ 2911 UnicodeString(const UChar *text, 2912 int32_t textLength); 2913 2914 /** 2915 * Readonly-aliasing UChar* constructor. 2916 * The text will be used for the UnicodeString object, but 2917 * it will not be released when the UnicodeString is destroyed. 2918 * This has copy-on-write semantics: 2919 * When the string is modified, then the buffer is first copied into 2920 * newly allocated memory. 2921 * The aliased buffer is never modified. 2922 * 2923 * In an assignment to another UnicodeString, when using the copy constructor 2924 * or the assignment operator, the text will be copied. 2925 * When using fastCopyFrom(), the text will be aliased again, 2926 * so that both strings then alias the same readonly-text. 2927 * 2928 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 2929 * This must be true if <code>textLength==-1</code>. 2930 * @param text The characters to alias for the UnicodeString. 2931 * @param textLength The number of Unicode characters in <code>text</code> to alias. 2932 * If -1, then this constructor will determine the length 2933 * by calling <code>u_strlen()</code>. 2934 * @stable ICU 2.0 2935 */ 2936 UnicodeString(UBool isTerminated, 2937 const UChar *text, 2938 int32_t textLength); 2939 2940 /** 2941 * Writable-aliasing UChar* constructor. 
2942 * The text will be used for the UnicodeString object, but 2943 * it will not be released when the UnicodeString is destroyed. 2944 * This has write-through semantics: 2945 * For as long as the capacity of the buffer is sufficient, write operations 2946 * will directly affect the buffer. When more capacity is necessary, then 2947 * a new buffer will be allocated and the contents copied as with regularly 2948 * constructed strings. 2949 * In an assignment to another UnicodeString, the buffer will be copied. 2950 * The extract(UChar *dst) function detects whether the dst pointer is the same 2951 * as the string buffer itself and will in this case not copy the contents. 2952 * 2953 * @param buffer The characters to alias for the UnicodeString. 2954 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 2955 * @param buffCapacity The size of <code>buffer</code> in UChars. 2956 * @stable ICU 2.0 2957 */ 2958 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 2959 2960 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 2961 2962 /** 2963 * char* constructor. 2964 * Uses the default converter (and thus depends on the ICU conversion code) 2965 * unless U_CHARSET_IS_UTF8 is set to 1. 2966 * 2967 * For ASCII (really "invariant character") strings it is more efficient to use 2968 * the constructor that takes a US_INV (for its enum EInvariant). 2969 * For ASCII (invariant-character) string literals, see UNICODE_STRING and 2970 * UNICODE_STRING_SIMPLE. 2971 * 2972 * It is recommended to mark this constructor "explicit" by 2973 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 2974 * on the compiler command line or similar. 2975 * @param codepageData an array of bytes, null-terminated, 2976 * in the platform's default codepage. 
2977 * @stable ICU 2.0 2978 * @see UNICODE_STRING 2979 * @see UNICODE_STRING_SIMPLE 2980 */ 2981 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 2982 2983 /** 2984 * char* constructor. 2985 * Uses the default converter (and thus depends on the ICU conversion code) 2986 * unless U_CHARSET_IS_UTF8 is set to 1. 2987 * @param codepageData an array of bytes in the platform's default codepage. 2988 * @param dataLength The number of bytes in <TT>codepageData</TT>. 2989 * @stable ICU 2.0 2990 */ 2991 UnicodeString(const char *codepageData, int32_t dataLength); 2992 2993 #endif 2994 2995 #if !UCONFIG_NO_CONVERSION 2996 2997 /** 2998 * char* constructor. 2999 * @param codepageData an array of bytes, null-terminated 3000 * @param codepage the encoding of <TT>codepageData</TT>. The special 3001 * value 0 for <TT>codepage</TT> indicates that the text is in the 3002 * platform's default codepage. 3003 * 3004 * If <code>codepage</code> is an empty string (<code>""</code>), 3005 * then a simple conversion is performed on the codepage-invariant 3006 * subset ("invariant characters") of the platform encoding. See utypes.h. 3007 * Recommendation: For invariant-character strings use the constructor 3008 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3009 * because it avoids object code dependencies of UnicodeString on 3010 * the conversion code. 3011 * 3012 * @stable ICU 2.0 3013 */ 3014 UnicodeString(const char *codepageData, const char *codepage); 3015 3016 /** 3017 * char* constructor. 3018 * @param codepageData an array of bytes. 3019 * @param dataLength The number of bytes in <TT>codepageData</TT>. 3020 * @param codepage the encoding of <TT>codepageData</TT>. The special 3021 * value 0 for <TT>codepage</TT> indicates that the text is in the 3022 * platform's default codepage. 
3023 * If <code>codepage</code> is an empty string (<code>""</code>), 3024 * then a simple conversion is performed on the codepage-invariant 3025 * subset ("invariant characters") of the platform encoding. See utypes.h. 3026 * Recommendation: For invariant-character strings use the constructor 3027 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3028 * because it avoids object code dependencies of UnicodeString on 3029 * the conversion code. 3030 * 3031 * @stable ICU 2.0 3032 */ 3033 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 3034 3035 /** 3036 * char * / UConverter constructor. 3037 * This constructor uses an existing UConverter object to 3038 * convert the codepage string to Unicode and construct a UnicodeString 3039 * from that. 3040 * 3041 * The converter is reset at first. 3042 * If the error code indicates a failure before this constructor is called, 3043 * or if an error occurs during conversion or construction, 3044 * then the string will be bogus. 3045 * 3046 * This function avoids the overhead of opening and closing a converter if 3047 * multiple strings are constructed. 3048 * 3049 * @param src input codepage string 3050 * @param srcLength length of the input string, can be -1 for NUL-terminated strings 3051 * @param cnv converter object (ucnv_resetToUnicode() will be called), 3052 * can be NULL for the default converter 3053 * @param errorCode normal ICU error code 3054 * @stable ICU 2.0 3055 */ 3056 UnicodeString( 3057 const char *src, int32_t srcLength, 3058 UConverter *cnv, 3059 UErrorCode &errorCode); 3060 3061 #endif 3062 3063 /** 3064 * Constructs a Unicode string from an invariant-character char * string. 3065 * About invariant characters see utypes.h. 3066 * This constructor has no runtime dependency on conversion code and is 3067 * therefore recommended over ones taking a charset name string 3068 * (where the empty string "" indicates invariant-character conversion).
3069 * 3070 * Use the macro US_INV as the third, signature-distinguishing parameter. 3071 * 3072 * For example: 3073 * \code 3074 * void fn(const char *s) { 3075 * UnicodeString ustr(s, -1, US_INV); 3076 * // use ustr ... 3077 * } 3078 * \endcode 3079 * 3080 * @param src String using only invariant characters. 3081 * @param length Length of src, or -1 if NUL-terminated. 3082 * @param inv Signature-distinguishing parameter, use US_INV. 3083 * 3084 * @see US_INV 3085 * @stable ICU 3.2 3086 */ 3087 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 3088 3089 3090 /** 3091 * Copy constructor. 3092 * @param that The UnicodeString object to copy. 3093 * @stable ICU 2.0 3094 */ 3095 UnicodeString(const UnicodeString& that); 3096 3097 /** 3098 * 'Substring' constructor from tail of source string. 3099 * @param src The UnicodeString object to copy. 3100 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3101 * @stable ICU 2.2 3102 */ 3103 UnicodeString(const UnicodeString& src, int32_t srcStart); 3104 3105 /** 3106 * 'Substring' constructor from subrange of source string. 3107 * @param src The UnicodeString object to copy. 3108 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3109 * @param srcLength The number of characters from <tt>src</tt> to copy. 3110 * @stable ICU 2.2 3111 */ 3112 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3113 3114 /** 3115 * Clone this object, an instance of a subclass of Replaceable. 3116 * Clones can be used concurrently in multiple threads. 3117 * If a subclass does not implement clone(), or if an error occurs, 3118 * then NULL is returned. 3119 * The clone functions in all subclasses return a pointer to a Replaceable 3120 * because some compilers do not support covariant (same-as-this) 3121 * return types; cast to the appropriate subclass if necessary. 3122 * The caller must delete the clone.
*
   * @return a clone of this object
   *
   * @see Replaceable::clone
   * @see getDynamicClassID
   * @stable ICU 2.6
   */
  virtual Replaceable *clone() const;

  /** Destructor.
   * @stable ICU 2.0
   */
  virtual ~UnicodeString();

  /**
   * Create a UnicodeString from a UTF-8 string.
   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   * Calls u_strFromUTF8WithSub().
   *
   * @param utf8 UTF-8 input string.
   *             Note that a StringPiece can be implicitly constructed
   *             from a std::string or a NUL-terminated const char * string.
   * @return A UnicodeString with equivalent UTF-16 contents.
   * @see toUTF8
   * @see toUTF8String
   * @stable ICU 4.2
   */
  static UnicodeString fromUTF8(const StringPiece &utf8);

  /**
   * Create a UnicodeString from a UTF-32 string.
   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   * Calls u_strFromUTF32WithSub().
   *
   * @param utf32 UTF-32 input string. Must not be NULL.
   * @param length Length of the input string, or -1 if NUL-terminated.
   * @return A UnicodeString with equivalent UTF-16 contents.
   * @see toUTF32
   * @stable ICU 4.2
   */
  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);

  /* Miscellaneous operations */

  /**
   * Unescape a string of characters and return a string containing
   * the result.  The following escape sequences are recognized:
   *
   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
   * \\Uhhhhhhhh   8 hex digits
   * \\xhh         1-2 hex digits
   * \\ooo         1-3 octal digits; o in [0-7]
   * \\cX          control-X; X is masked with 0x1F
   *
   * as well as the standard ANSI C escapes:
   *
   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
   * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
   *
   * Anything else following a backslash is generically escaped.  For
   * example, "[a\\-z]" returns "[a-z]".
   *
   * If an escape sequence is ill-formed, this method returns an empty
   * string.  An example of an ill-formed sequence is "\\u" followed by
   * fewer than 4 hex digits.
   *
   * This function is similar to u_unescape() but not identical to it.
   * The latter takes a source char*, so it does escape recognition
   * and also invariant conversion.
   *
   * @return a string with backslash escapes interpreted, or an
   * empty string on error.
   * @see UnicodeString#unescapeAt()
   * @see u_unescape()
   * @see u_unescapeAt()
   * @stable ICU 2.0
   */
  UnicodeString unescape() const;

  /**
   * Unescape a single escape sequence and return the represented
   * character.  See unescape() for a listing of the recognized escape
   * sequences.  The character at offset-1 is assumed (without
   * checking) to be a backslash.  If the escape sequence is
   * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
   * returned.
   *
   * @param offset an input/output parameter.  On input, it is the
   * offset into this string where the escape sequence is located,
   * after the initial backslash.  On output, it is advanced after the
   * last character parsed.  On error, it is not advanced at all.
   * @return the character represented by the escape sequence at
   * offset, or U_SENTINEL=-1 on error.
   * @see UnicodeString#unescape()
   * @see u_unescape()
   * @see u_unescapeAt()
   * @stable ICU 2.0
   */
  UChar32 unescapeAt(int32_t &offset) const;

  /**
   * ICU "poor man's RTTI", returns a UClassID for this class.
   *
   * @stable ICU 2.2
   */
  static UClassID U_EXPORT2 getStaticClassID();

  /**
   * ICU "poor man's RTTI", returns a UClassID for the actual class.
*
   * @stable ICU 2.2
   */
  virtual UClassID getDynamicClassID() const;

  //========================================
  // Implementation methods
  //========================================

protected:
  /**
   * Implement Replaceable::getLength() (see jitterbug 1027).
   * @stable ICU 2.4
   */
  virtual int32_t getLength() const;

  /**
   * Implement Replaceable::getCharAt().
   * The change in Replaceable to use virtual getCharAt() allows
   * UnicodeString::charAt() to be inline again (see jitterbug 709).
   * @stable ICU 2.4
   */
  virtual UChar getCharAt(int32_t offset) const;

  /**
   * Implement Replaceable::getChar32At().
   * The change in Replaceable to use virtual getChar32At() allows
   * UnicodeString::char32At() to be inline again (see jitterbug 709).
   * @stable ICU 2.4
   */
  virtual UChar32 getChar32At(int32_t offset) const;

private:
  // For char* constructors. Could be made public.
  UnicodeString &setToUTF8(const StringPiece &utf8);
  // For extract(char*).
  // We could make a toUTF8(target, capacity, errorCode) public but not
  // this version: New API will be cleaner if we make callers create substrings
  // rather than having start+length on every method,
  // and it should take a UErrorCode&.
  int32_t
  toUTF8(int32_t start, int32_t len,
         char *target, int32_t capacity) const;

  /**
   * Internal string contents comparison, called by operator==.
   * Requires: this & text not bogus and have same lengths.
*/
  UBool doEquals(const UnicodeString &text, int32_t len) const;

  // Inline UnicodeString overload (defined later in this header): handles a
  // bogus srcText, pins srcStart/srcLength to srcText, then forwards to the
  // UChar* overload below.
  inline int8_t
  doCompare(int32_t start,
           int32_t length,
           const UnicodeString& srcText,
           int32_t srcStart,
           int32_t srcLength) const;

  // Worker for the compare()/operator< family on a raw UChar array.
  int8_t doCompare(int32_t start,
           int32_t length,
           const UChar *srcChars,
           int32_t srcStart,
           int32_t srcLength) const;

  // Inline UnicodeString overload; same shape as doCompare() above but
  // forwards to the code point order worker.
  inline int8_t
  doCompareCodePointOrder(int32_t start,
                          int32_t length,
                          const UnicodeString& srcText,
                          int32_t srcStart,
                          int32_t srcLength) const;

  // Worker for the compareCodePointOrder() family on a raw UChar array.
  int8_t doCompareCodePointOrder(int32_t start,
                                 int32_t length,
                                 const UChar *srcChars,
                                 int32_t srcStart,
                                 int32_t srcLength) const;

  // Inline UnicodeString overload; forwards to the UChar* worker below,
  // passing options through unchanged.
  inline int8_t
  doCaseCompare(int32_t start,
                int32_t length,
                const UnicodeString &srcText,
                int32_t srcStart,
                int32_t srcLength,
                uint32_t options) const;

  // Worker for the caseCompare() family on a raw UChar array.
  int8_t
  doCaseCompare(int32_t start,
                int32_t length,
                const UChar *srcChars,
                int32_t srcStart,
                int32_t srcLength,
                uint32_t options) const;

  // Workers behind the public indexOf(UChar/UChar32, ...) wrappers.
  int32_t doIndexOf(UChar c,
            int32_t start,
            int32_t length) const;

  int32_t doIndexOf(UChar32 c,
                        int32_t start,
                        int32_t length) const;

  // Workers behind the public lastIndexOf(UChar/UChar32, ...) wrappers.
  int32_t doLastIndexOf(UChar c,
                int32_t start,
                int32_t length) const;

  int32_t doLastIndexOf(UChar32 c,
                            int32_t start,
                            int32_t length) const;

  // Worker behind extract(start, length, UChar *target, targetStart).
  void doExtract(int32_t start,
         int32_t length,
         UChar *dst,
         int32_t dstStart) const;

  // Inline: replaces the whole contents of target with the requested
  // range of this string.
  inline void doExtract(int32_t start,
         int32_t length,
         UnicodeString& target) const;

  // Inline: returns kInvalidUChar when offset is outside [0, length()).
  inline UChar doCharAt(int32_t offset) const;

  // Workers behind replace(), replaceBetween(), setTo() and operator=(UChar).
  UnicodeString& doReplace(int32_t start,
               int32_t length,
               const UnicodeString& srcText,
               int32_t srcStart,
               int32_t srcLength);

  UnicodeString& doReplace(int32_t start,
               int32_t length,
               const UChar *srcChars,
               int32_t srcStart,
               int32_t
srcLength); 3361 3362 UnicodeString& doReverse(int32_t start, 3363 int32_t length); 3364 3365 // calculate hash code 3366 int32_t doHashCode(void) const; 3367 3368 // get pointer to start of array 3369 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 3370 inline UChar* getArrayStart(void); 3371 inline const UChar* getArrayStart(void) const; 3372 3373 inline UBool hasShortLength() const; 3374 inline int32_t getShortLength() const; 3375 3376 // A UnicodeString object (not necessarily its current buffer) 3377 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 3378 inline UBool isWritable() const; 3379 3380 // Is the current buffer writable? 3381 inline UBool isBufferWritable() const; 3382 3383 // None of the following does releaseArray(). 3384 inline void setZeroLength(); 3385 inline void setShortLength(int32_t len); 3386 inline void setLength(int32_t len); 3387 inline void setToEmpty(); 3388 inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags 3389 3390 // allocate the array; result may be the stack buffer 3391 // sets refCount to 1 if appropriate 3392 // sets fArray, fCapacity, and flags 3393 // sets length to 0 3394 // returns boolean for success or failure 3395 UBool allocate(int32_t capacity); 3396 3397 // release the array if owned 3398 void releaseArray(void); 3399 3400 // turn a bogus string into an empty one 3401 void unBogus(); 3402 3403 // implements assigment operator, copy constructor, and fastCopyFrom() 3404 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE); 3405 3406 // Pin start and limit to acceptable values. 3407 inline void pinIndex(int32_t& start) const; 3408 inline void pinIndices(int32_t& start, 3409 int32_t& length) const; 3410 3411 #if !UCONFIG_NO_CONVERSION 3412 3413 /* Internal extract() using UConverter. 
*/
  int32_t doExtract(int32_t start, int32_t length,
                    char *dest, int32_t destCapacity,
                    UConverter *cnv,
                    UErrorCode &errorCode) const;

  /*
   * Real constructor for converting from codepage data.
   * It assumes that it is called with !fRefCounted
   * (NOTE(review): presumably "the kRefCounted flag is not set"; there is
   * no field named fRefCounted in this class -- confirm).
   *
   * If <code>codepage==0</code>, then the default converter
   * is used for the platform encoding.
   * If <code>codepage</code> is an empty string (<code>""</code>),
   * then a simple conversion is performed on the codepage-invariant
   * subset ("invariant characters") of the platform encoding. See utypes.h.
   */
  void doCodepageCreate(const char *codepageData,
                        int32_t dataLength,
                        const char *codepage);

  /*
   * Worker function for creating a UnicodeString from
   * a codepage string using a UConverter.
   */
  void
  doCodepageCreate(const char *codepageData,
                   int32_t dataLength,
                   UConverter *converter,
                   UErrorCode &status);

#endif

  /*
   * This function is called when write access to the array
   * is necessary.
   *
   * We need to make a copy of the array if
   * the buffer is read-only, or
   * the buffer is refCounted (shared), and refCount>1, or
   * the buffer is too small.
   *
   * Return FALSE if memory could not be allocated.
   */
  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
                            int32_t growCapacity = -1,
                            UBool doCopyArray = TRUE,
                            int32_t **pBufferToDelete = 0,
                            UBool forceClone = FALSE);

  /**
   * Common function for UnicodeString case mappings.
   * The stringCaseMapper has the same type UStringCaseMapper
   * as in ustr_imp.h for ustrcase_map().
*/
  UnicodeString &
  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);

  // ref counting
  void addRef(void);
  int32_t removeRef(void);
  int32_t refCount(void) const;

  // constants
  enum {
    // Set the stack buffer size so that sizeof(UnicodeString) is,
    // naturally (without padding), a multiple of sizeof(pointer).
    US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
    kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
    kGrowSize=128, // grow size for this buffer
    kInvalidHashCode=0, // invalid hash code
    kEmptyHashCode=1, // hash code for empty string

    // bit flag values for fLengthAndFlags
    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
    kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
    kRefCounted=4,      // there is a refCount field before the characters in fArray
    kBufferIsReadonly=8,// do not write to this buffer
    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
                        // and releaseBuffer(newLength) must be called
    kAllStorageFlags=0x1f,  // mask of the five flag bits above

    kLengthShift=5,     // remaining 11 bits for non-negative short length, or negative if long
    kLength1=1<<kLengthShift,
    kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
    kLengthIsLarge=0xffe0,  // short length < 0, real length is in fUnion.fFields.fLength

    // combined values for convenience
    kShortString=kUsingStackBuffer,
    kLongString=kRefCounted,
    kReadonlyAlias=kBufferIsReadonly,
    kWritableAlias=0
  };

  friend class StringThreadTest;
  friend class UnicodeStringAppendable;

  union StackBufferOrFields;        // forward declaration necessary before friend declaration
  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion

  /*
   * The following are all the class fields that are stored
   * in each UnicodeString object.
   * Note that UnicodeString has virtual functions,
   * therefore there is an implicit vtable pointer
   * as the first real field.
   * The fields should be aligned such that no padding is necessary.
   * On 32-bit machines, the size should be 32 bytes,
   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
   *
   * We use a hack to achieve this.
   *
   * With at least some compilers, each of the following is forced to
   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
   * rounded up with additional padding if the fields do not already fit that requirement:
   * - sizeof(class UnicodeString)
   * - offsetof(UnicodeString, fUnion)
   * - sizeof(fUnion)
   * - sizeof(fStackFields)
   *
   * We optimize for the longest possible internal buffer for short strings.
   * fUnion.fStackFields begins with 2 bytes for storage flags
   * and the length of relatively short strings,
   * followed by the buffer for short string contents.
   * There is no padding inside fStackFields.
   *
   * Heap-allocated and aliased strings use fUnion.fFields.
   * Both fStackFields and fFields must begin with the same fields for flags and short length,
   * that is, those must have the same memory offsets inside the object,
   * because the flags must be inspected in order to decide which half of fUnion is being used.
   * We assume that the compiler does not reorder the fields.
   *
   * (Padding at the end of fFields is ok:
   * As long as it is no larger than fStackFields, it is not wasted space.)
   *
   * For some of the history of the UnicodeString class fields layout,
   * see ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
   * and ticket #8322 "why is sizeof(UnicodeString)==48?".
3550 */ 3551 // (implicit) *vtable; 3552 union StackBufferOrFields { 3553 // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used. 3554 // Each struct of the union must begin with fLengthAndFlags. 3555 struct { 3556 int16_t fLengthAndFlags; // bit fields: see constants above 3557 UChar fBuffer[US_STACKBUF_SIZE]; // buffer for short strings 3558 } fStackFields; 3559 struct { 3560 int16_t fLengthAndFlags; // bit fields: see constants above 3561 UChar *fArray; // the Unicode data 3562 int32_t fCapacity; // capacity of fArray (in UChars) 3563 int32_t fLength; // number of characters in fArray if >127; else undefined 3564 } fFields; 3565 } fUnion; 3566 }; 3567 3568 /** 3569 * Create a new UnicodeString with the concatenation of two others. 3570 * 3571 * @param s1 The first string to be copied to the new one. 3572 * @param s2 The second string to be copied to the new one, after s1. 3573 * @return UnicodeString(s1).append(s2) 3574 * @stable ICU 2.8 3575 */ 3576 U_COMMON_API UnicodeString U_EXPORT2 3577 operator+ (const UnicodeString &s1, const UnicodeString &s2); 3578 3579 //======================================== 3580 // Inline members 3581 //======================================== 3582 3583 //======================================== 3584 // Privates 3585 //======================================== 3586 3587 inline void 3588 UnicodeString::pinIndex(int32_t& start) const 3589 { 3590 // pin index 3591 if(start < 0) { 3592 start = 0; 3593 } else if(start > length()) { 3594 start = length(); 3595 } 3596 } 3597 3598 inline void 3599 UnicodeString::pinIndices(int32_t& start, 3600 int32_t& _length) const 3601 { 3602 // pin indices 3603 int32_t len = length(); 3604 if(start < 0) { 3605 start = 0; 3606 } else if(start > len) { 3607 start = len; 3608 } 3609 if(_length < 0) { 3610 _length = 0; 3611 } else if(_length > (len - start)) { 3612 _length = (len - start); 3613 } 3614 } 3615 3616 inline UChar* 3617 UnicodeString::getArrayStart() { 3618 return 
(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 3619 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 3620 } 3621 3622 inline const UChar* 3623 UnicodeString::getArrayStart() const { 3624 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 3625 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 3626 } 3627 3628 //======================================== 3629 // Default constructor 3630 //======================================== 3631 3632 inline 3633 UnicodeString::UnicodeString() { 3634 fUnion.fStackFields.fLengthAndFlags=kShortString; 3635 } 3636 3637 //======================================== 3638 // Read-only implementation methods 3639 //======================================== 3640 inline UBool 3641 UnicodeString::hasShortLength() const { 3642 return fUnion.fFields.fLengthAndFlags>=0; 3643 } 3644 3645 inline int32_t 3646 UnicodeString::getShortLength() const { 3647 // fLengthAndFlags must be non-negative -> short length >= 0 3648 // and arithmetic or logical shift does not matter. 3649 return fUnion.fFields.fLengthAndFlags>>kLengthShift; 3650 } 3651 3652 inline int32_t 3653 UnicodeString::length() const { 3654 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength; 3655 } 3656 3657 inline int32_t 3658 UnicodeString::getCapacity() const { 3659 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
3660 US_STACKBUF_SIZE : fUnion.fFields.fCapacity; 3661 } 3662 3663 inline int32_t 3664 UnicodeString::hashCode() const 3665 { return doHashCode(); } 3666 3667 inline UBool 3668 UnicodeString::isBogus() const 3669 { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); } 3670 3671 inline UBool 3672 UnicodeString::isWritable() const 3673 { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); } 3674 3675 inline UBool 3676 UnicodeString::isBufferWritable() const 3677 { 3678 return (UBool)( 3679 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 3680 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); 3681 } 3682 3683 inline const UChar * 3684 UnicodeString::getBuffer() const { 3685 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { 3686 return 0; 3687 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { 3688 return fUnion.fStackFields.fBuffer; 3689 } else { 3690 return fUnion.fFields.fArray; 3691 } 3692 } 3693 3694 //======================================== 3695 // Read-only alias methods 3696 //======================================== 3697 inline int8_t 3698 UnicodeString::doCompare(int32_t start, 3699 int32_t thisLength, 3700 const UnicodeString& srcText, 3701 int32_t srcStart, 3702 int32_t srcLength) const 3703 { 3704 if(srcText.isBogus()) { 3705 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3706 } else { 3707 srcText.pinIndices(srcStart, srcLength); 3708 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3709 } 3710 } 3711 3712 inline UBool 3713 UnicodeString::operator== (const UnicodeString& text) const 3714 { 3715 if(isBogus()) { 3716 return text.isBogus(); 3717 } else { 3718 int32_t len = length(), textLength = text.length(); 3719 return !text.isBogus() && len == textLength && doEquals(text, len); 3720 } 3721 } 3722 3723 inline UBool 3724 UnicodeString::operator!= (const UnicodeString& text) const 3725 { return (! 
operator==(text)); } 3726 3727 inline UBool 3728 UnicodeString::operator> (const UnicodeString& text) const 3729 { return doCompare(0, length(), text, 0, text.length()) == 1; } 3730 3731 inline UBool 3732 UnicodeString::operator< (const UnicodeString& text) const 3733 { return doCompare(0, length(), text, 0, text.length()) == -1; } 3734 3735 inline UBool 3736 UnicodeString::operator>= (const UnicodeString& text) const 3737 { return doCompare(0, length(), text, 0, text.length()) != -1; } 3738 3739 inline UBool 3740 UnicodeString::operator<= (const UnicodeString& text) const 3741 { return doCompare(0, length(), text, 0, text.length()) != 1; } 3742 3743 inline int8_t 3744 UnicodeString::compare(const UnicodeString& text) const 3745 { return doCompare(0, length(), text, 0, text.length()); } 3746 3747 inline int8_t 3748 UnicodeString::compare(int32_t start, 3749 int32_t _length, 3750 const UnicodeString& srcText) const 3751 { return doCompare(start, _length, srcText, 0, srcText.length()); } 3752 3753 inline int8_t 3754 UnicodeString::compare(const UChar *srcChars, 3755 int32_t srcLength) const 3756 { return doCompare(0, length(), srcChars, 0, srcLength); } 3757 3758 inline int8_t 3759 UnicodeString::compare(int32_t start, 3760 int32_t _length, 3761 const UnicodeString& srcText, 3762 int32_t srcStart, 3763 int32_t srcLength) const 3764 { return doCompare(start, _length, srcText, srcStart, srcLength); } 3765 3766 inline int8_t 3767 UnicodeString::compare(int32_t start, 3768 int32_t _length, 3769 const UChar *srcChars) const 3770 { return doCompare(start, _length, srcChars, 0, _length); } 3771 3772 inline int8_t 3773 UnicodeString::compare(int32_t start, 3774 int32_t _length, 3775 const UChar *srcChars, 3776 int32_t srcStart, 3777 int32_t srcLength) const 3778 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 3779 3780 inline int8_t 3781 UnicodeString::compareBetween(int32_t start, 3782 int32_t limit, 3783 const UnicodeString& srcText, 3784 int32_t 
srcStart, 3785 int32_t srcLimit) const 3786 { return doCompare(start, limit - start, 3787 srcText, srcStart, srcLimit - srcStart); } 3788 3789 inline int8_t 3790 UnicodeString::doCompareCodePointOrder(int32_t start, 3791 int32_t thisLength, 3792 const UnicodeString& srcText, 3793 int32_t srcStart, 3794 int32_t srcLength) const 3795 { 3796 if(srcText.isBogus()) { 3797 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3798 } else { 3799 srcText.pinIndices(srcStart, srcLength); 3800 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3801 } 3802 } 3803 3804 inline int8_t 3805 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 3806 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 3807 3808 inline int8_t 3809 UnicodeString::compareCodePointOrder(int32_t start, 3810 int32_t _length, 3811 const UnicodeString& srcText) const 3812 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 3813 3814 inline int8_t 3815 UnicodeString::compareCodePointOrder(const UChar *srcChars, 3816 int32_t srcLength) const 3817 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 3818 3819 inline int8_t 3820 UnicodeString::compareCodePointOrder(int32_t start, 3821 int32_t _length, 3822 const UnicodeString& srcText, 3823 int32_t srcStart, 3824 int32_t srcLength) const 3825 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 3826 3827 inline int8_t 3828 UnicodeString::compareCodePointOrder(int32_t start, 3829 int32_t _length, 3830 const UChar *srcChars) const 3831 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 3832 3833 inline int8_t 3834 UnicodeString::compareCodePointOrder(int32_t start, 3835 int32_t _length, 3836 const UChar *srcChars, 3837 int32_t srcStart, 3838 int32_t srcLength) const 3839 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 3840 3841 inline 
int8_t
UnicodeString::compareCodePointOrderBetween(int32_t start,
                                            int32_t limit,
                                            const UnicodeString& srcText,
                                            int32_t srcStart,
                                            int32_t srcLimit) const
{
  // Convert the two [start, limit) pairs into start/length pairs.
  return doCompareCodePointOrder(start, limit - start,
                                 srcText, srcStart, srcLimit - srcStart);
}

// UnicodeString flavor of doCaseCompare(): deals with a bogus srcText, pins
// the source range, then defers to the UChar* worker with the same options.
inline int8_t
UnicodeString::doCaseCompare(int32_t start,
                             int32_t thisLength,
                             const UnicodeString &srcText,
                             int32_t srcStart,
                             int32_t srcLength,
                             uint32_t options) const
{
  if(srcText.isBogus()) {
    return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  }
  srcText.pinIndices(srcStart, srcLength);
  return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
}

// caseCompare() overloads: thin forwards to doCaseCompare() that fill in
// the omitted start/length arguments.
inline int8_t
UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const
{ return doCaseCompare(0, length(), text, 0, text.length(), options); }

inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const UnicodeString &srcText,
                           uint32_t options) const
{ return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); }

inline int8_t
UnicodeString::caseCompare(const UChar *srcChars,
                           int32_t srcLength,
                           uint32_t options) const
{ return doCaseCompare(0, length(), srcChars, 0, srcLength, options); }

inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const UnicodeString &srcText,
                           int32_t srcStart,
                           int32_t srcLength,
                           uint32_t options) const
{ return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); }

// Uses _length for both this string's range and the srcChars range.
inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const UChar *srcChars,
                           uint32_t options) const
{ return doCaseCompare(start, _length, srcChars, 0, _length, options); }

inline int8_t
UnicodeString::caseCompare(int32_t start, 3906 int32_t _length, 3907 const UChar *srcChars, 3908 int32_t srcStart, 3909 int32_t srcLength, 3910 uint32_t options) const { 3911 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 3912 } 3913 3914 inline int8_t 3915 UnicodeString::caseCompareBetween(int32_t start, 3916 int32_t limit, 3917 const UnicodeString &srcText, 3918 int32_t srcStart, 3919 int32_t srcLimit, 3920 uint32_t options) const { 3921 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 3922 } 3923 3924 inline int32_t 3925 UnicodeString::indexOf(const UnicodeString& srcText, 3926 int32_t srcStart, 3927 int32_t srcLength, 3928 int32_t start, 3929 int32_t _length) const 3930 { 3931 if(!srcText.isBogus()) { 3932 srcText.pinIndices(srcStart, srcLength); 3933 if(srcLength > 0) { 3934 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 3935 } 3936 } 3937 return -1; 3938 } 3939 3940 inline int32_t 3941 UnicodeString::indexOf(const UnicodeString& text) const 3942 { return indexOf(text, 0, text.length(), 0, length()); } 3943 3944 inline int32_t 3945 UnicodeString::indexOf(const UnicodeString& text, 3946 int32_t start) const { 3947 pinIndex(start); 3948 return indexOf(text, 0, text.length(), start, length() - start); 3949 } 3950 3951 inline int32_t 3952 UnicodeString::indexOf(const UnicodeString& text, 3953 int32_t start, 3954 int32_t _length) const 3955 { return indexOf(text, 0, text.length(), start, _length); } 3956 3957 inline int32_t 3958 UnicodeString::indexOf(const UChar *srcChars, 3959 int32_t srcLength, 3960 int32_t start) const { 3961 pinIndex(start); 3962 return indexOf(srcChars, 0, srcLength, start, length() - start); 3963 } 3964 3965 inline int32_t 3966 UnicodeString::indexOf(const UChar *srcChars, 3967 int32_t srcLength, 3968 int32_t start, 3969 int32_t _length) const 3970 { return indexOf(srcChars, 0, srcLength, start, _length); } 3971 3972 inline int32_t 3973 
UnicodeString::indexOf(UChar c, 3974 int32_t start, 3975 int32_t _length) const 3976 { return doIndexOf(c, start, _length); } 3977 3978 inline int32_t 3979 UnicodeString::indexOf(UChar32 c, 3980 int32_t start, 3981 int32_t _length) const 3982 { return doIndexOf(c, start, _length); } 3983 3984 inline int32_t 3985 UnicodeString::indexOf(UChar c) const 3986 { return doIndexOf(c, 0, length()); } 3987 3988 inline int32_t 3989 UnicodeString::indexOf(UChar32 c) const 3990 { return indexOf(c, 0, length()); } 3991 3992 inline int32_t 3993 UnicodeString::indexOf(UChar c, 3994 int32_t start) const { 3995 pinIndex(start); 3996 return doIndexOf(c, start, length() - start); 3997 } 3998 3999 inline int32_t 4000 UnicodeString::indexOf(UChar32 c, 4001 int32_t start) const { 4002 pinIndex(start); 4003 return indexOf(c, start, length() - start); 4004 } 4005 4006 inline int32_t 4007 UnicodeString::lastIndexOf(const UChar *srcChars, 4008 int32_t srcLength, 4009 int32_t start, 4010 int32_t _length) const 4011 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 4012 4013 inline int32_t 4014 UnicodeString::lastIndexOf(const UChar *srcChars, 4015 int32_t srcLength, 4016 int32_t start) const { 4017 pinIndex(start); 4018 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 4019 } 4020 4021 inline int32_t 4022 UnicodeString::lastIndexOf(const UnicodeString& srcText, 4023 int32_t srcStart, 4024 int32_t srcLength, 4025 int32_t start, 4026 int32_t _length) const 4027 { 4028 if(!srcText.isBogus()) { 4029 srcText.pinIndices(srcStart, srcLength); 4030 if(srcLength > 0) { 4031 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 4032 } 4033 } 4034 return -1; 4035 } 4036 4037 inline int32_t 4038 UnicodeString::lastIndexOf(const UnicodeString& text, 4039 int32_t start, 4040 int32_t _length) const 4041 { return lastIndexOf(text, 0, text.length(), start, _length); } 4042 4043 inline int32_t 4044 UnicodeString::lastIndexOf(const UnicodeString& 
text, 4045 int32_t start) const { 4046 pinIndex(start); 4047 return lastIndexOf(text, 0, text.length(), start, length() - start); 4048 } 4049 4050 inline int32_t 4051 UnicodeString::lastIndexOf(const UnicodeString& text) const 4052 { return lastIndexOf(text, 0, text.length(), 0, length()); } 4053 4054 inline int32_t 4055 UnicodeString::lastIndexOf(UChar c, 4056 int32_t start, 4057 int32_t _length) const 4058 { return doLastIndexOf(c, start, _length); } 4059 4060 inline int32_t 4061 UnicodeString::lastIndexOf(UChar32 c, 4062 int32_t start, 4063 int32_t _length) const { 4064 return doLastIndexOf(c, start, _length); 4065 } 4066 4067 inline int32_t 4068 UnicodeString::lastIndexOf(UChar c) const 4069 { return doLastIndexOf(c, 0, length()); } 4070 4071 inline int32_t 4072 UnicodeString::lastIndexOf(UChar32 c) const { 4073 return lastIndexOf(c, 0, length()); 4074 } 4075 4076 inline int32_t 4077 UnicodeString::lastIndexOf(UChar c, 4078 int32_t start) const { 4079 pinIndex(start); 4080 return doLastIndexOf(c, start, length() - start); 4081 } 4082 4083 inline int32_t 4084 UnicodeString::lastIndexOf(UChar32 c, 4085 int32_t start) const { 4086 pinIndex(start); 4087 return lastIndexOf(c, start, length() - start); 4088 } 4089 4090 inline UBool 4091 UnicodeString::startsWith(const UnicodeString& text) const 4092 { return compare(0, text.length(), text, 0, text.length()) == 0; } 4093 4094 inline UBool 4095 UnicodeString::startsWith(const UnicodeString& srcText, 4096 int32_t srcStart, 4097 int32_t srcLength) const 4098 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 4099 4100 inline UBool 4101 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { 4102 if(srcLength < 0) { 4103 srcLength = u_strlen(srcChars); 4104 } 4105 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 4106 } 4107 4108 inline UBool 4109 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { 4110 if(srcLength < 0) { 4111 
srcLength = u_strlen(srcChars); 4112 } 4113 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 4114 } 4115 4116 inline UBool 4117 UnicodeString::endsWith(const UnicodeString& text) const 4118 { return doCompare(length() - text.length(), text.length(), 4119 text, 0, text.length()) == 0; } 4120 4121 inline UBool 4122 UnicodeString::endsWith(const UnicodeString& srcText, 4123 int32_t srcStart, 4124 int32_t srcLength) const { 4125 srcText.pinIndices(srcStart, srcLength); 4126 return doCompare(length() - srcLength, srcLength, 4127 srcText, srcStart, srcLength) == 0; 4128 } 4129 4130 inline UBool 4131 UnicodeString::endsWith(const UChar *srcChars, 4132 int32_t srcLength) const { 4133 if(srcLength < 0) { 4134 srcLength = u_strlen(srcChars); 4135 } 4136 return doCompare(length() - srcLength, srcLength, 4137 srcChars, 0, srcLength) == 0; 4138 } 4139 4140 inline UBool 4141 UnicodeString::endsWith(const UChar *srcChars, 4142 int32_t srcStart, 4143 int32_t srcLength) const { 4144 if(srcLength < 0) { 4145 srcLength = u_strlen(srcChars + srcStart); 4146 } 4147 return doCompare(length() - srcLength, srcLength, 4148 srcChars, srcStart, srcLength) == 0; 4149 } 4150 4151 //======================================== 4152 // replace 4153 //======================================== 4154 inline UnicodeString& 4155 UnicodeString::replace(int32_t start, 4156 int32_t _length, 4157 const UnicodeString& srcText) 4158 { return doReplace(start, _length, srcText, 0, srcText.length()); } 4159 4160 inline UnicodeString& 4161 UnicodeString::replace(int32_t start, 4162 int32_t _length, 4163 const UnicodeString& srcText, 4164 int32_t srcStart, 4165 int32_t srcLength) 4166 { return doReplace(start, _length, srcText, srcStart, srcLength); } 4167 4168 inline UnicodeString& 4169 UnicodeString::replace(int32_t start, 4170 int32_t _length, 4171 const UChar *srcChars, 4172 int32_t srcLength) 4173 { return doReplace(start, _length, srcChars, 0, srcLength); } 4174 4175 inline UnicodeString& 
4176 UnicodeString::replace(int32_t start, 4177 int32_t _length, 4178 const UChar *srcChars, 4179 int32_t srcStart, 4180 int32_t srcLength) 4181 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 4182 4183 inline UnicodeString& 4184 UnicodeString::replace(int32_t start, 4185 int32_t _length, 4186 UChar srcChar) 4187 { return doReplace(start, _length, &srcChar, 0, 1); } 4188 4189 inline UnicodeString& 4190 UnicodeString::replaceBetween(int32_t start, 4191 int32_t limit, 4192 const UnicodeString& srcText) 4193 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 4194 4195 inline UnicodeString& 4196 UnicodeString::replaceBetween(int32_t start, 4197 int32_t limit, 4198 const UnicodeString& srcText, 4199 int32_t srcStart, 4200 int32_t srcLimit) 4201 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 4202 4203 inline UnicodeString& 4204 UnicodeString::findAndReplace(const UnicodeString& oldText, 4205 const UnicodeString& newText) 4206 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 4207 newText, 0, newText.length()); } 4208 4209 inline UnicodeString& 4210 UnicodeString::findAndReplace(int32_t start, 4211 int32_t _length, 4212 const UnicodeString& oldText, 4213 const UnicodeString& newText) 4214 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 4215 newText, 0, newText.length()); } 4216 4217 // ============================ 4218 // extract 4219 // ============================ 4220 inline void 4221 UnicodeString::doExtract(int32_t start, 4222 int32_t _length, 4223 UnicodeString& target) const 4224 { target.replace(0, target.length(), *this, start, _length); } 4225 4226 inline void 4227 UnicodeString::extract(int32_t start, 4228 int32_t _length, 4229 UChar *target, 4230 int32_t targetStart) const 4231 { doExtract(start, _length, target, targetStart); } 4232 4233 inline void 4234 UnicodeString::extract(int32_t start, 4235 int32_t _length, 4236 UnicodeString& target) 
const 4237 { doExtract(start, _length, target); } 4238 4239 #if !UCONFIG_NO_CONVERSION 4240 4241 inline int32_t 4242 UnicodeString::extract(int32_t start, 4243 int32_t _length, 4244 char *dst, 4245 const char *codepage) const 4246 4247 { 4248 // This dstSize value will be checked explicitly 4249 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); 4250 } 4251 4252 #endif 4253 4254 inline void 4255 UnicodeString::extractBetween(int32_t start, 4256 int32_t limit, 4257 UChar *dst, 4258 int32_t dstStart) const { 4259 pinIndex(start); 4260 pinIndex(limit); 4261 doExtract(start, limit - start, dst, dstStart); 4262 } 4263 4264 inline UnicodeString 4265 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 4266 return tempSubString(start, limit - start); 4267 } 4268 4269 inline UChar 4270 UnicodeString::doCharAt(int32_t offset) const 4271 { 4272 if((uint32_t)offset < (uint32_t)length()) { 4273 return getArrayStart()[offset]; 4274 } else { 4275 return kInvalidUChar; 4276 } 4277 } 4278 4279 inline UChar 4280 UnicodeString::charAt(int32_t offset) const 4281 { return doCharAt(offset); } 4282 4283 inline UChar 4284 UnicodeString::operator[] (int32_t offset) const 4285 { return doCharAt(offset); } 4286 4287 inline UBool 4288 UnicodeString::isEmpty() const { 4289 // Arithmetic or logical right shift does not matter: only testing for 0. 
4290 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0; 4291 } 4292 4293 //======================================== 4294 // Write implementation methods 4295 //======================================== 4296 inline void 4297 UnicodeString::setZeroLength() { 4298 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags; 4299 } 4300 4301 inline void 4302 UnicodeString::setShortLength(int32_t len) { 4303 // requires 0 <= len <= kMaxShortLength 4304 fUnion.fFields.fLengthAndFlags = 4305 (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); 4306 } 4307 4308 inline void 4309 UnicodeString::setLength(int32_t len) { 4310 if(len <= kMaxShortLength) { 4311 setShortLength(len); 4312 } else { 4313 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge; 4314 fUnion.fFields.fLength = len; 4315 } 4316 } 4317 4318 inline void 4319 UnicodeString::setToEmpty() { 4320 fUnion.fFields.fLengthAndFlags = kShortString; 4321 } 4322 4323 inline void 4324 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 4325 setLength(len); 4326 fUnion.fFields.fArray = array; 4327 fUnion.fFields.fCapacity = capacity; 4328 } 4329 4330 inline UnicodeString& 4331 UnicodeString::operator= (UChar ch) 4332 { return doReplace(0, length(), &ch, 0, 1); } 4333 4334 inline UnicodeString& 4335 UnicodeString::operator= (UChar32 ch) 4336 { return replace(0, length(), ch); } 4337 4338 inline UnicodeString& 4339 UnicodeString::setTo(const UnicodeString& srcText, 4340 int32_t srcStart, 4341 int32_t srcLength) 4342 { 4343 unBogus(); 4344 return doReplace(0, length(), srcText, srcStart, srcLength); 4345 } 4346 4347 inline UnicodeString& 4348 UnicodeString::setTo(const UnicodeString& srcText, 4349 int32_t srcStart) 4350 { 4351 unBogus(); 4352 srcText.pinIndex(srcStart); 4353 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 4354 } 4355 4356 inline UnicodeString& 4357 UnicodeString::setTo(const UnicodeString& srcText) 4358 { 4359 return 
copyFrom(srcText); 4360 } 4361 4362 inline UnicodeString& 4363 UnicodeString::setTo(const UChar *srcChars, 4364 int32_t srcLength) 4365 { 4366 unBogus(); 4367 return doReplace(0, length(), srcChars, 0, srcLength); 4368 } 4369 4370 inline UnicodeString& 4371 UnicodeString::setTo(UChar srcChar) 4372 { 4373 unBogus(); 4374 return doReplace(0, length(), &srcChar, 0, 1); 4375 } 4376 4377 inline UnicodeString& 4378 UnicodeString::setTo(UChar32 srcChar) 4379 { 4380 unBogus(); 4381 return replace(0, length(), srcChar); 4382 } 4383 4384 inline UnicodeString& 4385 UnicodeString::append(const UnicodeString& srcText, 4386 int32_t srcStart, 4387 int32_t srcLength) 4388 { return doReplace(length(), 0, srcText, srcStart, srcLength); } 4389 4390 inline UnicodeString& 4391 UnicodeString::append(const UnicodeString& srcText) 4392 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 4393 4394 inline UnicodeString& 4395 UnicodeString::append(const UChar *srcChars, 4396 int32_t srcStart, 4397 int32_t srcLength) 4398 { return doReplace(length(), 0, srcChars, srcStart, srcLength); } 4399 4400 inline UnicodeString& 4401 UnicodeString::append(const UChar *srcChars, 4402 int32_t srcLength) 4403 { return doReplace(length(), 0, srcChars, 0, srcLength); } 4404 4405 inline UnicodeString& 4406 UnicodeString::append(UChar srcChar) 4407 { return doReplace(length(), 0, &srcChar, 0, 1); } 4408 4409 inline UnicodeString& 4410 UnicodeString::operator+= (UChar ch) 4411 { return doReplace(length(), 0, &ch, 0, 1); } 4412 4413 inline UnicodeString& 4414 UnicodeString::operator+= (UChar32 ch) { 4415 return append(ch); 4416 } 4417 4418 inline UnicodeString& 4419 UnicodeString::operator+= (const UnicodeString& srcText) 4420 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 4421 4422 inline UnicodeString& 4423 UnicodeString::insert(int32_t start, 4424 const UnicodeString& srcText, 4425 int32_t srcStart, 4426 int32_t srcLength) 4427 { return doReplace(start, 0, srcText, srcStart, 
srcLength); } 4428 4429 inline UnicodeString& 4430 UnicodeString::insert(int32_t start, 4431 const UnicodeString& srcText) 4432 { return doReplace(start, 0, srcText, 0, srcText.length()); } 4433 4434 inline UnicodeString& 4435 UnicodeString::insert(int32_t start, 4436 const UChar *srcChars, 4437 int32_t srcStart, 4438 int32_t srcLength) 4439 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 4440 4441 inline UnicodeString& 4442 UnicodeString::insert(int32_t start, 4443 const UChar *srcChars, 4444 int32_t srcLength) 4445 { return doReplace(start, 0, srcChars, 0, srcLength); } 4446 4447 inline UnicodeString& 4448 UnicodeString::insert(int32_t start, 4449 UChar srcChar) 4450 { return doReplace(start, 0, &srcChar, 0, 1); } 4451 4452 inline UnicodeString& 4453 UnicodeString::insert(int32_t start, 4454 UChar32 srcChar) 4455 { return replace(start, 0, srcChar); } 4456 4457 4458 inline UnicodeString& 4459 UnicodeString::remove() 4460 { 4461 // remove() of a bogus string makes the string empty and non-bogus 4462 if(isBogus()) { 4463 setToEmpty(); 4464 } else { 4465 setZeroLength(); 4466 } 4467 return *this; 4468 } 4469 4470 inline UnicodeString& 4471 UnicodeString::remove(int32_t start, 4472 int32_t _length) 4473 { 4474 if(start <= 0 && _length == INT32_MAX) { 4475 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 4476 return remove(); 4477 } 4478 return doReplace(start, _length, NULL, 0, 0); 4479 } 4480 4481 inline UnicodeString& 4482 UnicodeString::removeBetween(int32_t start, 4483 int32_t limit) 4484 { return doReplace(start, limit - start, NULL, 0, 0); } 4485 4486 inline UnicodeString & 4487 UnicodeString::retainBetween(int32_t start, int32_t limit) { 4488 truncate(limit); 4489 return doReplace(0, start, NULL, 0, 0); 4490 } 4491 4492 inline UBool 4493 UnicodeString::truncate(int32_t targetLength) 4494 { 4495 if(isBogus() && targetLength == 0) { 4496 // truncate(0) of a bogus string makes the string empty and non-bogus 
4497 unBogus(); 4498 return FALSE; 4499 } else if((uint32_t)targetLength < (uint32_t)length()) { 4500 setLength(targetLength); 4501 return TRUE; 4502 } else { 4503 return FALSE; 4504 } 4505 } 4506 4507 inline UnicodeString& 4508 UnicodeString::reverse() 4509 { return doReverse(0, length()); } 4510 4511 inline UnicodeString& 4512 UnicodeString::reverse(int32_t start, 4513 int32_t _length) 4514 { return doReverse(start, _length); } 4515 4516 U_NAMESPACE_END 4517 4518 #endif 4519