1 /* 2 ********************************************************************** 3 * Copyright (C) 1998-2015, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * File unistr.h 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 09/25/98 stephen Creation. 13 * 11/11/98 stephen Changed per 11/9 code review. 14 * 04/20/99 stephen Overhauled per 4/16 code review. 15 * 11/18/99 aliu Made to inherit from Replaceable. Added method 16 * handleReplaceBetween(); other methods unchanged. 17 * 06/25/01 grhoten Remove dependency on iostream. 18 ****************************************************************************** 19 */ 20 21 #ifndef UNISTR_H 22 #define UNISTR_H 23 24 /** 25 * \file 26 * \brief C++ API: Unicode String 27 */ 28 29 #include "unicode/utypes.h" 30 #include "unicode/rep.h" 31 #include "unicode/std_string.h" 32 #include "unicode/stringpiece.h" 33 #include "unicode/bytestream.h" 34 #include "unicode/ucasemap.h" 35 36 struct UConverter; // unicode/ucnv.h 37 38 #ifndef U_COMPARE_CODE_POINT_ORDER 39 /* see also ustring.h and unorm.h */ 40 /** 41 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 42 * Compare strings in code point order instead of code unit order. 43 * @stable ICU 2.2 44 */ 45 #define U_COMPARE_CODE_POINT_ORDER 0x8000 46 #endif 47 48 #ifndef USTRING_H 49 /** 50 * \ingroup ustring_ustrlen 51 */ 52 U_STABLE int32_t U_EXPORT2 53 u_strlen(const UChar *s); 54 #endif 55 56 /** 57 * \def U_STRING_CASE_MAPPER_DEFINED 58 * @internal 59 */ 60 #ifndef U_STRING_CASE_MAPPER_DEFINED 61 #define U_STRING_CASE_MAPPER_DEFINED 62 63 /** 64 * Internal string case mapping function type. 
65 * @internal 66 */ 67 typedef int32_t U_CALLCONV 68 UStringCaseMapper(const UCaseMap *csm, 69 UChar *dest, int32_t destCapacity, 70 const UChar *src, int32_t srcLength, 71 UErrorCode *pErrorCode); 72 73 #endif 74 75 U_NAMESPACE_BEGIN 76 77 class BreakIterator; // unicode/brkiter.h 78 class Locale; // unicode/locid.h 79 class StringCharacterIterator; 80 class UnicodeStringAppendable; // unicode/appendable.h 81 82 /* The <iostream> include has been moved to unicode/ustream.h */ 83 84 /** 85 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 86 * which constructs a Unicode string from an invariant-character char * string. 87 * About invariant characters see utypes.h. 88 * This constructor has no runtime dependency on conversion code and is 89 * therefore recommended over ones taking a charset name string 90 * (where the empty string "" indicates invariant-character conversion). 91 * 92 * @stable ICU 3.2 93 */ 94 #define US_INV icu::UnicodeString::kInvariant 95 96 /** 97 * Unicode String literals in C++. 98 * Dependent on the platform properties, different UnicodeString 99 * constructors should be used to create a UnicodeString object from 100 * a string literal. 101 * The macros are defined for maximum performance. 102 * They work only for strings that contain "invariant characters", i.e., 103 * only latin letters, digits, and some punctuation. 104 * See utypes.h for details. 105 * 106 * The string parameter must be a C string literal. 107 * The length of the string, not including the terminating 108 * <code>NUL</code>, must be specified as a constant. 109 * The U_STRING_DECL macro should be invoked exactly once for one 110 * such string variable before it is used. 
111 * @stable ICU 2.0 112 */ 113 #if defined(U_DECLARE_UTF16) 114 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 115 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 116 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) 117 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 118 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) 119 #else 120 # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) 121 #endif 122 123 /** 124 * Unicode String literals in C++. 125 * Dependent on the platform properties, different UnicodeString 126 * constructors should be used to create a UnicodeString object from 127 * a string literal. 128 * The macros are defined for improved performance. 129 * They work only for strings that contain "invariant characters", i.e., 130 * only latin letters, digits, and some punctuation. 131 * See utypes.h for details. 132 * 133 * The string parameter must be a C string literal. 134 * @stable ICU 2.0 135 */ 136 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 137 138 /** 139 * \def UNISTR_FROM_CHAR_EXPLICIT 140 * This can be defined to be empty or "explicit". 141 * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) 142 * constructors are marked as explicit, preventing their inadvertent use. 143 * @stable ICU 49 144 */ 145 #ifndef UNISTR_FROM_CHAR_EXPLICIT 146 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 147 // Auto-"explicit" in ICU library code. 148 # define UNISTR_FROM_CHAR_EXPLICIT explicit 149 # else 150 // Empty by default for source code compatibility. 
151 # define UNISTR_FROM_CHAR_EXPLICIT 152 # endif 153 #endif 154 155 /** 156 * \def UNISTR_FROM_STRING_EXPLICIT 157 * This can be defined to be empty or "explicit". 158 * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) 159 * constructors are marked as explicit, preventing their inadvertent use. 160 * 161 * In particular, this helps prevent accidentally depending on ICU conversion code 162 * by passing a string literal into an API with a const UnicodeString & parameter. 163 * @stable ICU 49 164 */ 165 #ifndef UNISTR_FROM_STRING_EXPLICIT 166 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 167 // Auto-"explicit" in ICU library code. 168 # define UNISTR_FROM_STRING_EXPLICIT explicit 169 # else 170 // Empty by default for source code compatibility. 171 # define UNISTR_FROM_STRING_EXPLICIT 172 # endif 173 #endif 174 175 /* Cannot make the following #ifndef U_HIDE_INTERNAL_API, 176 it is used to construct other non-internal constants */ 177 /** 178 * \def UNISTR_OBJECT_SIZE 179 * Desired sizeof(UnicodeString) in bytes. 180 * It should be a multiple of sizeof(pointer) to avoid unusable space for padding. 181 * The object size may want to be a multiple of 16 bytes, 182 * which is a common granularity for heap allocation. 183 * 184 * Any space inside the object beyond sizeof(vtable pointer) + 2 185 * is available for storing short strings inside the object. 186 * The bigger the object, the longer a string that can be stored inside the object, 187 * without additional heap allocation. 188 * 189 * Depending on a platform's pointer size, pointer alignment requirements, 190 * and struct padding, the compiler will usually round up sizeof(UnicodeString) 191 * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), 192 * to hold the fields for heap-allocated strings. 
193 * Such a minimum size also ensures that the object is easily large enough 194 * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH). 195 * 196 * sizeof(UnicodeString) >= 48 should work for all known platforms. 197 * 198 * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, 199 * sizeof(UnicodeString) = 64 would leave space for 200 * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 201 * UChars stored inside the object. 202 * 203 * The minimum object size on a 64-bit machine would be 204 * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, 205 * and the internal buffer would hold up to 11 UChars in that case. 206 * 207 * @see U16_MAX_LENGTH 208 * @draft ICU 56 209 */ 210 #ifndef UNISTR_OBJECT_SIZE 211 # define UNISTR_OBJECT_SIZE 64 212 #endif 213 214 /** 215 * UnicodeString is a string class that stores Unicode characters directly and provides 216 * similar functionality as the Java String and StringBuffer/StringBuilder classes. 217 * It is a concrete implementation of the abstract class Replaceable (for transliteration). 218 * 219 * A UnicodeString may also "alias" an external array of characters 220 * (that is, point to it, rather than own the array) 221 * whose lifetime must then at least match the lifetime of the aliasing object. 222 * This aliasing may be preserved when returning a UnicodeString by value, 223 * depending on the compiler and the function implementation, 224 * via Return Value Optimization (RVO) or the move assignment operator. 225 * (However, the copy assignment operator does not preserve aliasing.) 226 * For details see the description of storage models at the end of the class API docs 227 * and in the User Guide chapter linked from there. 228 * 229 * The UnicodeString class is not suitable for subclassing. 
*
 * <p>For an overview of Unicode strings in C and C++ see the
 * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
 *
 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
 * A Unicode character may be stored with either one code unit
 * (the most common case) or with a matched pair of special code units
 * ("surrogates"). The data type for code units is UChar.
 * For single-character handling, a Unicode character code <em>point</em> is a value
 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
 *
 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
 * This is the same as with multi-byte char* strings in traditional string handling.
 * Operations on partial strings typically do not test for code point boundaries.
 * If necessary, the user needs to take care of such boundaries by testing for the code unit
 * values or by using functions like
 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
 *
 * UnicodeString methods are more lenient with regard to input parameter values
 * than other ICU APIs. In particular:
 * - If indexes are out of bounds for a UnicodeString object
 *   (<0 or >length()) then they are "pinned" to the nearest boundary.
 * - If primitive string pointer values (e.g., const UChar * or char *)
 *   for input strings are NULL, then those input string parameters are treated
 *   as if they pointed to an empty string.
 *   However, this is <em>not</em> the case for char * parameters for charset names
 *   or other IDs.
* - Most UnicodeString methods do not take a UErrorCode parameter because
 *   there are usually very few opportunities for failure other than a shortage
 *   of memory, error codes in low-level C++ string methods would be inconvenient,
 *   and the error code as the last parameter (ICU convention) would prevent
 *   the use of default parameter values.
 *   Instead, such methods set the UnicodeString into a "bogus" state
 *   (see isBogus()) if an error occurs.
 *
 * In string comparisons, two UnicodeString objects that are both "bogus"
 * compare equal (to be transitive and prevent endless loops in sorting),
 * and a "bogus" string compares less than any non-"bogus" one.
 *
 * Const UnicodeString methods are thread-safe. Multiple threads can use
 * const methods on the same UnicodeString object simultaneously,
 * but non-const methods must not be called concurrently (in multiple threads)
 * with any other (const or non-const) methods.
 *
 * Similarly, const UnicodeString & parameters are thread-safe.
 * One object may be passed in as such a parameter concurrently in multiple threads.
 * This includes the const UnicodeString & parameters for
 * copy construction, assignment, and cloning.
 *
 * <p>UnicodeString uses several storage methods.
 * String contents can be stored inside the UnicodeString object itself,
 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
 * Most of this is done transparently, but careful aliasing in particular provides
 * significant performance improvements.
 * Also, the internal buffer is accessible via special functions.
286 * For details see the 287 * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p> 288 * 289 * @see utf.h 290 * @see CharacterIterator 291 * @stable ICU 2.0 292 */ 293 class U_COMMON_API UnicodeString : public Replaceable 294 { 295 public: 296 297 /** 298 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 299 * which constructs a Unicode string from an invariant-character char * string. 300 * Use the macro US_INV instead of the full qualification for this value. 301 * 302 * @see US_INV 303 * @stable ICU 3.2 304 */ 305 enum EInvariant { 306 /** 307 * @see EInvariant 308 * @stable ICU 3.2 309 */ 310 kInvariant 311 }; 312 313 //======================================== 314 // Read-only operations 315 //======================================== 316 317 /* Comparison - bitwise only - for international comparison use collation */ 318 319 /** 320 * Equality operator. Performs only bitwise comparison. 321 * @param text The UnicodeString to compare to this one. 322 * @return TRUE if <TT>text</TT> contains the same characters as this one, 323 * FALSE otherwise. 324 * @stable ICU 2.0 325 */ 326 inline UBool operator== (const UnicodeString& text) const; 327 328 /** 329 * Inequality operator. Performs only bitwise comparison. 330 * @param text The UnicodeString to compare to this one. 331 * @return FALSE if <TT>text</TT> contains the same characters as this one, 332 * TRUE otherwise. 333 * @stable ICU 2.0 334 */ 335 inline UBool operator!= (const UnicodeString& text) const; 336 337 /** 338 * Greater than operator. Performs only bitwise comparison. 339 * @param text The UnicodeString to compare to this one. 
340 * @return TRUE if the characters in this are bitwise 341 * greater than the characters in <code>text</code>, FALSE otherwise 342 * @stable ICU 2.0 343 */ 344 inline UBool operator> (const UnicodeString& text) const; 345 346 /** 347 * Less than operator. Performs only bitwise comparison. 348 * @param text The UnicodeString to compare to this one. 349 * @return TRUE if the characters in this are bitwise 350 * less than the characters in <code>text</code>, FALSE otherwise 351 * @stable ICU 2.0 352 */ 353 inline UBool operator< (const UnicodeString& text) const; 354 355 /** 356 * Greater than or equal operator. Performs only bitwise comparison. 357 * @param text The UnicodeString to compare to this one. 358 * @return TRUE if the characters in this are bitwise 359 * greater than or equal to the characters in <code>text</code>, FALSE otherwise 360 * @stable ICU 2.0 361 */ 362 inline UBool operator>= (const UnicodeString& text) const; 363 364 /** 365 * Less than or equal operator. Performs only bitwise comparison. 366 * @param text The UnicodeString to compare to this one. 367 * @return TRUE if the characters in this are bitwise 368 * less than or equal to the characters in <code>text</code>, FALSE otherwise 369 * @stable ICU 2.0 370 */ 371 inline UBool operator<= (const UnicodeString& text) const; 372 373 /** 374 * Compare the characters bitwise in this UnicodeString to 375 * the characters in <code>text</code>. 376 * @param text The UnicodeString to compare to this one. 377 * @return The result of bitwise character comparison: 0 if this 378 * contains the same characters as <code>text</code>, -1 if the characters in 379 * this are bitwise less than the characters in <code>text</code>, +1 if the 380 * characters in this are bitwise greater than the characters 381 * in <code>text</code>. 
382 * @stable ICU 2.0 383 */ 384 inline int8_t compare(const UnicodeString& text) const; 385 386 /** 387 * Compare the characters bitwise in the range 388 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 389 * in the <b>entire string</b> <TT>text</TT>. 390 * (The parameters "start" and "length" are not applied to the other text "text".) 391 * @param start the offset at which the compare operation begins 392 * @param length the number of characters of text to compare. 393 * @param text the other text to be compared against this string. 394 * @return The result of bitwise character comparison: 0 if this 395 * contains the same characters as <code>text</code>, -1 if the characters in 396 * this are bitwise less than the characters in <code>text</code>, +1 if the 397 * characters in this are bitwise greater than the characters 398 * in <code>text</code>. 399 * @stable ICU 2.0 400 */ 401 inline int8_t compare(int32_t start, 402 int32_t length, 403 const UnicodeString& text) const; 404 405 /** 406 * Compare the characters bitwise in the range 407 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 408 * in <TT>srcText</TT> in the range 409 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 410 * @param start the offset at which the compare operation begins 411 * @param length the number of characters in this to compare. 412 * @param srcText the text to be compared 413 * @param srcStart the offset into <TT>srcText</TT> to start comparison 414 * @param srcLength the number of characters in <TT>src</TT> to compare 415 * @return The result of bitwise character comparison: 0 if this 416 * contains the same characters as <code>srcText</code>, -1 if the characters in 417 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 418 * characters in this are bitwise greater than the characters 419 * in <code>srcText</code>. 
420 * @stable ICU 2.0 421 */ 422 inline int8_t compare(int32_t start, 423 int32_t length, 424 const UnicodeString& srcText, 425 int32_t srcStart, 426 int32_t srcLength) const; 427 428 /** 429 * Compare the characters bitwise in this UnicodeString with the first 430 * <TT>srcLength</TT> characters in <TT>srcChars</TT>. 431 * @param srcChars The characters to compare to this UnicodeString. 432 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 433 * @return The result of bitwise character comparison: 0 if this 434 * contains the same characters as <code>srcChars</code>, -1 if the characters in 435 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 436 * characters in this are bitwise greater than the characters 437 * in <code>srcChars</code>. 438 * @stable ICU 2.0 439 */ 440 inline int8_t compare(const UChar *srcChars, 441 int32_t srcLength) const; 442 443 /** 444 * Compare the characters bitwise in the range 445 * [<TT>start</TT>, <TT>start + length</TT>) with the first 446 * <TT>length</TT> characters in <TT>srcChars</TT> 447 * @param start the offset at which the compare operation begins 448 * @param length the number of characters to compare. 449 * @param srcChars the characters to be compared 450 * @return The result of bitwise character comparison: 0 if this 451 * contains the same characters as <code>srcChars</code>, -1 if the characters in 452 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 453 * characters in this are bitwise greater than the characters 454 * in <code>srcChars</code>. 455 * @stable ICU 2.0 456 */ 457 inline int8_t compare(int32_t start, 458 int32_t length, 459 const UChar *srcChars) const; 460 461 /** 462 * Compare the characters bitwise in the range 463 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 464 * in <TT>srcChars</TT> in the range 465 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 
466 * @param start the offset at which the compare operation begins 467 * @param length the number of characters in this to compare 468 * @param srcChars the characters to be compared 469 * @param srcStart the offset into <TT>srcChars</TT> to start comparison 470 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 471 * @return The result of bitwise character comparison: 0 if this 472 * contains the same characters as <code>srcChars</code>, -1 if the characters in 473 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 474 * characters in this are bitwise greater than the characters 475 * in <code>srcChars</code>. 476 * @stable ICU 2.0 477 */ 478 inline int8_t compare(int32_t start, 479 int32_t length, 480 const UChar *srcChars, 481 int32_t srcStart, 482 int32_t srcLength) const; 483 484 /** 485 * Compare the characters bitwise in the range 486 * [<TT>start</TT>, <TT>limit</TT>) with the characters 487 * in <TT>srcText</TT> in the range 488 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). 489 * @param start the offset at which the compare operation begins 490 * @param limit the offset immediately following the compare operation 491 * @param srcText the text to be compared 492 * @param srcStart the offset into <TT>srcText</TT> to start comparison 493 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison 494 * @return The result of bitwise character comparison: 0 if this 495 * contains the same characters as <code>srcText</code>, -1 if the characters in 496 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 497 * characters in this are bitwise greater than the characters 498 * in <code>srcText</code>. 499 * @stable ICU 2.0 500 */ 501 inline int8_t compareBetween(int32_t start, 502 int32_t limit, 503 const UnicodeString& srcText, 504 int32_t srcStart, 505 int32_t srcLimit) const; 506 507 /** 508 * Compare two Unicode strings in code point order. 
509 * The result may be different from the results of compare(), operator<, etc. 510 * if supplementary characters are present: 511 * 512 * In UTF-16, supplementary characters (with code points U+10000 and above) are 513 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 514 * which means that they compare as less than some other BMP characters like U+feff. 515 * This function compares Unicode strings in code point order. 516 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 517 * 518 * @param text Another string to compare this one to. 519 * @return a negative/zero/positive integer corresponding to whether 520 * this string is less than/equal to/greater than the second one 521 * in code point order 522 * @stable ICU 2.0 523 */ 524 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 525 526 /** 527 * Compare two Unicode strings in code point order. 528 * The result may be different from the results of compare(), operator<, etc. 529 * if supplementary characters are present: 530 * 531 * In UTF-16, supplementary characters (with code points U+10000 and above) are 532 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 533 * which means that they compare as less than some other BMP characters like U+feff. 534 * This function compares Unicode strings in code point order. 535 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 536 * 537 * @param start The start offset in this string at which the compare operation begins. 538 * @param length The number of code units from this string to compare. 539 * @param srcText Another string to compare this one to. 
540 * @return a negative/zero/positive integer corresponding to whether 541 * this string is less than/equal to/greater than the second one 542 * in code point order 543 * @stable ICU 2.0 544 */ 545 inline int8_t compareCodePointOrder(int32_t start, 546 int32_t length, 547 const UnicodeString& srcText) const; 548 549 /** 550 * Compare two Unicode strings in code point order. 551 * The result may be different from the results of compare(), operator<, etc. 552 * if supplementary characters are present: 553 * 554 * In UTF-16, supplementary characters (with code points U+10000 and above) are 555 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 556 * which means that they compare as less than some other BMP characters like U+feff. 557 * This function compares Unicode strings in code point order. 558 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 559 * 560 * @param start The start offset in this string at which the compare operation begins. 561 * @param length The number of code units from this string to compare. 562 * @param srcText Another string to compare this one to. 563 * @param srcStart The start offset in that string at which the compare operation begins. 564 * @param srcLength The number of code units from that string to compare. 565 * @return a negative/zero/positive integer corresponding to whether 566 * this string is less than/equal to/greater than the second one 567 * in code point order 568 * @stable ICU 2.0 569 */ 570 inline int8_t compareCodePointOrder(int32_t start, 571 int32_t length, 572 const UnicodeString& srcText, 573 int32_t srcStart, 574 int32_t srcLength) const; 575 576 /** 577 * Compare two Unicode strings in code point order. 578 * The result may be different from the results of compare(), operator<, etc. 
579 * if supplementary characters are present: 580 * 581 * In UTF-16, supplementary characters (with code points U+10000 and above) are 582 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 583 * which means that they compare as less than some other BMP characters like U+feff. 584 * This function compares Unicode strings in code point order. 585 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 586 * 587 * @param srcChars A pointer to another string to compare this one to. 588 * @param srcLength The number of code units from that string to compare. 589 * @return a negative/zero/positive integer corresponding to whether 590 * this string is less than/equal to/greater than the second one 591 * in code point order 592 * @stable ICU 2.0 593 */ 594 inline int8_t compareCodePointOrder(const UChar *srcChars, 595 int32_t srcLength) const; 596 597 /** 598 * Compare two Unicode strings in code point order. 599 * The result may be different from the results of compare(), operator<, etc. 600 * if supplementary characters are present: 601 * 602 * In UTF-16, supplementary characters (with code points U+10000 and above) are 603 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 604 * which means that they compare as less than some other BMP characters like U+feff. 605 * This function compares Unicode strings in code point order. 606 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 607 * 608 * @param start The start offset in this string at which the compare operation begins. 609 * @param length The number of code units from this string to compare. 610 * @param srcChars A pointer to another string to compare this one to. 
611 * @return a negative/zero/positive integer corresponding to whether 612 * this string is less than/equal to/greater than the second one 613 * in code point order 614 * @stable ICU 2.0 615 */ 616 inline int8_t compareCodePointOrder(int32_t start, 617 int32_t length, 618 const UChar *srcChars) const; 619 620 /** 621 * Compare two Unicode strings in code point order. 622 * The result may be different from the results of compare(), operator<, etc. 623 * if supplementary characters are present: 624 * 625 * In UTF-16, supplementary characters (with code points U+10000 and above) are 626 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 627 * which means that they compare as less than some other BMP characters like U+feff. 628 * This function compares Unicode strings in code point order. 629 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 630 * 631 * @param start The start offset in this string at which the compare operation begins. 632 * @param length The number of code units from this string to compare. 633 * @param srcChars A pointer to another string to compare this one to. 634 * @param srcStart The start offset in that string at which the compare operation begins. 635 * @param srcLength The number of code units from that string to compare. 636 * @return a negative/zero/positive integer corresponding to whether 637 * this string is less than/equal to/greater than the second one 638 * in code point order 639 * @stable ICU 2.0 640 */ 641 inline int8_t compareCodePointOrder(int32_t start, 642 int32_t length, 643 const UChar *srcChars, 644 int32_t srcStart, 645 int32_t srcLength) const; 646 647 /** 648 * Compare two Unicode strings in code point order. 649 * The result may be different from the results of compare(), operator<, etc. 
650 * if supplementary characters are present: 651 * 652 * In UTF-16, supplementary characters (with code points U+10000 and above) are 653 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 654 * which means that they compare as less than some other BMP characters like U+feff. 655 * This function compares Unicode strings in code point order. 656 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 657 * 658 * @param start The start offset in this string at which the compare operation begins. 659 * @param limit The offset after the last code unit from this string to compare. 660 * @param srcText Another string to compare this one to. 661 * @param srcStart The start offset in that string at which the compare operation begins. 662 * @param srcLimit The offset after the last code unit from that string to compare. 663 * @return a negative/zero/positive integer corresponding to whether 664 * this string is less than/equal to/greater than the second one 665 * in code point order 666 * @stable ICU 2.0 667 */ 668 inline int8_t compareCodePointOrderBetween(int32_t start, 669 int32_t limit, 670 const UnicodeString& srcText, 671 int32_t srcStart, 672 int32_t srcLimit) const; 673 674 /** 675 * Compare two strings case-insensitively using full case folding. 676 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). 677 * 678 * @param text Another string to compare this one to. 679 * @param options A bit set of options: 680 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 681 * Comparison in code unit order with default case folding. 682 * 683 * - U_COMPARE_CODE_POINT_ORDER 684 * Set to choose code point order instead of code unit order 685 * (see u_strCompare for details). 686 * 687 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 688 * 689 * @return A negative, zero, or positive integer indicating the comparison result. 
690 * @stable ICU 2.0 691 */ 692 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 693 694 /** 695 * Compare two strings case-insensitively using full case folding. 696 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 697 * 698 * @param start The start offset in this string at which the compare operation begins. 699 * @param length The number of code units from this string to compare. 700 * @param srcText Another string to compare this one to. 701 * @param options A bit set of options: 702 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 703 * Comparison in code unit order with default case folding. 704 * 705 * - U_COMPARE_CODE_POINT_ORDER 706 * Set to choose code point order instead of code unit order 707 * (see u_strCompare for details). 708 * 709 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 710 * 711 * @return A negative, zero, or positive integer indicating the comparison result. 712 * @stable ICU 2.0 713 */ 714 inline int8_t caseCompare(int32_t start, 715 int32_t length, 716 const UnicodeString& srcText, 717 uint32_t options) const; 718 719 /** 720 * Compare two strings case-insensitively using full case folding. 721 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 722 * 723 * @param start The start offset in this string at which the compare operation begins. 724 * @param length The number of code units from this string to compare. 725 * @param srcText Another string to compare this one to. 726 * @param srcStart The start offset in that string at which the compare operation begins. 727 * @param srcLength The number of code units from that string to compare. 728 * @param options A bit set of options: 729 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 730 * Comparison in code unit order with default case folding. 731 * 732 * - U_COMPARE_CODE_POINT_ORDER 733 * Set to choose code point order instead of code unit order 734 * (see u_strCompare for details). 
735 * 736 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 737 * 738 * @return A negative, zero, or positive integer indicating the comparison result. 739 * @stable ICU 2.0 740 */ 741 inline int8_t caseCompare(int32_t start, 742 int32_t length, 743 const UnicodeString& srcText, 744 int32_t srcStart, 745 int32_t srcLength, 746 uint32_t options) const; 747 748 /** 749 * Compare two strings case-insensitively using full case folding. 750 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 751 * 752 * @param srcChars A pointer to another string to compare this one to. 753 * @param srcLength The number of code units from that string to compare. 754 * @param options A bit set of options: 755 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 756 * Comparison in code unit order with default case folding. 757 * 758 * - U_COMPARE_CODE_POINT_ORDER 759 * Set to choose code point order instead of code unit order 760 * (see u_strCompare for details). 761 * 762 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 763 * 764 * @return A negative, zero, or positive integer indicating the comparison result. 765 * @stable ICU 2.0 766 */ 767 inline int8_t caseCompare(const UChar *srcChars, 768 int32_t srcLength, 769 uint32_t options) const; 770 771 /** 772 * Compare two strings case-insensitively using full case folding. 773 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 774 * 775 * @param start The start offset in this string at which the compare operation begins. 776 * @param length The number of code units from this string to compare. 777 * @param srcChars A pointer to another string to compare this one to. 778 * @param options A bit set of options: 779 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 780 * Comparison in code unit order with default case folding. 781 * 782 * - U_COMPARE_CODE_POINT_ORDER 783 * Set to choose code point order instead of code unit order 784 * (see u_strCompare for details). 
785 * 786 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 787 * 788 * @return A negative, zero, or positive integer indicating the comparison result. 789 * @stable ICU 2.0 790 */ 791 inline int8_t caseCompare(int32_t start, 792 int32_t length, 793 const UChar *srcChars, 794 uint32_t options) const; 795 796 /** 797 * Compare two strings case-insensitively using full case folding. 798 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 799 * 800 * @param start The start offset in this string at which the compare operation begins. 801 * @param length The number of code units from this string to compare. 802 * @param srcChars A pointer to another string to compare this one to. 803 * @param srcStart The start offset in that string at which the compare operation begins. 804 * @param srcLength The number of code units from that string to compare. 805 * @param options A bit set of options: 806 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 807 * Comparison in code unit order with default case folding. 808 * 809 * - U_COMPARE_CODE_POINT_ORDER 810 * Set to choose code point order instead of code unit order 811 * (see u_strCompare for details). 812 * 813 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 814 * 815 * @return A negative, zero, or positive integer indicating the comparison result. 816 * @stable ICU 2.0 817 */ 818 inline int8_t caseCompare(int32_t start, 819 int32_t length, 820 const UChar *srcChars, 821 int32_t srcStart, 822 int32_t srcLength, 823 uint32_t options) const; 824 825 /** 826 * Compare two strings case-insensitively using full case folding. 827 * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)). 828 * 829 * @param start The start offset in this string at which the compare operation begins. 830 * @param limit The offset after the last code unit from this string to compare. 831 * @param srcText Another string to compare this one to. 
832 * @param srcStart The start offset in that string at which the compare operation begins. 833 * @param srcLimit The offset after the last code unit from that string to compare. 834 * @param options A bit set of options: 835 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 836 * Comparison in code unit order with default case folding. 837 * 838 * - U_COMPARE_CODE_POINT_ORDER 839 * Set to choose code point order instead of code unit order 840 * (see u_strCompare for details). 841 * 842 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 843 * 844 * @return A negative, zero, or positive integer indicating the comparison result. 845 * @stable ICU 2.0 846 */ 847 inline int8_t caseCompareBetween(int32_t start, 848 int32_t limit, 849 const UnicodeString& srcText, 850 int32_t srcStart, 851 int32_t srcLimit, 852 uint32_t options) const; 853 854 /** 855 * Determine if this starts with the characters in <TT>text</TT> 856 * @param text The text to match. 857 * @return TRUE if this starts with the characters in <TT>text</TT>, 858 * FALSE otherwise 859 * @stable ICU 2.0 860 */ 861 inline UBool startsWith(const UnicodeString& text) const; 862 863 /** 864 * Determine if this starts with the characters in <TT>srcText</TT> 865 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 866 * @param srcText The text to match. 867 * @param srcStart the offset into <TT>srcText</TT> to start matching 868 * @param srcLength the number of characters in <TT>srcText</TT> to match 869 * @return TRUE if this starts with the characters in <TT>text</TT>, 870 * FALSE otherwise 871 * @stable ICU 2.0 872 */ 873 inline UBool startsWith(const UnicodeString& srcText, 874 int32_t srcStart, 875 int32_t srcLength) const; 876 877 /** 878 * Determine if this starts with the characters in <TT>srcChars</TT> 879 * @param srcChars The characters to match. 
880 * @param srcLength the number of characters in <TT>srcChars</TT> 881 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, 882 * FALSE otherwise 883 * @stable ICU 2.0 884 */ 885 inline UBool startsWith(const UChar *srcChars, 886 int32_t srcLength) const; 887 888 /** 889 * Determine if this starts with the characters in <TT>srcChars</TT> 890 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 891 * @param srcChars The characters to match. 892 * @param srcStart the offset into <TT>srcChars</TT> to start matching 893 * @param srcLength the number of characters in <TT>srcChars</TT> to match 894 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise 895 * @stable ICU 2.0 896 */ 897 inline UBool startsWith(const UChar *srcChars, 898 int32_t srcStart, 899 int32_t srcLength) const; 900 901 /** 902 * Determine if this ends with the characters in <TT>text</TT> 903 * @param text The text to match. 904 * @return TRUE if this ends with the characters in <TT>text</TT>, 905 * FALSE otherwise 906 * @stable ICU 2.0 907 */ 908 inline UBool endsWith(const UnicodeString& text) const; 909 910 /** 911 * Determine if this ends with the characters in <TT>srcText</TT> 912 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 913 * @param srcText The text to match. 914 * @param srcStart the offset into <TT>srcText</TT> to start matching 915 * @param srcLength the number of characters in <TT>srcText</TT> to match 916 * @return TRUE if this ends with the characters in <TT>srcText</TT>, 917 * FALSE otherwise 918 * @stable ICU 2.0 919 */ 920 inline UBool endsWith(const UnicodeString& srcText, 921 int32_t srcStart, 922 int32_t srcLength) const; 923 924 /** 925 * Determine if this ends with the characters in <TT>srcChars</TT> 926 * @param srcChars The characters to match.
927 * @param srcLength the number of characters in <TT>srcChars</TT> 928 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 929 * FALSE otherwise 930 * @stable ICU 2.0 931 */ 932 inline UBool endsWith(const UChar *srcChars, 933 int32_t srcLength) const; 934 935 /** 936 * Determine if this ends with the characters in <TT>srcChars</TT> 937 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 938 * @param srcChars The characters to match. 939 * @param srcStart the offset into <TT>srcText</TT> to start matching 940 * @param srcLength the number of characters in <TT>srcChars</TT> to match 941 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 942 * FALSE otherwise 943 * @stable ICU 2.0 944 */ 945 inline UBool endsWith(const UChar *srcChars, 946 int32_t srcStart, 947 int32_t srcLength) const; 948 949 950 /* Searching - bitwise only */ 951 952 /** 953 * Locate in this the first occurrence of the characters in <TT>text</TT>, 954 * using bitwise comparison. 955 * @param text The text to search for. 956 * @return The offset into this of the start of <TT>text</TT>, 957 * or -1 if not found. 958 * @stable ICU 2.0 959 */ 960 inline int32_t indexOf(const UnicodeString& text) const; 961 962 /** 963 * Locate in this the first occurrence of the characters in <TT>text</TT> 964 * starting at offset <TT>start</TT>, using bitwise comparison. 965 * @param text The text to search for. 966 * @param start The offset at which searching will start. 967 * @return The offset into this of the start of <TT>text</TT>, 968 * or -1 if not found. 969 * @stable ICU 2.0 970 */ 971 inline int32_t indexOf(const UnicodeString& text, 972 int32_t start) const; 973 974 /** 975 * Locate in this the first occurrence in the range 976 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 977 * in <TT>text</TT>, using bitwise comparison. 978 * @param text The text to search for. 979 * @param start The offset at which searching will start. 
980 * @param length The number of characters to search 981 * @return The offset into this of the start of <TT>text</TT>, 982 * or -1 if not found. 983 * @stable ICU 2.0 984 */ 985 inline int32_t indexOf(const UnicodeString& text, 986 int32_t start, 987 int32_t length) const; 988 989 /** 990 * Locate in this the first occurrence in the range 991 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 992 * in <TT>srcText</TT> in the range 993 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 994 * using bitwise comparison. 995 * @param srcText The text to search for. 996 * @param srcStart the offset into <TT>srcText</TT> at which 997 * to start matching 998 * @param srcLength the number of characters in <TT>srcText</TT> to match 999 * @param start the offset into this at which to start matching 1000 * @param length the number of characters in this to search 1001 * @return The offset into this of the start of <TT>text</TT>, 1002 * or -1 if not found. 1003 * @stable ICU 2.0 1004 */ 1005 inline int32_t indexOf(const UnicodeString& srcText, 1006 int32_t srcStart, 1007 int32_t srcLength, 1008 int32_t start, 1009 int32_t length) const; 1010 1011 /** 1012 * Locate in this the first occurrence of the characters in 1013 * <TT>srcChars</TT> 1014 * starting at offset <TT>start</TT>, using bitwise comparison. 1015 * @param srcChars The text to search for. 1016 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1017 * @param start the offset into this at which to start matching 1018 * @return The offset into this of the start of <TT>text</TT>, 1019 * or -1 if not found. 1020 * @stable ICU 2.0 1021 */ 1022 inline int32_t indexOf(const UChar *srcChars, 1023 int32_t srcLength, 1024 int32_t start) const; 1025 1026 /** 1027 * Locate in this the first occurrence in the range 1028 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1029 * in <TT>srcChars</TT>, using bitwise comparison. 1030 * @param srcChars The text to search for. 
1031 * @param srcLength the number of characters in <TT>srcChars</TT> 1032 * @param start The offset at which searching will start. 1033 * @param length The number of characters to search 1034 * @return The offset into this of the start of <TT>srcChars</TT>, 1035 * or -1 if not found. 1036 * @stable ICU 2.0 1037 */ 1038 inline int32_t indexOf(const UChar *srcChars, 1039 int32_t srcLength, 1040 int32_t start, 1041 int32_t length) const; 1042 1043 /** 1044 * Locate in this the first occurrence in the range 1045 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1046 * in <TT>srcChars</TT> in the range 1047 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1048 * using bitwise comparison. 1049 * @param srcChars The text to search for. 1050 * @param srcStart the offset into <TT>srcChars</TT> at which 1051 * to start matching 1052 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1053 * @param start the offset into this at which to start matching 1054 * @param length the number of characters in this to search 1055 * @return The offset into this of the start of <TT>text</TT>, 1056 * or -1 if not found. 1057 * @stable ICU 2.0 1058 */ 1059 int32_t indexOf(const UChar *srcChars, 1060 int32_t srcStart, 1061 int32_t srcLength, 1062 int32_t start, 1063 int32_t length) const; 1064 1065 /** 1066 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1067 * using bitwise comparison. 1068 * @param c The code unit to search for. 1069 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1070 * @stable ICU 2.0 1071 */ 1072 inline int32_t indexOf(UChar c) const; 1073 1074 /** 1075 * Locate in this the first occurrence of the code point <TT>c</TT>, 1076 * using bitwise comparison. 1077 * 1078 * @param c The code point to search for. 1079 * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1080 * @stable ICU 2.0 1081 */ 1082 inline int32_t indexOf(UChar32 c) const; 1083 1084 /** 1085 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1086 * starting at offset <TT>start</TT>, using bitwise comparison. 1087 * @param c The code unit to search for. 1088 * @param start The offset at which searching will start. 1089 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1090 * @stable ICU 2.0 1091 */ 1092 inline int32_t indexOf(UChar c, 1093 int32_t start) const; 1094 1095 /** 1096 * Locate in this the first occurrence of the code point <TT>c</TT> 1097 * starting at offset <TT>start</TT>, using bitwise comparison. 1098 * 1099 * @param c The code point to search for. 1100 * @param start The offset at which searching will start. 1101 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1102 * @stable ICU 2.0 1103 */ 1104 inline int32_t indexOf(UChar32 c, 1105 int32_t start) const; 1106 1107 /** 1108 * Locate in this the first occurrence of the BMP code point <code>c</code> 1109 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1110 * using bitwise comparison. 1111 * @param c The code unit to search for. 1112 * @param start the offset into this at which to start matching 1113 * @param length the number of characters in this to search 1114 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1115 * @stable ICU 2.0 1116 */ 1117 inline int32_t indexOf(UChar c, 1118 int32_t start, 1119 int32_t length) const; 1120 1121 /** 1122 * Locate in this the first occurrence of the code point <TT>c</TT> 1123 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1124 * using bitwise comparison. 1125 * 1126 * @param c The code point to search for. 1127 * @param start the offset into this at which to start matching 1128 * @param length the number of characters in this to search 1129 * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1130 * @stable ICU 2.0 1131 */ 1132 inline int32_t indexOf(UChar32 c, 1133 int32_t start, 1134 int32_t length) const; 1135 1136 /** 1137 * Locate in this the last occurrence of the characters in <TT>text</TT>, 1138 * using bitwise comparison. 1139 * @param text The text to search for. 1140 * @return The offset into this of the start of <TT>text</TT>, 1141 * or -1 if not found. 1142 * @stable ICU 2.0 1143 */ 1144 inline int32_t lastIndexOf(const UnicodeString& text) const; 1145 1146 /** 1147 * Locate in this the last occurrence of the characters in <TT>text</TT> 1148 * starting at offset <TT>start</TT>, using bitwise comparison. 1149 * @param text The text to search for. 1150 * @param start The offset at which searching will start. 1151 * @return The offset into this of the start of <TT>text</TT>, 1152 * or -1 if not found. 1153 * @stable ICU 2.0 1154 */ 1155 inline int32_t lastIndexOf(const UnicodeString& text, 1156 int32_t start) const; 1157 1158 /** 1159 * Locate in this the last occurrence in the range 1160 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1161 * in <TT>text</TT>, using bitwise comparison. 1162 * @param text The text to search for. 1163 * @param start The offset at which searching will start. 1164 * @param length The number of characters to search 1165 * @return The offset into this of the start of <TT>text</TT>, 1166 * or -1 if not found. 1167 * @stable ICU 2.0 1168 */ 1169 inline int32_t lastIndexOf(const UnicodeString& text, 1170 int32_t start, 1171 int32_t length) const; 1172 1173 /** 1174 * Locate in this the last occurrence in the range 1175 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1176 * in <TT>srcText</TT> in the range 1177 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1178 * using bitwise comparison. 1179 * @param srcText The text to search for. 
1180 * @param srcStart the offset into <TT>srcText</TT> at which 1181 * to start matching 1182 * @param srcLength the number of characters in <TT>srcText</TT> to match 1183 * @param start the offset into this at which to start matching 1184 * @param length the number of characters in this to search 1185 * @return The offset into this of the start of <TT>text</TT>, 1186 * or -1 if not found. 1187 * @stable ICU 2.0 1188 */ 1189 inline int32_t lastIndexOf(const UnicodeString& srcText, 1190 int32_t srcStart, 1191 int32_t srcLength, 1192 int32_t start, 1193 int32_t length) const; 1194 1195 /** 1196 * Locate in this the last occurrence of the characters in <TT>srcChars</TT> 1197 * starting at offset <TT>start</TT>, using bitwise comparison. 1198 * @param srcChars The text to search for. 1199 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1200 * @param start the offset into this at which to start matching 1201 * @return The offset into this of the start of <TT>text</TT>, 1202 * or -1 if not found. 1203 * @stable ICU 2.0 1204 */ 1205 inline int32_t lastIndexOf(const UChar *srcChars, 1206 int32_t srcLength, 1207 int32_t start) const; 1208 1209 /** 1210 * Locate in this the last occurrence in the range 1211 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1212 * in <TT>srcChars</TT>, using bitwise comparison. 1213 * @param srcChars The text to search for. 1214 * @param srcLength the number of characters in <TT>srcChars</TT> 1215 * @param start The offset at which searching will start. 1216 * @param length The number of characters to search 1217 * @return The offset into this of the start of <TT>srcChars</TT>, 1218 * or -1 if not found. 
1219 * @stable ICU 2.0 1220 */ 1221 inline int32_t lastIndexOf(const UChar *srcChars, 1222 int32_t srcLength, 1223 int32_t start, 1224 int32_t length) const; 1225 1226 /** 1227 * Locate in this the last occurrence in the range 1228 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1229 * in <TT>srcChars</TT> in the range 1230 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1231 * using bitwise comparison. 1232 * @param srcChars The text to search for. 1233 * @param srcStart the offset into <TT>srcChars</TT> at which 1234 * to start matching 1235 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1236 * @param start the offset into this at which to start matching 1237 * @param length the number of characters in this to search 1238 * @return The offset into this of the start of <TT>text</TT>, 1239 * or -1 if not found. 1240 * @stable ICU 2.0 1241 */ 1242 int32_t lastIndexOf(const UChar *srcChars, 1243 int32_t srcStart, 1244 int32_t srcLength, 1245 int32_t start, 1246 int32_t length) const; 1247 1248 /** 1249 * Locate in this the last occurrence of the BMP code point <code>c</code>, 1250 * using bitwise comparison. 1251 * @param c The code unit to search for. 1252 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1253 * @stable ICU 2.0 1254 */ 1255 inline int32_t lastIndexOf(UChar c) const; 1256 1257 /** 1258 * Locate in this the last occurrence of the code point <TT>c</TT>, 1259 * using bitwise comparison. 1260 * 1261 * @param c The code point to search for. 1262 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1263 * @stable ICU 2.0 1264 */ 1265 inline int32_t lastIndexOf(UChar32 c) const; 1266 1267 /** 1268 * Locate in this the last occurrence of the BMP code point <code>c</code> 1269 * starting at offset <TT>start</TT>, using bitwise comparison. 1270 * @param c The code unit to search for. 1271 * @param start The offset at which searching will start. 
1272 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1273 * @stable ICU 2.0 1274 */ 1275 inline int32_t lastIndexOf(UChar c, 1276 int32_t start) const; 1277 1278 /** 1279 * Locate in this the last occurrence of the code point <TT>c</TT> 1280 * starting at offset <TT>start</TT>, using bitwise comparison. 1281 * 1282 * @param c The code point to search for. 1283 * @param start The offset at which searching will start. 1284 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1285 * @stable ICU 2.0 1286 */ 1287 inline int32_t lastIndexOf(UChar32 c, 1288 int32_t start) const; 1289 1290 /** 1291 * Locate in this the last occurrence of the BMP code point <code>c</code> 1292 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1293 * using bitwise comparison. 1294 * @param c The code unit to search for. 1295 * @param start the offset into this at which to start matching 1296 * @param length the number of characters in this to search 1297 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1298 * @stable ICU 2.0 1299 */ 1300 inline int32_t lastIndexOf(UChar c, 1301 int32_t start, 1302 int32_t length) const; 1303 1304 /** 1305 * Locate in this the last occurrence of the code point <TT>c</TT> 1306 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1307 * using bitwise comparison. 1308 * 1309 * @param c The code point to search for. 1310 * @param start the offset into this at which to start matching 1311 * @param length the number of characters in this to search 1312 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1313 * @stable ICU 2.0 1314 */ 1315 inline int32_t lastIndexOf(UChar32 c, 1316 int32_t start, 1317 int32_t length) const; 1318 1319 1320 /* Character access */ 1321 1322 /** 1323 * Return the code unit at offset <tt>offset</tt>. 1324 * If the offset is not valid (0..length()-1) then U+ffff is returned. 
1325 * @param offset a valid offset into the text 1326 * @return the code unit at offset <tt>offset</tt> 1327 * or 0xffff if the offset is not valid for this string 1328 * @stable ICU 2.0 1329 */ 1330 inline UChar charAt(int32_t offset) const; 1331 1332 /** 1333 * Return the code unit at offset <tt>offset</tt>. 1334 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1335 * @param offset a valid offset into the text 1336 * @return the code unit at offset <tt>offset</tt> or 0xffff if the offset is not valid for this string 1337 * @stable ICU 2.0 1338 */ 1339 inline UChar operator[] (int32_t offset) const; 1340 1341 /** 1342 * Return the code point that contains the code unit 1343 * at offset <tt>offset</tt>. 1344 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1345 * @param offset a valid offset into the text 1346 * that indicates the text offset of any of the code units 1347 * that will be assembled into a code point (21-bit value) and returned 1348 * @return the code point of text at <tt>offset</tt> 1349 * or 0xffff if the offset is not valid for this string 1350 * @stable ICU 2.0 1351 */ 1352 UChar32 char32At(int32_t offset) const; 1353 1354 /** 1355 * Adjust a random-access offset so that 1356 * it points to the beginning of a Unicode character. 1357 * The offset that is passed in points to 1358 * any code unit of a code point, 1359 * while the returned offset will point to the first code unit 1360 * of the same code point. 1361 * In UTF-16, if the input offset points to a second surrogate 1362 * of a surrogate pair, then the returned offset will point 1363 * to the first surrogate. 1364 * @param offset a valid offset into one code point of the text 1365 * @return offset of the first code unit of the same code point 1366 * @see U16_SET_CP_START 1367 * @stable ICU 2.0 1368 */ 1369 int32_t getChar32Start(int32_t offset) const; 1370 1371 /** 1372 * Adjust a random-access offset so that 1373 * it points behind a Unicode character.
1374 * The offset that is passed in points behind 1375 * any code unit of a code point, 1376 * while the returned offset will point behind the last code unit 1377 * of the same code point. 1378 * In UTF-16, if the input offset points behind the first surrogate 1379 * (i.e., to the second surrogate) 1380 * of a surrogate pair, then the returned offset will point 1381 * behind the second surrogate (i.e., to the first code unit after the pair). 1382 * @param offset a valid offset after any code unit of a code point of the text 1383 * @return offset of the first code unit after the same code point 1384 * @see U16_SET_CP_LIMIT 1385 * @stable ICU 2.0 1386 */ 1387 int32_t getChar32Limit(int32_t offset) const; 1388 1389 /** 1390 * Move the code unit index along the string by delta code points. 1391 * Interpret the input index as a code unit-based offset into the string, 1392 * move the index forward or backward by delta code points, and 1393 * return the resulting index. 1394 * The input index should point to the first code unit of a code point, 1395 * if there is more than one. 1396 * 1397 * Both input and output indexes are code unit-based as for all 1398 * string indexes/offsets in ICU (and other libraries, like MBCS char*). 1399 * If delta<0 then the index is moved backward (toward the start of the string). 1400 * If delta>0 then the index is moved forward (toward the end of the string). 1401 * 1402 * This behaves like CharacterIterator::move32(delta, kCurrent). 1403 * 1404 * Behavior for out-of-bounds indexes: 1405 * <code>moveIndex32</code> pins the input index to 0..length(), i.e., 1406 * if the input index<0 then it is pinned to 0; 1407 * if it is index>length() then it is pinned to length(). 1408 * Afterwards, the index is moved by <code>delta</code> code points 1409 * forward or backward, 1410 * but no further backward than to 0 and no further forward than to length(). 1411 * The resulting index return value will be in between 0 and length(), inclusively.
1412 * 1413 * Examples: 1414 * <pre> 1415 * // s has code points 'a' U+10000 'b' U+10ffff U+2029 1416 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); 1417 * 1418 * // initial index: position of U+10000 1419 * int32_t index=1; 1420 * 1421 * // the following examples will all result in index==4, position of U+10ffff 1422 * 1423 * // skip 2 code points from some position in the string 1424 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' 1425 * 1426 * // go to the 3rd code point from the start of s (0-based) 1427 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' 1428 * 1429 * // go to the next-to-last code point of s 1430 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff 1431 * </pre> 1432 * 1433 * @param index input code unit index 1434 * @param delta (signed) code point count to move the index forward or backward 1435 * in the string 1436 * @return the resulting code unit index 1437 * @stable ICU 2.0 1438 */ 1439 int32_t moveIndex32(int32_t index, int32_t delta) const; 1440 1441 /* Substring extraction */ 1442 1443 /** 1444 * Copy the characters in the range 1445 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, 1446 * beginning at <tt>dstStart</tt>. 1447 * If the string aliases to <code>dst</code> itself as an external buffer, 1448 * then extract() will not copy the contents. 1449 * 1450 * @param start offset of first character which will be copied into the array 1451 * @param length the number of characters to extract 1452 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1453 * must be at least (<tt>dstStart + length</tt>). 1454 * @param dstStart the offset in <TT>dst</TT> where the first character 1455 * will be extracted 1456 * @stable ICU 2.0 1457 */ 1458 inline void extract(int32_t start, 1459 int32_t length, 1460 UChar *dst, 1461 int32_t dstStart = 0) const; 1462 1463 /** 1464 * Copy the contents of the string into dest. 
1465 * This is a convenience function that 1466 * checks if there is enough space in dest, 1467 * extracts the entire string if possible, 1468 * and NUL-terminates dest if possible. 1469 * 1470 * If the string fits into dest but cannot be NUL-terminated 1471 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. 1472 * If the string itself does not fit into dest 1473 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. 1474 * 1475 * If the string aliases to <code>dest</code> itself as an external buffer, 1476 * then extract() will not copy the contents. 1477 * 1478 * @param dest Destination string buffer. 1479 * @param destCapacity Number of UChars available at dest. 1480 * @param errorCode ICU error code. 1481 * @return length() 1482 * @stable ICU 2.0 1483 */ 1484 int32_t 1485 extract(UChar *dest, int32_t destCapacity, 1486 UErrorCode &errorCode) const; 1487 1488 /** 1489 * Copy the characters in the range 1490 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString 1491 * <tt>target</tt>. 1492 * @param start offset of first character which will be copied 1493 * @param length the number of characters to extract 1494 * @param target UnicodeString into which to copy characters. 1495 * No value is returned; the result is delivered in <TT>target</TT>. 1496 * @stable ICU 2.0 1497 */ 1498 inline void extract(int32_t start, 1499 int32_t length, 1500 UnicodeString& target) const; 1501 1502 /** 1503 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1504 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. 1505 * @param start offset of first character which will be copied into the array 1506 * @param limit offset immediately following the last character to be copied 1507 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1508 * must be at least (<tt>dstStart + (limit - start)</tt>).
1509 * @param dstStart the offset in <TT>dst</TT> where the first character 1510 * will be extracted 1511 * @stable ICU 2.0 1512 */ 1513 inline void extractBetween(int32_t start, 1514 int32_t limit, 1515 UChar *dst, 1516 int32_t dstStart = 0) const; 1517 1518 /** 1519 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1520 * into the UnicodeString <tt>target</tt>. Replaceable API. 1521 * @param start offset of first character which will be copied 1522 * @param limit offset immediately following the last character to be copied 1523 * @param target UnicodeString into which to copy characters. 1524 * No value is returned; the result is delivered in <TT>target</TT>. 1525 * @stable ICU 2.0 1526 */ 1527 virtual void extractBetween(int32_t start, 1528 int32_t limit, 1529 UnicodeString& target) const; 1530 1531 /** 1532 * Copy the characters in the range 1533 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters. 1534 * All characters must be invariant (see utypes.h). 1535 * Use US_INV as the last, signature-distinguishing parameter. 1536 * 1537 * This function does not write any more than <code>targetCapacity</code> 1538 * characters but returns the length of the entire output string 1539 * so that one can allocate a larger buffer and call the function again 1540 * if necessary. 1541 * The output string is NUL-terminated if possible. 1542 * 1543 * @param start offset of first character which will be copied 1544 * @param startLength the number of characters to extract 1545 * @param target the target buffer for extraction, can be NULL 1546 * if targetCapacity is 0 1547 * @param targetCapacity the length of the target buffer 1548 * @param inv Signature-distinguishing parameter, use US_INV.
1549 * @return the output string length, not including the terminating NUL 1550 * @stable ICU 3.2 1551 */ 1552 int32_t extract(int32_t start, 1553 int32_t startLength, 1554 char *target, 1555 int32_t targetCapacity, 1556 enum EInvariant inv) const; 1557 1558 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 1559 1560 /** 1561 * Copy the characters in the range 1562 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters 1563 * in the platform's default codepage. 1564 * This function does not write any more than <code>targetLength</code> 1565 * characters but returns the length of the entire output string 1566 * so that one can allocate a larger buffer and call the function again 1567 * if necessary. 1568 * The output string is NUL-terminated if possible. 1569 * 1570 * @param start offset of first character which will be copied 1571 * @param startLength the number of characters to extract 1572 * @param target the target buffer for extraction 1573 * @param targetLength the length of the target buffer 1574 * If <TT>target</TT> is NULL, then the number of bytes required for 1575 * <TT>target</TT> is returned. 1576 * @return the output string length, not including the terminating NUL 1577 * @stable ICU 2.0 1578 */ 1579 int32_t extract(int32_t start, 1580 int32_t startLength, 1581 char *target, 1582 uint32_t targetLength) const; 1583 1584 #endif 1585 1586 #if !UCONFIG_NO_CONVERSION 1587 1588 /** 1589 * Copy the characters in the range 1590 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1591 * in a specified codepage. 1592 * The output string is NUL-terminated. 1593 * 1594 * Recommendation: For invariant-character strings use 1595 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1596 * because it avoids object code dependencies of UnicodeString on 1597 * the conversion code.
1598 * 1599 * @param start offset of first character which will be copied 1600 * @param startLength the number of characters to extract 1601 * @param target the target buffer for extraction 1602 * @param codepage the desired codepage for the characters. 0 has 1603 * the special meaning of the default codepage 1604 * If <code>codepage</code> is an empty string (<code>""</code>), 1605 * then a simple conversion is performed on the codepage-invariant 1606 * subset ("invariant characters") of the platform encoding. See utypes.h. 1607 * If <TT>target</TT> is NULL, then the number of bytes required for 1608 * <TT>target</TT> is returned. It is assumed that the target is big enough 1609 * to fit all of the characters. 1610 * @return the output string length, not including the terminating NUL 1611 * @stable ICU 2.0 1612 */ 1613 inline int32_t extract(int32_t start, 1614 int32_t startLength, 1615 char *target, 1616 const char *codepage = 0) const; 1617 1618 /** 1619 * Copy the characters in the range 1620 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1621 * in a specified codepage. 1622 * This function does not write any more than <code>targetLength</code> 1623 * characters but returns the length of the entire output string 1624 * so that one can allocate a larger buffer and call the function again 1625 * if necessary. 1626 * The output string is NUL-terminated if possible. 1627 * 1628 * Recommendation: For invariant-character strings use 1629 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1630 * because it avoids object code dependencies of UnicodeString on 1631 * the conversion code. 
1632 * 1633 * @param start offset of first character which will be copied 1634 * @param startLength the number of characters to extract 1635 * @param target the target buffer for extraction 1636 * @param targetLength the length of the target buffer 1637 * @param codepage the desired codepage for the characters. 0 has 1638 * the special meaning of the default codepage 1639 * If <code>codepage</code> is an empty string (<code>""</code>), 1640 * then a simple conversion is performed on the codepage-invariant 1641 * subset ("invariant characters") of the platform encoding. See utypes.h. 1642 * If <TT>target</TT> is NULL, then the number of bytes required for 1643 * <TT>target</TT> is returned. 1644 * @return the output string length, not including the terminating NUL 1645 * @stable ICU 2.0 1646 */ 1647 int32_t extract(int32_t start, 1648 int32_t startLength, 1649 char *target, 1650 uint32_t targetLength, 1651 const char *codepage) const; 1652 1653 /** 1654 * Convert the UnicodeString into a codepage string using an existing UConverter. 1655 * The output string is NUL-terminated if possible. 1656 * 1657 * This function avoids the overhead of opening and closing a converter if 1658 * multiple strings are extracted. 
1659 * 1660 * @param dest destination string buffer, can be NULL if destCapacity==0 1661 * @param destCapacity the number of chars available at dest 1662 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), 1663 * or NULL for the default converter 1664 * @param errorCode normal ICU error code 1665 * @return the length of the output string, not counting the terminating NUL; 1666 * if the length is greater than destCapacity, then the string will not fit 1667 * and a buffer of the indicated length would need to be passed in 1668 * @stable ICU 2.0 1669 */ 1670 int32_t extract(char *dest, int32_t destCapacity, 1671 UConverter *cnv, 1672 UErrorCode &errorCode) const; 1673 1674 #endif 1675 1676 /** 1677 * Create a temporary substring for the specified range. 1678 * Unlike the substring constructor and setTo() functions, 1679 * the object returned here will be a read-only alias (using getBuffer()) 1680 * rather than copying the text. 1681 * As a result, this substring operation is much faster but requires 1682 * that the original string not be modified or deleted during the lifetime 1683 * of the returned substring object. 1684 * @param start offset of the first character visible in the substring 1685 * @param length length of the substring 1686 * @return a read-only alias UnicodeString object for the substring 1687 * @stable ICU 4.4 1688 */ 1689 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 1690 1691 /** 1692 * Create a temporary substring for the specified range. 1693 * Same as tempSubString(start, length) except that the substring range 1694 * is specified as a (start, limit) pair (with an exclusive limit index) 1695 * rather than a (start, length) pair. 
1696 * @param start offset of the first character visible in the substring 1697 * @param limit offset immediately following the last character visible in the substring 1698 * @return a read-only alias UnicodeString object for the substring 1699 * @stable ICU 4.4 1700 */ 1701 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 1702 1703 /** 1704 * Convert the UnicodeString to UTF-8 and write the result 1705 * to a ByteSink. This is called by toUTF8String(). 1706 * Unpaired surrogates are replaced with U+FFFD. 1707 * Calls u_strToUTF8WithSub(). 1708 * 1709 * @param sink A ByteSink to which the UTF-8 version of the string is written. 1710 * sink.Flush() is called at the end. 1711 * @stable ICU 4.2 1712 * @see toUTF8String 1713 */ 1714 void toUTF8(ByteSink &sink) const; 1715 1716 #if U_HAVE_STD_STRING 1717 1718 /** 1719 * Convert the UnicodeString to UTF-8 and append the result 1720 * to a standard string. 1721 * Unpaired surrogates are replaced with U+FFFD. 1722 * Calls toUTF8(). 1723 * 1724 * @param result A standard string (or a compatible object) 1725 * to which the UTF-8 version of the string is appended. 1726 * @return The string object. 1727 * @stable ICU 4.2 1728 * @see toUTF8 1729 */ 1730 template<typename StringClass> 1731 StringClass &toUTF8String(StringClass &result) const { 1732 StringByteSink<StringClass> sbs(&result); 1733 toUTF8(sbs); 1734 return result; 1735 } 1736 1737 #endif 1738 1739 /** 1740 * Convert the UnicodeString to UTF-32. 1741 * Unpaired surrogates are replaced with U+FFFD. 1742 * Calls u_strToUTF32WithSub(). 1743 * 1744 * @param utf32 destination string buffer, can be NULL if capacity==0 1745 * @param capacity the number of UChar32s available at utf32 1746 * @param errorCode Standard ICU error code. Its input value must 1747 * pass the U_SUCCESS() test, or else the function returns 1748 * immediately. Check for U_FAILURE() on output or use with 1749 * function chaining. (See User Guide for details.) 
1750 * @return The length of the UTF-32 string. 1751 * @see fromUTF32 1752 * @stable ICU 4.2 1753 */ 1754 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 1755 1756 /* Length operations */ 1757 1758 /** 1759 * Return the length of the UnicodeString object. 1760 * The length is the number of UChar code units are in the UnicodeString. 1761 * If you want the number of code points, please use countChar32(). 1762 * @return the length of the UnicodeString object 1763 * @see countChar32 1764 * @stable ICU 2.0 1765 */ 1766 inline int32_t length(void) const; 1767 1768 /** 1769 * Count Unicode code points in the length UChar code units of the string. 1770 * A code point may occupy either one or two UChar code units. 1771 * Counting code points involves reading all code units. 1772 * 1773 * This functions is basically the inverse of moveIndex32(). 1774 * 1775 * @param start the index of the first code unit to check 1776 * @param length the number of UChar code units to check 1777 * @return the number of code points in the specified code units 1778 * @see length 1779 * @stable ICU 2.0 1780 */ 1781 int32_t 1782 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 1783 1784 /** 1785 * Check if the length UChar code units of the string 1786 * contain more Unicode code points than a certain number. 1787 * This is more efficient than counting all code points in this part of the string 1788 * and comparing that number with a threshold. 1789 * This function may not need to scan the string at all if the length 1790 * falls within a certain range, and 1791 * never needs to count more than 'number+1' code points. 1792 * Logically equivalent to (countChar32(start, length)>number). 1793 * A Unicode code point may occupy either one or two UChar code units. 
1794 * 1795 * @param start the index of the first code unit to check (0 for the entire string) 1796 * @param length the number of UChar code units to check 1797 * (use INT32_MAX for the entire string; remember that start/length 1798 * values are pinned) 1799 * @param number The number of code points in the (sub)string is compared against 1800 * the 'number' parameter. 1801 * @return Boolean value for whether the string contains more Unicode code points 1802 * than 'number'. Same as (u_countChar32(s, length)>number). 1803 * @see countChar32 1804 * @see u_strHasMoreChar32Than 1805 * @stable ICU 2.4 1806 */ 1807 UBool 1808 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 1809 1810 /** 1811 * Determine if this string is empty. 1812 * @return TRUE if this string contains 0 characters, FALSE otherwise. 1813 * @stable ICU 2.0 1814 */ 1815 inline UBool isEmpty(void) const; 1816 1817 /** 1818 * Return the capacity of the internal buffer of the UnicodeString object. 1819 * This is useful together with the getBuffer functions. 1820 * See there for details. 1821 * 1822 * @return the number of UChars available in the internal buffer 1823 * @see getBuffer 1824 * @stable ICU 2.0 1825 */ 1826 inline int32_t getCapacity(void) const; 1827 1828 /* Other operations */ 1829 1830 /** 1831 * Generate a hash code for this object. 1832 * @return The hash code of this UnicodeString. 1833 * @stable ICU 2.0 1834 */ 1835 inline int32_t hashCode(void) const; 1836 1837 /** 1838 * Determine if this object contains a valid string. 1839 * A bogus string has no value. It is different from an empty string, 1840 * although in both cases isEmpty() returns TRUE and length() returns 0. 1841 * setToBogus() and isBogus() can be used to indicate that no string value is available. 1842 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and 1843 * length() returns 0. 
1844 * 1845 * @return TRUE if the string is bogus/invalid, FALSE otherwise 1846 * @see setToBogus() 1847 * @stable ICU 2.0 1848 */ 1849 inline UBool isBogus(void) const; 1850 1851 1852 //======================================== 1853 // Write operations 1854 //======================================== 1855 1856 /* Assignment operations */ 1857 1858 /** 1859 * Assignment operator. Replace the characters in this UnicodeString 1860 * with the characters from <TT>srcText</TT>. 1861 * 1862 * Starting with ICU 2.4, the assignment operator and the copy constructor 1863 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1864 * By contrast, the fastCopyFrom() function implements the old, 1865 * more efficient but less safe behavior 1866 * of making this string also a readonly alias to the same buffer. 1867 * 1868 * If the source object has an "open" buffer from getBuffer(minCapacity), 1869 * then the copy is an empty string. 1870 * 1871 * @param srcText The text containing the characters to replace 1872 * @return a reference to this 1873 * @stable ICU 2.0 1874 * @see fastCopyFrom 1875 */ 1876 UnicodeString &operator=(const UnicodeString &srcText); 1877 1878 /** 1879 * Almost the same as the assignment operator. 1880 * Replace the characters in this UnicodeString 1881 * with the characters from <code>srcText</code>. 1882 * 1883 * This function works the same as the assignment operator 1884 * for all strings except for ones that are readonly aliases. 1885 * 1886 * Starting with ICU 2.4, the assignment operator and the copy constructor 1887 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1888 * This function implements the old, more efficient but less safe behavior 1889 * of making this string also a readonly alias to the same buffer. 
1890 * 1891 * The fastCopyFrom function must be used only if it is known that the lifetime of 1892 * this UnicodeString does not exceed the lifetime of the aliased buffer 1893 * including its contents, for example for strings from resource bundles 1894 * or aliases to string constants. 1895 * 1896 * If the source object has an "open" buffer from getBuffer(minCapacity), 1897 * then the copy is an empty string. 1898 * 1899 * @param src The text containing the characters to replace. 1900 * @return a reference to this 1901 * @stable ICU 2.4 1902 */ 1903 UnicodeString &fastCopyFrom(const UnicodeString &src); 1904 1905 #ifndef U_HIDE_DRAFT_API 1906 #if U_HAVE_RVALUE_REFERENCES 1907 /** 1908 * Move assignment operator, might leave src in bogus state. 1909 * This string will have the same contents and state that the source string had. 1910 * The behavior is undefined if *this and src are the same object. 1911 * @param src source string 1912 * @return *this 1913 * @draft ICU 56 1914 */ 1915 UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT { 1916 return moveFrom(src); 1917 } 1918 #endif 1919 /** 1920 * Move assignment, might leave src in bogus state. 1921 * This string will have the same contents and state that the source string had. 1922 * The behavior is undefined if *this and src are the same object. 1923 * 1924 * Can be called explicitly, does not need C++11 support. 1925 * @param src source string 1926 * @return *this 1927 * @draft ICU 56 1928 */ 1929 UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT; 1930 1931 /** 1932 * Swap strings. 1933 * @param other other string 1934 * @draft ICU 56 1935 */ 1936 void swap(UnicodeString &other) U_NOEXCEPT; 1937 1938 /** 1939 * Non-member UnicodeString swap function. 
1940 * @param s1 will get s2's contents and state 1941 * @param s2 will get s1's contents and state 1942 * @draft ICU 56 1943 */ 1944 friend U_COMMON_API inline void U_EXPORT2 1945 swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT { 1946 s1.swap(s2); 1947 } 1948 #endif /* U_HIDE_DRAFT_API */ 1949 1950 /** 1951 * Assignment operator. Replace the characters in this UnicodeString 1952 * with the code unit <TT>ch</TT>. 1953 * @param ch the code unit to replace 1954 * @return a reference to this 1955 * @stable ICU 2.0 1956 */ 1957 inline UnicodeString& operator= (UChar ch); 1958 1959 /** 1960 * Assignment operator. Replace the characters in this UnicodeString 1961 * with the code point <TT>ch</TT>. 1962 * @param ch the code point to replace 1963 * @return a reference to this 1964 * @stable ICU 2.0 1965 */ 1966 inline UnicodeString& operator= (UChar32 ch); 1967 1968 /** 1969 * Set the text in the UnicodeString object to the characters 1970 * in <TT>srcText</TT> in the range 1971 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). 1972 * <TT>srcText</TT> is not modified. 1973 * @param srcText the source for the new characters 1974 * @param srcStart the offset into <TT>srcText</TT> where new characters 1975 * will be obtained 1976 * @return a reference to this 1977 * @stable ICU 2.2 1978 */ 1979 inline UnicodeString& setTo(const UnicodeString& srcText, 1980 int32_t srcStart); 1981 1982 /** 1983 * Set the text in the UnicodeString object to the characters 1984 * in <TT>srcText</TT> in the range 1985 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1986 * <TT>srcText</TT> is not modified. 1987 * @param srcText the source for the new characters 1988 * @param srcStart the offset into <TT>srcText</TT> where new characters 1989 * will be obtained 1990 * @param srcLength the number of characters in <TT>srcText</TT> in the 1991 * replace string. 
1992 * @return a reference to this 1993 * @stable ICU 2.0 1994 */ 1995 inline UnicodeString& setTo(const UnicodeString& srcText, 1996 int32_t srcStart, 1997 int32_t srcLength); 1998 1999 /** 2000 * Set the text in the UnicodeString object to the characters in 2001 * <TT>srcText</TT>. 2002 * <TT>srcText</TT> is not modified. 2003 * @param srcText the source for the new characters 2004 * @return a reference to this 2005 * @stable ICU 2.0 2006 */ 2007 inline UnicodeString& setTo(const UnicodeString& srcText); 2008 2009 /** 2010 * Set the characters in the UnicodeString object to the characters 2011 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 2012 * @param srcChars the source for the new characters 2013 * @param srcLength the number of Unicode characters in srcChars. 2014 * @return a reference to this 2015 * @stable ICU 2.0 2016 */ 2017 inline UnicodeString& setTo(const UChar *srcChars, 2018 int32_t srcLength); 2019 2020 /** 2021 * Set the characters in the UnicodeString object to the code unit 2022 * <TT>srcChar</TT>. 2023 * @param srcChar the code unit which becomes the UnicodeString's character 2024 * content 2025 * @return a reference to this 2026 * @stable ICU 2.0 2027 */ 2028 UnicodeString& setTo(UChar srcChar); 2029 2030 /** 2031 * Set the characters in the UnicodeString object to the code point 2032 * <TT>srcChar</TT>. 2033 * @param srcChar the code point which becomes the UnicodeString's character 2034 * content 2035 * @return a reference to this 2036 * @stable ICU 2.0 2037 */ 2038 UnicodeString& setTo(UChar32 srcChar); 2039 2040 /** 2041 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. 2042 * The text will be used for the UnicodeString object, but 2043 * it will not be released when the UnicodeString is destroyed. 2044 * This has copy-on-write semantics: 2045 * When the string is modified, then the buffer is first copied into 2046 * newly allocated memory. 2047 * The aliased buffer is never modified. 
2048 * 2049 * In an assignment to another UnicodeString, when using the copy constructor 2050 * or the assignment operator, the text will be copied. 2051 * When using fastCopyFrom(), the text will be aliased again, 2052 * so that both strings then alias the same readonly-text. 2053 * 2054 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 2055 * This must be true if <code>textLength==-1</code>. 2056 * @param text The characters to alias for the UnicodeString. 2057 * @param textLength The number of Unicode characters in <code>text</code> to alias. 2058 * If -1, then this constructor will determine the length 2059 * by calling <code>u_strlen()</code>. 2060 * @return a reference to this 2061 * @stable ICU 2.0 2062 */ 2063 UnicodeString &setTo(UBool isTerminated, 2064 const UChar *text, 2065 int32_t textLength); 2066 2067 /** 2068 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. 2069 * The text will be used for the UnicodeString object, but 2070 * it will not be released when the UnicodeString is destroyed. 2071 * This has write-through semantics: 2072 * For as long as the capacity of the buffer is sufficient, write operations 2073 * will directly affect the buffer. When more capacity is necessary, then 2074 * a new buffer will be allocated and the contents copied as with regularly 2075 * constructed strings. 2076 * In an assignment to another UnicodeString, the buffer will be copied. 2077 * The extract(UChar *dst) function detects whether the dst pointer is the same 2078 * as the string buffer itself and will in this case not copy the contents. 2079 * 2080 * @param buffer The characters to alias for the UnicodeString. 2081 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 2082 * @param buffCapacity The size of <code>buffer</code> in UChars. 
2083 * @return a reference to this 2084 * @stable ICU 2.0 2085 */ 2086 UnicodeString &setTo(UChar *buffer, 2087 int32_t buffLength, 2088 int32_t buffCapacity); 2089 2090 /** 2091 * Make this UnicodeString object invalid. 2092 * The string will test TRUE with isBogus(). 2093 * 2094 * A bogus string has no value. It is different from an empty string. 2095 * It can be used to indicate that no string value is available. 2096 * getBuffer() and getTerminatedBuffer() return NULL, and 2097 * length() returns 0. 2098 * 2099 * This utility function is used throughout the UnicodeString 2100 * implementation to indicate that a UnicodeString operation failed, 2101 * and may be used in other functions, 2102 * especially but not exclusively when such functions do not 2103 * take a UErrorCode for simplicity. 2104 * 2105 * The following methods, and no others, will clear a string object's bogus flag: 2106 * - remove() 2107 * - remove(0, INT32_MAX) 2108 * - truncate(0) 2109 * - operator=() (assignment operator) 2110 * - setTo(...) 2111 * 2112 * The simplest ways to turn a bogus string into an empty one 2113 * is to use the remove() function. 2114 * Examples for other functions that are equivalent to "set to empty string": 2115 * \code 2116 * if(s.isBogus()) { 2117 * s.remove(); // set to an empty string (remove all), or 2118 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or 2119 * s.truncate(0); // set to an empty string (complete truncation), or 2120 * s=UnicodeString(); // assign an empty string, or 2121 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or 2122 * static const UChar nul=0; 2123 * s.setTo(&nul, 0); // set to an empty C Unicode string 2124 * } 2125 * \endcode 2126 * 2127 * @see isBogus() 2128 * @stable ICU 2.0 2129 */ 2130 void setToBogus(); 2131 2132 /** 2133 * Set the character at the specified offset to the specified character. 
2134 * @param offset A valid offset into the text of the character to set 2135 * @param ch The new character 2136 * @return A reference to this 2137 * @stable ICU 2.0 2138 */ 2139 UnicodeString& setCharAt(int32_t offset, 2140 UChar ch); 2141 2142 2143 /* Append operations */ 2144 2145 /** 2146 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString 2147 * object. 2148 * @param ch the code unit to be appended 2149 * @return a reference to this 2150 * @stable ICU 2.0 2151 */ 2152 inline UnicodeString& operator+= (UChar ch); 2153 2154 /** 2155 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString 2156 * object. 2157 * @param ch the code point to be appended 2158 * @return a reference to this 2159 * @stable ICU 2.0 2160 */ 2161 inline UnicodeString& operator+= (UChar32 ch); 2162 2163 /** 2164 * Append operator. Append the characters in <TT>srcText</TT> to the 2165 * UnicodeString object. <TT>srcText</TT> is not modified. 2166 * @param srcText the source for the new characters 2167 * @return a reference to this 2168 * @stable ICU 2.0 2169 */ 2170 inline UnicodeString& operator+= (const UnicodeString& srcText); 2171 2172 /** 2173 * Append the characters 2174 * in <TT>srcText</TT> in the range 2175 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the 2176 * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT> 2177 * is not modified. 2178 * @param srcText the source for the new characters 2179 * @param srcStart the offset into <TT>srcText</TT> where new characters 2180 * will be obtained 2181 * @param srcLength the number of characters in <TT>srcText</TT> in 2182 * the append string 2183 * @return a reference to this 2184 * @stable ICU 2.0 2185 */ 2186 inline UnicodeString& append(const UnicodeString& srcText, 2187 int32_t srcStart, 2188 int32_t srcLength); 2189 2190 /** 2191 * Append the characters in <TT>srcText</TT> to the UnicodeString object. 2192 * <TT>srcText</TT> is not modified. 
2193 * @param srcText the source for the new characters 2194 * @return a reference to this 2195 * @stable ICU 2.0 2196 */ 2197 inline UnicodeString& append(const UnicodeString& srcText); 2198 2199 /** 2200 * Append the characters in <TT>srcChars</TT> in the range 2201 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString 2202 * object at offset 2203 * <TT>start</TT>. <TT>srcChars</TT> is not modified. 2204 * @param srcChars the source for the new characters 2205 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2206 * will be obtained 2207 * @param srcLength the number of characters in <TT>srcChars</TT> in 2208 * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated 2209 * @return a reference to this 2210 * @stable ICU 2.0 2211 */ 2212 inline UnicodeString& append(const UChar *srcChars, 2213 int32_t srcStart, 2214 int32_t srcLength); 2215 2216 /** 2217 * Append the characters in <TT>srcChars</TT> to the UnicodeString object 2218 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 2219 * @param srcChars the source for the new characters 2220 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; 2221 * can be -1 if <TT>srcChars</TT> is NUL-terminated 2222 * @return a reference to this 2223 * @stable ICU 2.0 2224 */ 2225 inline UnicodeString& append(const UChar *srcChars, 2226 int32_t srcLength); 2227 2228 /** 2229 * Append the code unit <TT>srcChar</TT> to the UnicodeString object. 2230 * @param srcChar the code unit to append 2231 * @return a reference to this 2232 * @stable ICU 2.0 2233 */ 2234 inline UnicodeString& append(UChar srcChar); 2235 2236 /** 2237 * Append the code point <TT>srcChar</TT> to the UnicodeString object. 
2238 * @param srcChar the code point to append 2239 * @return a reference to this 2240 * @stable ICU 2.0 2241 */ 2242 UnicodeString& append(UChar32 srcChar); 2243 2244 2245 /* Insert operations */ 2246 2247 /** 2248 * Insert the characters in <TT>srcText</TT> in the range 2249 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2250 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 2251 * @param start the offset where the insertion begins 2252 * @param srcText the source for the new characters 2253 * @param srcStart the offset into <TT>srcText</TT> where new characters 2254 * will be obtained 2255 * @param srcLength the number of characters in <TT>srcText</TT> in 2256 * the insert string 2257 * @return a reference to this 2258 * @stable ICU 2.0 2259 */ 2260 inline UnicodeString& insert(int32_t start, 2261 const UnicodeString& srcText, 2262 int32_t srcStart, 2263 int32_t srcLength); 2264 2265 /** 2266 * Insert the characters in <TT>srcText</TT> into the UnicodeString object 2267 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 2268 * @param start the offset where the insertion begins 2269 * @param srcText the source for the new characters 2270 * @return a reference to this 2271 * @stable ICU 2.0 2272 */ 2273 inline UnicodeString& insert(int32_t start, 2274 const UnicodeString& srcText); 2275 2276 /** 2277 * Insert the characters in <TT>srcChars</TT> in the range 2278 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2279 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 
2280 * @param start the offset at which the insertion begins 2281 * @param srcChars the source for the new characters 2282 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2283 * will be obtained 2284 * @param srcLength the number of characters in <TT>srcChars</TT> 2285 * in the insert string 2286 * @return a reference to this 2287 * @stable ICU 2.0 2288 */ 2289 inline UnicodeString& insert(int32_t start, 2290 const UChar *srcChars, 2291 int32_t srcStart, 2292 int32_t srcLength); 2293 2294 /** 2295 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object 2296 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 2297 * @param start the offset where the insertion begins 2298 * @param srcChars the source for the new characters 2299 * @param srcLength the number of Unicode characters in srcChars. 2300 * @return a reference to this 2301 * @stable ICU 2.0 2302 */ 2303 inline UnicodeString& insert(int32_t start, 2304 const UChar *srcChars, 2305 int32_t srcLength); 2306 2307 /** 2308 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at 2309 * offset <TT>start</TT>. 2310 * @param start the offset at which the insertion occurs 2311 * @param srcChar the code unit to insert 2312 * @return a reference to this 2313 * @stable ICU 2.0 2314 */ 2315 inline UnicodeString& insert(int32_t start, 2316 UChar srcChar); 2317 2318 /** 2319 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at 2320 * offset <TT>start</TT>. 
2321 * @param start the offset at which the insertion occurs 2322 * @param srcChar the code point to insert 2323 * @return a reference to this 2324 * @stable ICU 2.0 2325 */ 2326 inline UnicodeString& insert(int32_t start, 2327 UChar32 srcChar); 2328 2329 2330 /* Replace operations */ 2331 2332 /** 2333 * Replace the characters in the range 2334 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2335 * <TT>srcText</TT> in the range 2336 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 2337 * <TT>srcText</TT> is not modified. 2338 * @param start the offset at which the replace operation begins 2339 * @param length the number of characters to replace. The character at 2340 * <TT>start + length</TT> is not modified. 2341 * @param srcText the source for the new characters 2342 * @param srcStart the offset into <TT>srcText</TT> where new characters 2343 * will be obtained 2344 * @param srcLength the number of characters in <TT>srcText</TT> in 2345 * the replace string 2346 * @return a reference to this 2347 * @stable ICU 2.0 2348 */ 2349 UnicodeString& replace(int32_t start, 2350 int32_t length, 2351 const UnicodeString& srcText, 2352 int32_t srcStart, 2353 int32_t srcLength); 2354 2355 /** 2356 * Replace the characters in the range 2357 * [<TT>start</TT>, <TT>start + length</TT>) 2358 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is 2359 * not modified. 2360 * @param start the offset at which the replace operation begins 2361 * @param length the number of characters to replace. The character at 2362 * <TT>start + length</TT> is not modified. 
2363 * @param srcText the source for the new characters 2364 * @return a reference to this 2365 * @stable ICU 2.0 2366 */ 2367 UnicodeString& replace(int32_t start, 2368 int32_t length, 2369 const UnicodeString& srcText); 2370 2371 /** 2372 * Replace the characters in the range 2373 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2374 * <TT>srcChars</TT> in the range 2375 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> 2376 * is not modified. 2377 * @param start the offset at which the replace operation begins 2378 * @param length the number of characters to replace. The character at 2379 * <TT>start + length</TT> is not modified. 2380 * @param srcChars the source for the new characters 2381 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2382 * will be obtained 2383 * @param srcLength the number of characters in <TT>srcChars</TT> 2384 * in the replace string 2385 * @return a reference to this 2386 * @stable ICU 2.0 2387 */ 2388 UnicodeString& replace(int32_t start, 2389 int32_t length, 2390 const UChar *srcChars, 2391 int32_t srcStart, 2392 int32_t srcLength); 2393 2394 /** 2395 * Replace the characters in the range 2396 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2397 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 2398 * @param start the offset at which the replace operation begins 2399 * @param length number of characters to replace. The character at 2400 * <TT>start + length</TT> is not modified. 2401 * @param srcChars the source for the new characters 2402 * @param srcLength the number of Unicode characters in srcChars 2403 * @return a reference to this 2404 * @stable ICU 2.0 2405 */ 2406 inline UnicodeString& replace(int32_t start, 2407 int32_t length, 2408 const UChar *srcChars, 2409 int32_t srcLength); 2410 2411 /** 2412 * Replace the characters in the range 2413 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit 2414 * <TT>srcChar</TT>. 
2415 * @param start the offset at which the replace operation begins 2416 * @param length the number of characters to replace. The character at 2417 * <TT>start + length</TT> is not modified. 2418 * @param srcChar the new code unit 2419 * @return a reference to this 2420 * @stable ICU 2.0 2421 */ 2422 inline UnicodeString& replace(int32_t start, 2423 int32_t length, 2424 UChar srcChar); 2425 2426 /** 2427 * Replace the characters in the range 2428 * [<TT>start</TT>, <TT>start + length</TT>) with the code point 2429 * <TT>srcChar</TT>. 2430 * @param start the offset at which the replace operation begins 2431 * @param length the number of characters to replace. The character at 2432 * <TT>start + length</TT> is not modified. 2433 * @param srcChar the new code point 2434 * @return a reference to this 2435 * @stable ICU 2.0 2436 */ 2437 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 2438 2439 /** 2440 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2441 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. 2442 * @param start the offset at which the replace operation begins 2443 * @param limit the offset immediately following the replace range 2444 * @param srcText the source for the new characters 2445 * @return a reference to this 2446 * @stable ICU 2.0 2447 */ 2448 inline UnicodeString& replaceBetween(int32_t start, 2449 int32_t limit, 2450 const UnicodeString& srcText); 2451 2452 /** 2453 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2454 * with the characters in <TT>srcText</TT> in the range 2455 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2456 * @param start the offset at which the replace operation begins 2457 * @param limit the offset immediately following the replace range 2458 * @param srcText the source for the new characters 2459 * @param srcStart the offset into <TT>srcText</TT> where new characters 2460 * will be obtained 2461 * @param srcLimit the offset immediately following the range to copy 2462 * in <TT>srcText</TT> 2463 * @return a reference to this 2464 * @stable ICU 2.0 2465 */ 2466 inline UnicodeString& replaceBetween(int32_t start, 2467 int32_t limit, 2468 const UnicodeString& srcText, 2469 int32_t srcStart, 2470 int32_t srcLimit); 2471 2472 /** 2473 * Replace a substring of this object with the given text. 2474 * @param start the beginning index, inclusive; <code>0 <= start 2475 * <= limit</code>. 2476 * @param limit the ending index, exclusive; <code>start <= limit 2477 * <= length()</code>. 2478 * @param text the text to replace characters <code>start</code> 2479 * to <code>limit - 1</code> 2480 * @stable ICU 2.0 2481 */ 2482 virtual void handleReplaceBetween(int32_t start, 2483 int32_t limit, 2484 const UnicodeString& text); 2485 2486 /** 2487 * Replaceable API 2488 * @return TRUE if it has MetaData 2489 * @stable ICU 2.4 2490 */ 2491 virtual UBool hasMetaData() const; 2492 2493 /** 2494 * Copy a substring of this object, retaining attribute (out-of-band) 2495 * information. This method is used to duplicate or reorder substrings. 2496 * The destination index must not overlap the source range. 2497 * 2498 * @param start the beginning index, inclusive; <code>0 <= start <= 2499 * limit</code>. 2500 * @param limit the ending index, exclusive; <code>start <= limit <= 2501 * length()</code>. 2502 * @param dest the destination index. The characters from 2503 * <code>start..limit-1</code> will be copied to <code>dest</code>. 2504 * Implementations of this method may assume that <code>dest <= start || 2505 * dest >= limit</code>.
2506 * @stable ICU 2.0 2507 */ 2508 virtual void copy(int32_t start, int32_t limit, int32_t dest); 2509 2510 /* Search and replace operations */ 2511 2512 /** 2513 * Replace all occurrences of characters in oldText with the characters 2514 * in newText 2515 * @param oldText the text containing the search text 2516 * @param newText the text containing the replacement text 2517 * @return a reference to this 2518 * @stable ICU 2.0 2519 */ 2520 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 2521 const UnicodeString& newText); 2522 2523 /** 2524 * Replace all occurrences of characters in oldText with characters 2525 * in newText 2526 * in the range [<TT>start</TT>, <TT>start + length</TT>). 2527 * @param start the start of the range in which replace will be performed 2528 * @param length the length of the range in which replace will be performed 2529 * @param oldText the text containing the search text 2530 * @param newText the text containing the replacement text 2531 * @return a reference to this 2532 * @stable ICU 2.0 2533 */ 2534 inline UnicodeString& findAndReplace(int32_t start, 2535 int32_t length, 2536 const UnicodeString& oldText, 2537 const UnicodeString& newText); 2538 2539 /** 2540 * Replace all occurrences of characters in oldText in the range 2541 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters 2542 * in newText in the range 2543 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) 2544 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2545 * @param start the start of the range in which replace will be performed 2546 * @param length the length of the range in which replace will be performed 2547 * @param oldText the text containing the search text 2548 * @param oldStart the start of the search range in <TT>oldText</TT> 2549 * @param oldLength the length of the search range in <TT>oldText</TT> 2550 * @param newText the text containing the replacement text 2551 * @param newStart the start of the replacement range in <TT>newText</TT> 2552 * @param newLength the length of the replacement range in <TT>newText</TT> 2553 * @return a reference to this 2554 * @stable ICU 2.0 2555 */ 2556 UnicodeString& findAndReplace(int32_t start, 2557 int32_t length, 2558 const UnicodeString& oldText, 2559 int32_t oldStart, 2560 int32_t oldLength, 2561 const UnicodeString& newText, 2562 int32_t newStart, 2563 int32_t newLength); 2564 2565 2566 /* Remove operations */ 2567 2568 /** 2569 * Remove all characters from the UnicodeString object. 2570 * @return a reference to this 2571 * @stable ICU 2.0 2572 */ 2573 inline UnicodeString& remove(void); 2574 2575 /** 2576 * Remove the characters in the range 2577 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. 2578 * @param start the offset of the first character to remove 2579 * @param length the number of characters to remove 2580 * @return a reference to this 2581 * @stable ICU 2.0 2582 */ 2583 inline UnicodeString& remove(int32_t start, 2584 int32_t length = (int32_t)INT32_MAX); 2585 2586 /** 2587 * Remove the characters in the range 2588 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2589 * @param start the offset of the first character to remove 2590 * @param limit the offset immediately following the range to remove 2591 * @return a reference to this 2592 * @stable ICU 2.0 2593 */ 2594 inline UnicodeString& removeBetween(int32_t start, 2595 int32_t limit = (int32_t)INT32_MAX); 2596 2597 /** 2598 * Retain only the characters in the range 2599 * [<code>start</code>, <code>limit</code>) from the UnicodeString object. 2600 * Removes characters before <code>start</code> and at and after <code>limit</code>. 2601 * @param start the offset of the first character to retain 2602 * @param limit the offset immediately following the range to retain 2603 * @return a reference to this 2604 * @stable ICU 4.4 2605 */ 2606 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 2607 2608 /* Length operations */ 2609 2610 /** 2611 * Pad the start of this UnicodeString with the character <TT>padChar</TT>. 2612 * If the length of this UnicodeString is less than targetLength, 2613 * targetLength - length() copies of padChar will be added to the 2614 * beginning of this UnicodeString. 2615 * @param targetLength the desired length of the string 2616 * @param padChar the character to use for padding. Defaults to 2617 * space (U+0020) 2618 * @return TRUE if the text was padded, FALSE otherwise. 2619 * @stable ICU 2.0 2620 */ 2621 UBool padLeading(int32_t targetLength, 2622 UChar padChar = 0x0020); 2623 2624 /** 2625 * Pad the end of this UnicodeString with the character <TT>padChar</TT>. 2626 * If the length of this UnicodeString is less than targetLength, 2627 * targetLength - length() copies of padChar will be added to the 2628 * end of this UnicodeString. 2629 * @param targetLength the desired length of the string 2630 * @param padChar the character to use for padding. Defaults to 2631 * space (U+0020) 2632 * @return TRUE if the text was padded, FALSE otherwise.
2633 * @stable ICU 2.0 2634 */ 2635 UBool padTrailing(int32_t targetLength, 2636 UChar padChar = 0x0020); 2637 2638 /** 2639 * Truncate this UnicodeString to the <TT>targetLength</TT>. 2640 * @param targetLength the desired length of this UnicodeString. 2641 * @return TRUE if the text was truncated, FALSE otherwise 2642 * @stable ICU 2.0 2643 */ 2644 inline UBool truncate(int32_t targetLength); 2645 2646 /** 2647 * Trims leading and trailing whitespace from this UnicodeString. 2648 * @return a reference to this 2649 * @stable ICU 2.0 2650 */ 2651 UnicodeString& trim(void); 2652 2653 2654 /* Miscellaneous operations */ 2655 2656 /** 2657 * Reverse this UnicodeString in place. 2658 * @return a reference to this 2659 * @stable ICU 2.0 2660 */ 2661 inline UnicodeString& reverse(void); 2662 2663 /** 2664 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in 2665 * this UnicodeString. 2666 * @param start the start of the range to reverse 2667 * @param length the number of characters to reverse 2668 * @return a reference to this 2669 * @stable ICU 2.0 2670 */ 2671 inline UnicodeString& reverse(int32_t start, 2672 int32_t length); 2673 2674 /** 2675 * Convert the characters in this to UPPER CASE following the conventions of 2676 * the default locale. 2677 * @return A reference to this. 2678 * @stable ICU 2.0 2679 */ 2680 UnicodeString& toUpper(void); 2681 2682 /** 2683 * Convert the characters in this to UPPER CASE following the conventions of 2684 * a specific locale. 2685 * @param locale The locale containing the conventions to use. 2686 * @return A reference to this. 2687 * @stable ICU 2.0 2688 */ 2689 UnicodeString& toUpper(const Locale& locale); 2690 2691 /** 2692 * Convert the characters in this to lower case following the conventions of 2693 * the default locale. 2694 * @return A reference to this.
2695 * @stable ICU 2.0 2696 */ 2697 UnicodeString& toLower(void); 2698 2699 /** 2700 * Convert the characters in this to lower case following the conventions of 2701 * a specific locale. 2702 * @param locale The locale containing the conventions to use. 2703 * @return A reference to this. 2704 * @stable ICU 2.0 2705 */ 2706 UnicodeString& toLower(const Locale& locale); 2707 2708 #if !UCONFIG_NO_BREAK_ITERATION 2709 2710 /** 2711 * Titlecase this string, convenience function using the default locale. 2712 * 2713 * Casing is locale-dependent and context-sensitive. 2714 * Titlecasing uses a break iterator to find the first characters of words 2715 * that are to be titlecased. It titlecases those characters and lowercases 2716 * all others. 2717 * 2718 * The titlecase break iterator can be provided to customize for arbitrary 2719 * styles, using rules and dictionaries beyond the standard iterators. 2720 * It may be more efficient to always provide an iterator to avoid 2721 * opening and closing one for each string. 2722 * The standard titlecase iterator for the root locale implements the 2723 * algorithm of Unicode TR 21. 2724 * 2725 * This function uses only the setText(), first() and next() methods of the 2726 * provided break iterator. 2727 * 2728 * @param titleIter A break iterator to find the first characters of words 2729 * that are to be titlecased. 2730 * If none is provided (0), then a standard titlecase 2731 * break iterator is opened. 2732 * Otherwise the provided iterator is set to the string's text. 2733 * @return A reference to this. 2734 * @stable ICU 2.1 2735 */ 2736 UnicodeString &toTitle(BreakIterator *titleIter); 2737 2738 /** 2739 * Titlecase this string. 2740 * 2741 * Casing is locale-dependent and context-sensitive. 2742 * Titlecasing uses a break iterator to find the first characters of words 2743 * that are to be titlecased. It titlecases those characters and lowercases 2744 * all others. 
2745 * 2746 * The titlecase break iterator can be provided to customize for arbitrary 2747 * styles, using rules and dictionaries beyond the standard iterators. 2748 * It may be more efficient to always provide an iterator to avoid 2749 * opening and closing one for each string. 2750 * The standard titlecase iterator for the root locale implements the 2751 * algorithm of Unicode TR 21. 2752 * 2753 * This function uses only the setText(), first() and next() methods of the 2754 * provided break iterator. 2755 * 2756 * @param titleIter A break iterator to find the first characters of words 2757 * that are to be titlecased. 2758 * If none is provided (0), then a standard titlecase 2759 * break iterator is opened. 2760 * Otherwise the provided iterator is set to the string's text. 2761 * @param locale The locale to consider. 2762 * @return A reference to this. 2763 * @stable ICU 2.1 2764 */ 2765 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 2766 2767 /** 2768 * Titlecase this string, with options. 2769 * 2770 * Casing is locale-dependent and context-sensitive. 2771 * Titlecasing uses a break iterator to find the first characters of words 2772 * that are to be titlecased. It titlecases those characters and lowercases 2773 * all others. (This can be modified with options.) 2774 * 2775 * The titlecase break iterator can be provided to customize for arbitrary 2776 * styles, using rules and dictionaries beyond the standard iterators. 2777 * It may be more efficient to always provide an iterator to avoid 2778 * opening and closing one for each string. 2779 * The standard titlecase iterator for the root locale implements the 2780 * algorithm of Unicode TR 21. 2781 * 2782 * This function uses only the setText(), first() and next() methods of the 2783 * provided break iterator. 2784 * 2785 * @param titleIter A break iterator to find the first characters of words 2786 * that are to be titlecased. 
2787 * If none is provided (0), then a standard titlecase 2788 * break iterator is opened. 2789 * Otherwise the provided iterator is set to the string's text. 2790 * @param locale The locale to consider. 2791 * @param options Options bit set, see ucasemap_open(). 2792 * @return A reference to this. 2793 * @see U_TITLECASE_NO_LOWERCASE 2794 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT 2795 * @see ucasemap_open 2796 * @stable ICU 3.8 2797 */ 2798 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 2799 2800 #endif 2801 2802 /** 2803 * Case-folds the characters in this string. 2804 * 2805 * Case-folding is locale-independent and not context-sensitive, 2806 * but there is an option for whether to include or exclude mappings for dotted I 2807 * and dotless i that are marked with 'T' in CaseFolding.txt. 2808 * 2809 * The result may be longer or shorter than the original. 2810 * 2811 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 2812 * @return A reference to this. 2813 * @stable ICU 2.0 2814 */ 2815 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 2816 2817 //======================================== 2818 // Access to the internal buffer 2819 //======================================== 2820 2821 /** 2822 * Get a read/write pointer to the internal buffer. 2823 * The buffer is guaranteed to be large enough for at least minCapacity UChars, 2824 * writable, and is still owned by the UnicodeString object. 2825 * Calls to getBuffer(minCapacity) must not be nested, and 2826 * must be matched with calls to releaseBuffer(newLength). 2827 * If the string buffer was read-only or shared, 2828 * then it will be reallocated and copied. 2829 * 2830 * An attempted nested call will return 0, and will not further modify the 2831 * state of the UnicodeString object. 2832 * It also returns 0 if the string is bogus. 2833 * 2834 * The actual capacity of the string buffer may be larger than minCapacity. 
2835 * getCapacity() returns the actual capacity. 2836 * For many operations, the full capacity should be used to avoid reallocations. 2837 * 2838 * While the buffer is "open" between getBuffer(minCapacity) 2839 * and releaseBuffer(newLength), the following applies: 2840 * - The string length is set to 0. 2841 * - Any read API call on the UnicodeString object will behave like on a 0-length string. 2842 * - Any write API call on the UnicodeString object is disallowed and will have no effect. 2843 * - You can read from and write to the returned buffer. 2844 * - The previous string contents will still be in the buffer; 2845 * if you want to use it, then you need to call length() before getBuffer(minCapacity). 2846 * If the length() was greater than minCapacity, then any contents after minCapacity 2847 * may be lost. 2848 * The buffer contents is not NUL-terminated by getBuffer(). 2849 * If length()<getCapacity() then you can terminate it by writing a NUL 2850 * at index length(). 2851 * - You must call releaseBuffer(newLength) before and in order to 2852 * return to normal UnicodeString operation. 2853 * 2854 * @param minCapacity the minimum number of UChars that are to be available 2855 * in the buffer, starting at the returned pointer; 2856 * default to the current string capacity if minCapacity==-1 2857 * @return a writable pointer to the internal string buffer, 2858 * or 0 if an error occurs (nested calls, out of memory) 2859 * 2860 * @see releaseBuffer 2861 * @see getTerminatedBuffer() 2862 * @stable ICU 2.0 2863 */ 2864 UChar *getBuffer(int32_t minCapacity); 2865 2866 /** 2867 * Release a read/write buffer on a UnicodeString object with an 2868 * "open" getBuffer(minCapacity). 2869 * This function must be called in a matched pair with getBuffer(minCapacity). 2870 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". 2871 * 2872 * It will set the string length to newLength, at most to the current capacity. 
2873 * If newLength==-1 then it will set the length according to the 2874 * first NUL in the buffer, or to the capacity if there is no NUL. 2875 * 2876 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. 2877 * 2878 * @param newLength the new length of the UnicodeString object; 2879 * defaults to the current capacity if newLength is greater than that; 2880 * if newLength==-1, it defaults to u_strlen(buffer) but not more than 2881 * the current capacity of the string 2882 * 2883 * @see getBuffer(int32_t minCapacity) 2884 * @stable ICU 2.0 2885 */ 2886 void releaseBuffer(int32_t newLength=-1); 2887 2888 /** 2889 * Get a read-only pointer to the internal buffer. 2890 * This can be called at any time on a valid UnicodeString. 2891 * 2892 * It returns 0 if the string is bogus, or 2893 * during an "open" getBuffer(minCapacity). 2894 * 2895 * It can be called as many times as desired. 2896 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2897 * at which time the pointer is semantically invalidated and must not be used any more. 2898 * 2899 * The capacity of the buffer can be determined with getCapacity(). 2900 * The part after length() may or may not be initialized and valid, 2901 * depending on the history of the UnicodeString object. 2902 * 2903 * The buffer contents is (probably) not NUL-terminated. 2904 * You can check if it is with 2905 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. 2906 * (See getTerminatedBuffer().) 2907 * 2908 * The buffer may reside in read-only memory. Its contents must not 2909 * be modified. 
2910 * 2911 * @return a read-only pointer to the internal string buffer, 2912 * or 0 if the string is empty or bogus 2913 * 2914 * @see getBuffer(int32_t minCapacity) 2915 * @see getTerminatedBuffer() 2916 * @stable ICU 2.0 2917 */ 2918 inline const UChar *getBuffer() const; 2919 2920 /** 2921 * Get a read-only pointer to the internal buffer, 2922 * making sure that it is NUL-terminated. 2923 * This can be called at any time on a valid UnicodeString. 2924 * 2925 * It returns 0 if the string is bogus, or 2926 * during an "open" getBuffer(minCapacity), or if the buffer cannot 2927 * be NUL-terminated (because memory allocation failed). 2928 * 2929 * It can be called as many times as desired. 2930 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2931 * at which time the pointer is semantically invalidated and must not be used any more. 2932 * 2933 * The capacity of the buffer can be determined with getCapacity(). 2934 * The part after length()+1 may or may not be initialized and valid, 2935 * depending on the history of the UnicodeString object. 2936 * 2937 * The buffer contents is guaranteed to be NUL-terminated. 2938 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL 2939 * is written. 2940 * For this reason, this function is not const, unlike getBuffer(). 2941 * Note that a UnicodeString may also contain NUL characters as part of its contents. 2942 * 2943 * The buffer may reside in read-only memory. Its contents must not 2944 * be modified. 2945 * 2946 * @return a read-only pointer to the internal string buffer, 2947 * or 0 if the string is empty or bogus 2948 * 2949 * @see getBuffer(int32_t minCapacity) 2950 * @see getBuffer() 2951 * @stable ICU 2.2 2952 */ 2953 const UChar *getTerminatedBuffer(); 2954 2955 //======================================== 2956 // Constructors 2957 //======================================== 2958 2959 /** Construct an empty UnicodeString. 
2960 * @stable ICU 2.0 2961 */ 2962 inline UnicodeString(); 2963 2964 /** 2965 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars 2966 * @param capacity the number of UChars this UnicodeString should hold 2967 * before a resize is necessary; if count is greater than 0 and count 2968 * code points c take up more space than capacity, then capacity is adjusted 2969 * accordingly. 2970 * @param c is used to initially fill the string 2971 * @param count specifies how many code points c are to be written in the 2972 * string 2973 * @stable ICU 2.0 2974 */ 2975 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 2976 2977 /** 2978 * Single UChar (code unit) constructor. 2979 * 2980 * It is recommended to mark this constructor "explicit" by 2981 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2982 * on the compiler command line or similar. 2983 * @param ch the character to place in the UnicodeString 2984 * @stable ICU 2.0 2985 */ 2986 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); 2987 2988 /** 2989 * Single UChar32 (code point) constructor. 2990 * 2991 * It is recommended to mark this constructor "explicit" by 2992 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2993 * on the compiler command line or similar. 2994 * @param ch the character to place in the UnicodeString 2995 * @stable ICU 2.0 2996 */ 2997 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 2998 2999 /** 3000 * UChar* constructor. 3001 * 3002 * It is recommended to mark this constructor "explicit" by 3003 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3004 * on the compiler command line or similar. 3005 * @param text The characters to place in the UnicodeString. <TT>text</TT> 3006 * must be NUL (U+0000) terminated. 3007 * @stable ICU 2.0 3008 */ 3009 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); 3010 3011 /** 3012 * UChar* constructor. 3013 * @param text The characters to place in the UnicodeString.
3014 * @param textLength The number of Unicode characters in <TT>text</TT> 3015 * to copy. 3016 * @stable ICU 2.0 3017 */ 3018 UnicodeString(const UChar *text, 3019 int32_t textLength); 3020 3021 /** 3022 * Readonly-aliasing UChar* constructor. 3023 * The text will be used for the UnicodeString object, but 3024 * it will not be released when the UnicodeString is destroyed. 3025 * This has copy-on-write semantics: 3026 * When the string is modified, then the buffer is first copied into 3027 * newly allocated memory. 3028 * The aliased buffer is never modified. 3029 * 3030 * In an assignment to another UnicodeString, when using the copy constructor 3031 * or the assignment operator, the text will be copied. 3032 * When using fastCopyFrom(), the text will be aliased again, 3033 * so that both strings then alias the same readonly-text. 3034 * 3035 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 3036 * This must be true if <code>textLength==-1</code>. 3037 * @param text The characters to alias for the UnicodeString. 3038 * @param textLength The number of Unicode characters in <code>text</code> to alias. 3039 * If -1, then this constructor will determine the length 3040 * by calling <code>u_strlen()</code>. 3041 * @stable ICU 2.0 3042 */ 3043 UnicodeString(UBool isTerminated, 3044 const UChar *text, 3045 int32_t textLength); 3046 3047 /** 3048 * Writable-aliasing UChar* constructor. 3049 * The text will be used for the UnicodeString object, but 3050 * it will not be released when the UnicodeString is destroyed. 3051 * This has write-through semantics: 3052 * For as long as the capacity of the buffer is sufficient, write operations 3053 * will directly affect the buffer. When more capacity is necessary, then 3054 * a new buffer will be allocated and the contents copied as with regularly 3055 * constructed strings. 3056 * In an assignment to another UnicodeString, the buffer will be copied. 
3057 * The extract(UChar *dst) function detects whether the dst pointer is the same 3058 * as the string buffer itself and will in this case not copy the contents. 3059 * 3060 * @param buffer The characters to alias for the UnicodeString. 3061 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 3062 * @param buffCapacity The size of <code>buffer</code> in UChars. 3063 * @stable ICU 2.0 3064 */ 3065 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 3066 3067 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 3068 3069 /** 3070 * char* constructor. 3071 * Uses the default converter (and thus depends on the ICU conversion code) 3072 * unless U_CHARSET_IS_UTF8 is set to 1. 3073 * 3074 * For ASCII (really "invariant character") strings it is more efficient to use 3075 * the constructor that takes a US_INV (for its enum EInvariant). 3076 * For ASCII (invariant-character) string literals, see UNICODE_STRING and 3077 * UNICODE_STRING_SIMPLE. 3078 * 3079 * It is recommended to mark this constructor "explicit" by 3080 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3081 * on the compiler command line or similar. 3082 * @param codepageData an array of bytes, null-terminated, 3083 * in the platform's default codepage. 3084 * @stable ICU 2.0 3085 * @see UNICODE_STRING 3086 * @see UNICODE_STRING_SIMPLE 3087 */ 3088 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 3089 3090 /** 3091 * char* constructor. 3092 * Uses the default converter (and thus depends on the ICU conversion code) 3093 * unless U_CHARSET_IS_UTF8 is set to 1. 3094 * @param codepageData an array of bytes in the platform's default codepage. 3095 * @param dataLength The number of bytes in <TT>codepageData</TT>. 3096 * @stable ICU 2.0 3097 */ 3098 UnicodeString(const char *codepageData, int32_t dataLength); 3099 3100 #endif 3101 3102 #if !UCONFIG_NO_CONVERSION 3103 3104 /** 3105 * char* constructor. 
3106 * @param codepageData an array of bytes, null-terminated 3107 * @param codepage the encoding of <TT>codepageData</TT>. The special 3108 * value 0 for <TT>codepage</TT> indicates that the text is in the 3109 * platform's default codepage. 3110 * 3111 * If <code>codepage</code> is an empty string (<code>""</code>), 3112 * then a simple conversion is performed on the codepage-invariant 3113 * subset ("invariant characters") of the platform encoding. See utypes.h. 3114 * Recommendation: For invariant-character strings use the constructor 3115 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3116 * because it avoids object code dependencies of UnicodeString on 3117 * the conversion code. 3118 * 3119 * @stable ICU 2.0 3120 */ 3121 UnicodeString(const char *codepageData, const char *codepage); 3122 3123 /** 3124 * char* constructor. 3125 * @param codepageData an array of bytes. 3126 * @param dataLength The number of bytes in <TT>codepageData</TT>. 3127 * @param codepage the encoding of <TT>codepageData</TT>. The special 3128 * value 0 for <TT>codepage</TT> indicates that the text is in the 3129 * platform's default codepage. 3130 * If <code>codepage</code> is an empty string (<code>""</code>), 3131 * then a simple conversion is performed on the codepage-invariant 3132 * subset ("invariant characters") of the platform encoding. See utypes.h. 3133 * Recommendation: For invariant-character strings use the constructor 3134 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3135 * because it avoids object code dependencies of UnicodeString on 3136 * the conversion code. 3137 * 3138 * @stable ICU 2.0 3139 */ 3140 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 3141 3142 /** 3143 * char * / UConverter constructor. 3144 * This constructor uses an existing UConverter object to 3145 * convert the codepage string to Unicode and construct a UnicodeString 3146 * from that. 
3147 * 3148 * The converter is reset at first. 3149 * If the error code indicates a failure before this constructor is called, 3150 * or if an error occurs during conversion or construction, 3151 * then the string will be bogus. 3152 * 3153 * This function avoids the overhead of opening and closing a converter if 3154 * multiple strings are constructed. 3155 * 3156 * @param src input codepage string 3157 * @param srcLength length of the input string, can be -1 for NUL-terminated strings 3158 * @param cnv converter object (ucnv_resetToUnicode() will be called), 3159 * can be NULL for the default converter 3160 * @param errorCode normal ICU error code 3161 * @stable ICU 2.0 3162 */ 3163 UnicodeString( 3164 const char *src, int32_t srcLength, 3165 UConverter *cnv, 3166 UErrorCode &errorCode); 3167 3168 #endif 3169 3170 /** 3171 * Constructs a Unicode string from an invariant-character char * string. 3172 * About invariant characters see utypes.h. 3173 * This constructor has no runtime dependency on conversion code and is 3174 * therefore recommended over ones taking a charset name string 3175 * (where the empty string "" indicates invariant-character conversion). 3176 * 3177 * Use the macro US_INV as the third, signature-distinguishing parameter. 3178 * 3179 * For example: 3180 * \code 3181 * void fn(const char *s) { 3182 * UnicodeString ustr(s, -1, US_INV); 3183 * // use ustr ... 3184 * } 3185 * \endcode 3186 * 3187 * @param src String using only invariant characters. 3188 * @param length Length of src, or -1 if NUL-terminated. 3189 * @param inv Signature-distinguishing parameter, use US_INV. 3190 * 3191 * @see US_INV 3192 * @stable ICU 3.2 3193 */ 3194 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 3195 3196 3197 /** 3198 * Copy constructor. 3199 * 3200 * Starting with ICU 2.4, the assignment operator and the copy constructor 3201 * allocate a new buffer and copy the buffer contents even for readonly aliases.
3202 * By contrast, the fastCopyFrom() function implements the old, 3203 * more efficient but less safe behavior 3204 * of making this string also a readonly alias to the same buffer. 3205 * 3206 * If the source object has an "open" buffer from getBuffer(minCapacity), 3207 * then the copy is an empty string. 3208 * 3209 * @param that The UnicodeString object to copy. 3210 * @stable ICU 2.0 3211 * @see fastCopyFrom 3212 */ 3213 UnicodeString(const UnicodeString& that); 3214 3215 #ifndef U_HIDE_DRAFT_API 3216 #if U_HAVE_RVALUE_REFERENCES 3217 /** 3218 * Move constructor, might leave src in bogus state. 3219 * This string will have the same contents and state that the source string had. 3220 * @param src source string 3221 * @draft ICU 56 3222 */ 3223 UnicodeString(UnicodeString &&src) U_NOEXCEPT; 3224 #endif 3225 #endif /* U_HIDE_DRAFT_API */ 3226 3227 /** 3228 * 'Substring' constructor from tail of source string. 3229 * @param src The UnicodeString object to copy. 3230 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3231 * @stable ICU 2.2 3232 */ 3233 UnicodeString(const UnicodeString& src, int32_t srcStart); 3234 3235 /** 3236 * 'Substring' constructor from subrange of source string. 3237 * @param src The UnicodeString object to copy. 3238 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3239 * @param srcLength The number of characters from <tt>src</tt> to copy. 3240 * @stable ICU 2.2 3241 */ 3242 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3243 3244 /** 3245 * Clone this object, an instance of a subclass of Replaceable. 3246 * Clones can be used concurrently in multiple threads. 3247 * If a subclass does not implement clone(), or if an error occurs, 3248 * then NULL is returned. 
3249 * The clone functions in all subclasses return a pointer to a Replaceable 3250 * because some compilers do not support covariant (same-as-this) 3251 * return types; cast to the appropriate subclass if necessary. 3252 * The caller must delete the clone. 3253 * 3254 * @return a clone of this object 3255 * 3256 * @see Replaceable::clone 3257 * @see getDynamicClassID 3258 * @stable ICU 2.6 3259 */ 3260 virtual Replaceable *clone() const; 3261 3262 /** Destructor. 3263 * @stable ICU 2.0 3264 */ 3265 virtual ~UnicodeString(); 3266 3267 /** 3268 * Create a UnicodeString from a UTF-8 string. 3269 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3270 * Calls u_strFromUTF8WithSub(). 3271 * 3272 * @param utf8 UTF-8 input string. 3273 * Note that a StringPiece can be implicitly constructed 3274 * from a std::string or a NUL-terminated const char * string. 3275 * @return A UnicodeString with equivalent UTF-16 contents. 3276 * @see toUTF8 3277 * @see toUTF8String 3278 * @stable ICU 4.2 3279 */ 3280 static UnicodeString fromUTF8(const StringPiece &utf8); 3281 3282 /** 3283 * Create a UnicodeString from a UTF-32 string. 3284 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3285 * Calls u_strFromUTF32WithSub(). 3286 * 3287 * @param utf32 UTF-32 input string. Must not be NULL. 3288 * @param length Length of the input string, or -1 if NUL-terminated. 3289 * @return A UnicodeString with equivalent UTF-16 contents. 3290 * @see toUTF32 3291 * @stable ICU 4.2 3292 */ 3293 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 3294 3295 /* Miscellaneous operations */ 3296 3297 /** 3298 * Unescape a string of characters and return a string containing 3299 * the result. 
The following escape sequences are recognized: 3300 * 3301 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] 3302 * \\Uhhhhhhhh 8 hex digits 3303 * \\xhh 1-2 hex digits 3304 * \\ooo 1-3 octal digits; o in [0-7] 3305 * \\cX control-X; X is masked with 0x1F 3306 * 3307 * as well as the standard ANSI C escapes: 3308 * 3309 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, 3310 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, 3311 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C 3312 * 3313 * Anything else following a backslash is generically escaped. For 3314 * example, "[a\\-z]" returns "[a-z]". 3315 * 3316 * If an escape sequence is ill-formed, this method returns an empty 3317 * string. An example of an ill-formed sequence is "\\u" followed by 3318 * fewer than 4 hex digits. 3319 * 3320 * This function is similar to u_unescape() but not identical to it. 3321 * The latter takes a source char*, so it does escape recognition 3322 * and also invariant conversion. 3323 * 3324 * @return a string with backslash escapes interpreted, or an 3325 * empty string on error. 3326 * @see UnicodeString#unescapeAt() 3327 * @see u_unescape() 3328 * @see u_unescapeAt() 3329 * @stable ICU 2.0 3330 */ 3331 UnicodeString unescape() const; 3332 3333 /** 3334 * Unescape a single escape sequence and return the represented 3335 * character. See unescape() for a listing of the recognized escape 3336 * sequences. The character at offset-1 is assumed (without 3337 * checking) to be a backslash. If the escape sequence is 3338 * ill-formed, or the offset is out of range, U_SENTINEL=-1 is 3339 * returned. 3340 * 3341 * @param offset an input output parameter. On input, it is the 3342 * offset into this string where the escape sequence is located, 3343 * after the initial backslash. On output, it is advanced after the 3344 * last character parsed. On error, it is not advanced at all. 
3345 * @return the character represented by the escape sequence at 3346 * offset, or U_SENTINEL=-1 on error. 3347 * @see UnicodeString#unescape() 3348 * @see u_unescape() 3349 * @see u_unescapeAt() 3350 * @stable ICU 2.0 3351 */ 3352 UChar32 unescapeAt(int32_t &offset) const; 3353 3354 /** 3355 * ICU "poor man's RTTI", returns a UClassID for this class. 3356 * 3357 * @stable ICU 2.2 3358 */ 3359 static UClassID U_EXPORT2 getStaticClassID(); 3360 3361 /** 3362 * ICU "poor man's RTTI", returns a UClassID for the actual class. 3363 * 3364 * @stable ICU 2.2 3365 */ 3366 virtual UClassID getDynamicClassID() const; 3367 3368 //======================================== 3369 // Implementation methods 3370 //======================================== 3371 3372 protected: 3373 /** 3374 * Implement Replaceable::getLength() (see jitterbug 1027). 3375 * @stable ICU 2.4 3376 */ 3377 virtual int32_t getLength() const; 3378 3379 /** 3380 * The change in Replaceable to use virtual getCharAt() allows 3381 * UnicodeString::charAt() to be inline again (see jitterbug 709). 3382 * @stable ICU 2.4 3383 */ 3384 virtual UChar getCharAt(int32_t offset) const; 3385 3386 /** 3387 * The change in Replaceable to use virtual getChar32At() allows 3388 * UnicodeString::char32At() to be inline again (see jitterbug 709). 3389 * @stable ICU 2.4 3390 */ 3391 virtual UChar32 getChar32At(int32_t offset) const; 3392 3393 private: 3394 // For char* constructors. Could be made public. 3395 UnicodeString &setToUTF8(const StringPiece &utf8); 3396 // For extract(char*). 3397 // We could make a toUTF8(target, capacity, errorCode) public but not 3398 // this version: New API will be cleaner if we make callers create substrings 3399 // rather than having start+length on every method, 3400 // and it should take a UErrorCode&. 3401 int32_t 3402 toUTF8(int32_t start, int32_t len, 3403 char *target, int32_t capacity) const; 3404 3405 /** 3406 * Internal string contents comparison, called by operator==. 
3407 * Requires: this & text not bogus and have same lengths. 3408 */ 3409 UBool doEquals(const UnicodeString &text, int32_t len) const; 3410 3411 inline int8_t 3412 doCompare(int32_t start, 3413 int32_t length, 3414 const UnicodeString& srcText, 3415 int32_t srcStart, 3416 int32_t srcLength) const; 3417 3418 int8_t doCompare(int32_t start, 3419 int32_t length, 3420 const UChar *srcChars, 3421 int32_t srcStart, 3422 int32_t srcLength) const; 3423 3424 inline int8_t 3425 doCompareCodePointOrder(int32_t start, 3426 int32_t length, 3427 const UnicodeString& srcText, 3428 int32_t srcStart, 3429 int32_t srcLength) const; 3430 3431 int8_t doCompareCodePointOrder(int32_t start, 3432 int32_t length, 3433 const UChar *srcChars, 3434 int32_t srcStart, 3435 int32_t srcLength) const; 3436 3437 inline int8_t 3438 doCaseCompare(int32_t start, 3439 int32_t length, 3440 const UnicodeString &srcText, 3441 int32_t srcStart, 3442 int32_t srcLength, 3443 uint32_t options) const; 3444 3445 int8_t 3446 doCaseCompare(int32_t start, 3447 int32_t length, 3448 const UChar *srcChars, 3449 int32_t srcStart, 3450 int32_t srcLength, 3451 uint32_t options) const; 3452 3453 int32_t doIndexOf(UChar c, 3454 int32_t start, 3455 int32_t length) const; 3456 3457 int32_t doIndexOf(UChar32 c, 3458 int32_t start, 3459 int32_t length) const; 3460 3461 int32_t doLastIndexOf(UChar c, 3462 int32_t start, 3463 int32_t length) const; 3464 3465 int32_t doLastIndexOf(UChar32 c, 3466 int32_t start, 3467 int32_t length) const; 3468 3469 void doExtract(int32_t start, 3470 int32_t length, 3471 UChar *dst, 3472 int32_t dstStart) const; 3473 3474 inline void doExtract(int32_t start, 3475 int32_t length, 3476 UnicodeString& target) const; 3477 3478 inline UChar doCharAt(int32_t offset) const; 3479 3480 UnicodeString& doReplace(int32_t start, 3481 int32_t length, 3482 const UnicodeString& srcText, 3483 int32_t srcStart, 3484 int32_t srcLength); 3485 3486 UnicodeString& doReplace(int32_t start, 3487 int32_t length, 3488 
const UChar *srcChars, 3489 int32_t srcStart, 3490 int32_t srcLength); 3491 3492 UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3493 UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength); 3494 3495 UnicodeString& doReverse(int32_t start, 3496 int32_t length); 3497 3498 // calculate hash code 3499 int32_t doHashCode(void) const; 3500 3501 // get pointer to start of array 3502 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 3503 inline UChar* getArrayStart(void); 3504 inline const UChar* getArrayStart(void) const; 3505 3506 inline UBool hasShortLength() const; 3507 inline int32_t getShortLength() const; 3508 3509 // A UnicodeString object (not necessarily its current buffer) 3510 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 3511 inline UBool isWritable() const; 3512 3513 // Is the current buffer writable? 3514 inline UBool isBufferWritable() const; 3515 3516 // None of the following does releaseArray(). 3517 inline void setZeroLength(); 3518 inline void setShortLength(int32_t len); 3519 inline void setLength(int32_t len); 3520 inline void setToEmpty(); 3521 inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags 3522 3523 // allocate the array; result may be the stack buffer 3524 // sets refCount to 1 if appropriate 3525 // sets fArray, fCapacity, and flags 3526 // sets length to 0 3527 // returns boolean for success or failure 3528 UBool allocate(int32_t capacity); 3529 3530 // release the array if owned 3531 void releaseArray(void); 3532 3533 // turn a bogus string into an empty one 3534 void unBogus(); 3535 3536 // implements assigment operator, copy constructor, and fastCopyFrom() 3537 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE); 3538 3539 // Copies just the fields without memory management. 
3540 void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT; 3541 3542 // Pin start and limit to acceptable values. 3543 inline void pinIndex(int32_t& start) const; 3544 inline void pinIndices(int32_t& start, 3545 int32_t& length) const; 3546 3547 #if !UCONFIG_NO_CONVERSION 3548 3549 /* Internal extract() using UConverter. */ 3550 int32_t doExtract(int32_t start, int32_t length, 3551 char *dest, int32_t destCapacity, 3552 UConverter *cnv, 3553 UErrorCode &errorCode) const; 3554 3555 /* 3556 * Real constructor for converting from codepage data. 3557 * It assumes that it is called with !fRefCounted. 3558 * 3559 * If <code>codepage==0</code>, then the default converter 3560 * is used for the platform encoding. 3561 * If <code>codepage</code> is an empty string (<code>""</code>), 3562 * then a simple conversion is performed on the codepage-invariant 3563 * subset ("invariant characters") of the platform encoding. See utypes.h. 3564 */ 3565 void doCodepageCreate(const char *codepageData, 3566 int32_t dataLength, 3567 const char *codepage); 3568 3569 /* 3570 * Worker function for creating a UnicodeString from 3571 * a codepage string using a UConverter. 3572 */ 3573 void 3574 doCodepageCreate(const char *codepageData, 3575 int32_t dataLength, 3576 UConverter *converter, 3577 UErrorCode &status); 3578 3579 #endif 3580 3581 /* 3582 * This function is called when write access to the array 3583 * is necessary. 3584 * 3585 * We need to make a copy of the array if 3586 * the buffer is read-only, or 3587 * the buffer is refCounted (shared), and refCount>1, or 3588 * the buffer is too small. 3589 * 3590 * Return FALSE if memory could not be allocated. 3591 */ 3592 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 3593 int32_t growCapacity = -1, 3594 UBool doCopyArray = TRUE, 3595 int32_t **pBufferToDelete = 0, 3596 UBool forceClone = FALSE); 3597 3598 /** 3599 * Common function for UnicodeString case mappings. 
3600 * The stringCaseMapper has the same type UStringCaseMapper 3601 * as in ustr_imp.h for ustrcase_map(). 3602 */ 3603 UnicodeString & 3604 caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); 3605 3606 // ref counting 3607 void addRef(void); 3608 int32_t removeRef(void); 3609 int32_t refCount(void) const; 3610 3611 // constants 3612 enum { 3613 /** 3614 * Size of stack buffer for short strings. 3615 * Must be at least U16_MAX_LENGTH for the single-code point constructor to work. 3616 * @see UNISTR_OBJECT_SIZE 3617 */ 3618 US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR, 3619 kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index) 3620 kGrowSize=128, // grow size for this buffer 3621 kInvalidHashCode=0, // invalid hash code 3622 kEmptyHashCode=1, // hash code for empty string 3623 3624 // bit flag values for fLengthAndFlags 3625 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 3626 kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields 3627 kRefCounted=4, // there is a refCount field before the characters in fArray 3628 kBufferIsReadonly=8,// do not write to this buffer 3629 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 3630 // and releaseBuffer(newLength) must be called 3631 kAllStorageFlags=0x1f, 3632 3633 kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long 3634 kLength1=1<<kLengthShift, 3635 kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0) 3636 kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength 3637 3638 // combined values for convenience 3639 kShortString=kUsingStackBuffer, 3640 kLongString=kRefCounted, 3641 kReadonlyAlias=kBufferIsReadonly, 3642 kWritableAlias=0 3643 }; 3644 3645 friend class UnicodeStringAppendable; 3646 3647 union StackBufferOrFields; // forward declaration necessary before friend declaration 3648 friend union StackBufferOrFields; // 
make US_STACKBUF_SIZE visible inside fUnion 3649 3650 /* 3651 * The following are all the class fields that are stored 3652 * in each UnicodeString object. 3653 * Note that UnicodeString has virtual functions, 3654 * therefore there is an implicit vtable pointer 3655 * as the first real field. 3656 * The fields should be aligned such that no padding is necessary. 3657 * On 32-bit machines, the size should be 32 bytes, 3658 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 3659 * 3660 * We use a hack to achieve this. 3661 * 3662 * With at least some compilers, each of the following is forced to 3663 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 3664 * rounded up with additional padding if the fields do not already fit that requirement: 3665 * - sizeof(class UnicodeString) 3666 * - offsetof(UnicodeString, fUnion) 3667 * - sizeof(fUnion) 3668 * - sizeof(fStackFields) 3669 * 3670 * We optimize for the longest possible internal buffer for short strings. 3671 * fUnion.fStackFields begins with 2 bytes for storage flags 3672 * and the length of relatively short strings, 3673 * followed by the buffer for short string contents. 3674 * There is no padding inside fStackFields. 3675 * 3676 * Heap-allocated and aliased strings use fUnion.fFields. 3677 * Both fStackFields and fFields must begin with the same fields for flags and short length, 3678 * that is, those must have the same memory offsets inside the object, 3679 * because the flags must be inspected in order to decide which half of fUnion is being used. 3680 * We assume that the compiler does not reorder the fields. 3681 * 3682 * (Padding at the end of fFields is ok: 3683 * As long as it is no larger than fStackFields, it is not wasted space.) 
3684 * 3685 * For some of the history of the UnicodeString class fields layout, see 3686 * - ICU ticket #11551 "longer UnicodeString contents in stack buffer" 3687 * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays" 3688 * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?" 3689 */ 3690 // (implicit) *vtable; 3691 union StackBufferOrFields { 3692 // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used. 3693 // Each struct of the union must begin with fLengthAndFlags. 3694 struct { 3695 int16_t fLengthAndFlags; // bit fields: see constants above 3696 UChar fBuffer[US_STACKBUF_SIZE]; // buffer for short strings 3697 } fStackFields; 3698 struct { 3699 int16_t fLengthAndFlags; // bit fields: see constants above 3700 int32_t fLength; // number of characters in fArray if >kMaxShortLength; else undefined 3701 int32_t fCapacity; // capacity of fArray (in UChars) 3702 // array pointer last to minimize padding for machines with P128 data model 3703 // or pointer sizes that are not a power of 2 3704 UChar *fArray; // the Unicode data 3705 } fFields; 3706 } fUnion; 3707 }; 3708 3709 /** 3710 * Create a new UnicodeString with the concatenation of two others. 3711 * 3712 * @param s1 The first string to be copied to the new one. 3713 * @param s2 The second string to be copied to the new one, after s1.
3714 * @return UnicodeString(s1).append(s2) 3715 * @stable ICU 2.8 3716 */ 3717 U_COMMON_API UnicodeString U_EXPORT2 3718 operator+ (const UnicodeString &s1, const UnicodeString &s2); 3719 3720 //======================================== 3721 // Inline members 3722 //======================================== 3723 3724 //======================================== 3725 // Privates 3726 //======================================== 3727 3728 inline void 3729 UnicodeString::pinIndex(int32_t& start) const 3730 { 3731 // pin index 3732 if(start < 0) { 3733 start = 0; 3734 } else if(start > length()) { 3735 start = length(); 3736 } 3737 } 3738 3739 inline void 3740 UnicodeString::pinIndices(int32_t& start, 3741 int32_t& _length) const 3742 { 3743 // pin indices 3744 int32_t len = length(); 3745 if(start < 0) { 3746 start = 0; 3747 } else if(start > len) { 3748 start = len; 3749 } 3750 if(_length < 0) { 3751 _length = 0; 3752 } else if(_length > (len - start)) { 3753 _length = (len - start); 3754 } 3755 } 3756 3757 inline UChar* 3758 UnicodeString::getArrayStart() { 3759 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 3760 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 3761 } 3762 3763 inline const UChar* 3764 UnicodeString::getArrayStart() const { 3765 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
3766 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 3767 } 3768 3769 //======================================== 3770 // Default constructor 3771 //======================================== 3772 3773 inline 3774 UnicodeString::UnicodeString() { 3775 fUnion.fStackFields.fLengthAndFlags=kShortString; 3776 } 3777 3778 //======================================== 3779 // Read-only implementation methods 3780 //======================================== 3781 inline UBool 3782 UnicodeString::hasShortLength() const { 3783 return fUnion.fFields.fLengthAndFlags>=0; 3784 } 3785 3786 inline int32_t 3787 UnicodeString::getShortLength() const { 3788 // fLengthAndFlags must be non-negative -> short length >= 0 3789 // and arithmetic or logical shift does not matter. 3790 return fUnion.fFields.fLengthAndFlags>>kLengthShift; 3791 } 3792 3793 inline int32_t 3794 UnicodeString::length() const { 3795 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength; 3796 } 3797 3798 inline int32_t 3799 UnicodeString::getCapacity() const { 3800 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
3801 US_STACKBUF_SIZE : fUnion.fFields.fCapacity; 3802 } 3803 3804 inline int32_t 3805 UnicodeString::hashCode() const 3806 { return doHashCode(); } 3807 3808 inline UBool 3809 UnicodeString::isBogus() const 3810 { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); } 3811 3812 inline UBool 3813 UnicodeString::isWritable() const 3814 { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); } 3815 3816 inline UBool 3817 UnicodeString::isBufferWritable() const 3818 { 3819 return (UBool)( 3820 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 3821 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); 3822 } 3823 3824 inline const UChar * 3825 UnicodeString::getBuffer() const { 3826 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { 3827 return 0; 3828 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { 3829 return fUnion.fStackFields.fBuffer; 3830 } else { 3831 return fUnion.fFields.fArray; 3832 } 3833 } 3834 3835 //======================================== 3836 // Read-only alias methods 3837 //======================================== 3838 inline int8_t 3839 UnicodeString::doCompare(int32_t start, 3840 int32_t thisLength, 3841 const UnicodeString& srcText, 3842 int32_t srcStart, 3843 int32_t srcLength) const 3844 { 3845 if(srcText.isBogus()) { 3846 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3847 } else { 3848 srcText.pinIndices(srcStart, srcLength); 3849 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3850 } 3851 } 3852 3853 inline UBool 3854 UnicodeString::operator== (const UnicodeString& text) const 3855 { 3856 if(isBogus()) { 3857 return text.isBogus(); 3858 } else { 3859 int32_t len = length(), textLength = text.length(); 3860 return !text.isBogus() && len == textLength && doEquals(text, len); 3861 } 3862 } 3863 3864 inline UBool 3865 UnicodeString::operator!= (const UnicodeString& text) const 3866 { return (! 
operator==(text)); } 3867 3868 inline UBool 3869 UnicodeString::operator> (const UnicodeString& text) const 3870 { return doCompare(0, length(), text, 0, text.length()) == 1; } 3871 3872 inline UBool 3873 UnicodeString::operator< (const UnicodeString& text) const 3874 { return doCompare(0, length(), text, 0, text.length()) == -1; } 3875 3876 inline UBool 3877 UnicodeString::operator>= (const UnicodeString& text) const 3878 { return doCompare(0, length(), text, 0, text.length()) != -1; } 3879 3880 inline UBool 3881 UnicodeString::operator<= (const UnicodeString& text) const 3882 { return doCompare(0, length(), text, 0, text.length()) != 1; } 3883 3884 inline int8_t 3885 UnicodeString::compare(const UnicodeString& text) const 3886 { return doCompare(0, length(), text, 0, text.length()); } 3887 3888 inline int8_t 3889 UnicodeString::compare(int32_t start, 3890 int32_t _length, 3891 const UnicodeString& srcText) const 3892 { return doCompare(start, _length, srcText, 0, srcText.length()); } 3893 3894 inline int8_t 3895 UnicodeString::compare(const UChar *srcChars, 3896 int32_t srcLength) const 3897 { return doCompare(0, length(), srcChars, 0, srcLength); } 3898 3899 inline int8_t 3900 UnicodeString::compare(int32_t start, 3901 int32_t _length, 3902 const UnicodeString& srcText, 3903 int32_t srcStart, 3904 int32_t srcLength) const 3905 { return doCompare(start, _length, srcText, srcStart, srcLength); } 3906 3907 inline int8_t 3908 UnicodeString::compare(int32_t start, 3909 int32_t _length, 3910 const UChar *srcChars) const 3911 { return doCompare(start, _length, srcChars, 0, _length); } 3912 3913 inline int8_t 3914 UnicodeString::compare(int32_t start, 3915 int32_t _length, 3916 const UChar *srcChars, 3917 int32_t srcStart, 3918 int32_t srcLength) const 3919 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 3920 3921 inline int8_t 3922 UnicodeString::compareBetween(int32_t start, 3923 int32_t limit, 3924 const UnicodeString& srcText, 3925 int32_t 
srcStart, 3926 int32_t srcLimit) const 3927 { return doCompare(start, limit - start, 3928 srcText, srcStart, srcLimit - srcStart); } 3929 3930 inline int8_t 3931 UnicodeString::doCompareCodePointOrder(int32_t start, 3932 int32_t thisLength, 3933 const UnicodeString& srcText, 3934 int32_t srcStart, 3935 int32_t srcLength) const 3936 { 3937 if(srcText.isBogus()) { 3938 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3939 } else { 3940 srcText.pinIndices(srcStart, srcLength); 3941 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3942 } 3943 } 3944 3945 inline int8_t 3946 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 3947 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 3948 3949 inline int8_t 3950 UnicodeString::compareCodePointOrder(int32_t start, 3951 int32_t _length, 3952 const UnicodeString& srcText) const 3953 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 3954 3955 inline int8_t 3956 UnicodeString::compareCodePointOrder(const UChar *srcChars, 3957 int32_t srcLength) const 3958 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 3959 3960 inline int8_t 3961 UnicodeString::compareCodePointOrder(int32_t start, 3962 int32_t _length, 3963 const UnicodeString& srcText, 3964 int32_t srcStart, 3965 int32_t srcLength) const 3966 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 3967 3968 inline int8_t 3969 UnicodeString::compareCodePointOrder(int32_t start, 3970 int32_t _length, 3971 const UChar *srcChars) const 3972 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 3973 3974 inline int8_t 3975 UnicodeString::compareCodePointOrder(int32_t start, 3976 int32_t _length, 3977 const UChar *srcChars, 3978 int32_t srcStart, 3979 int32_t srcLength) const 3980 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 3981 3982 inline 
int8_t
UnicodeString::compareCodePointOrderBetween(int32_t start,
                                            int32_t limit,
                                            const UnicodeString& srcText,
                                            int32_t srcStart,
                                            int32_t srcLimit) const
{ return doCompareCodePointOrder(start, limit - start,
                                 srcText, srcStart, srcLimit - srcStart); }

// Case-insensitive string-against-string comparison: handles a bogus
// srcText, pins the source range, then defers to the UChar* overload.
inline int8_t
UnicodeString::doCaseCompare(int32_t start,
                             int32_t thisLength,
                             const UnicodeString &srcText,
                             int32_t srcStart,
                             int32_t srcLength,
                             uint32_t options) const
{
  if(srcText.isBogus()) {
    // 0 if both are bogus, 1 otherwise
    return (int8_t)!isBogus();
  }
  srcText.pinIndices(srcStart, srcLength);
  const UChar *srcChars = srcText.getArrayStart();
  return doCaseCompare(start, thisLength, srcChars, srcStart, srcLength, options);
}

// Whole string against whole string.
inline int8_t
UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const
{
  return doCaseCompare(0, length(), text, 0, text.length(), options);
}

// Range [start, start + _length) of this string against all of srcText.
inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const UnicodeString &srcText,
                           uint32_t options) const
{
  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
}

// Whole string against srcLength code units of srcChars.
inline int8_t
UnicodeString::caseCompare(const UChar *srcChars,
                           int32_t srcLength,
                           uint32_t options) const
{
  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
}

// Range of this string against a range of srcText.
inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const UnicodeString &srcText,
                           int32_t srcStart,
                           int32_t srcLength,
                           uint32_t options) const
{
  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
}

// Range of this string against srcChars; _length is also passed as the
// source length.
inline int8_t
UnicodeString::caseCompare(int32_t start,
                           int32_t _length,
                           const UChar *srcChars,
                           uint32_t options) const
{
  return doCaseCompare(start, _length, srcChars, 0, _length, options);
}

inline int8_t
UnicodeString::caseCompare(int32_t start, 4047 int32_t _length, 4048 const UChar *srcChars, 4049 int32_t srcStart, 4050 int32_t srcLength, 4051 uint32_t options) const { 4052 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 4053 } 4054 4055 inline int8_t 4056 UnicodeString::caseCompareBetween(int32_t start, 4057 int32_t limit, 4058 const UnicodeString &srcText, 4059 int32_t srcStart, 4060 int32_t srcLimit, 4061 uint32_t options) const { 4062 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 4063 } 4064 4065 inline int32_t 4066 UnicodeString::indexOf(const UnicodeString& srcText, 4067 int32_t srcStart, 4068 int32_t srcLength, 4069 int32_t start, 4070 int32_t _length) const 4071 { 4072 if(!srcText.isBogus()) { 4073 srcText.pinIndices(srcStart, srcLength); 4074 if(srcLength > 0) { 4075 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 4076 } 4077 } 4078 return -1; 4079 } 4080 4081 inline int32_t 4082 UnicodeString::indexOf(const UnicodeString& text) const 4083 { return indexOf(text, 0, text.length(), 0, length()); } 4084 4085 inline int32_t 4086 UnicodeString::indexOf(const UnicodeString& text, 4087 int32_t start) const { 4088 pinIndex(start); 4089 return indexOf(text, 0, text.length(), start, length() - start); 4090 } 4091 4092 inline int32_t 4093 UnicodeString::indexOf(const UnicodeString& text, 4094 int32_t start, 4095 int32_t _length) const 4096 { return indexOf(text, 0, text.length(), start, _length); } 4097 4098 inline int32_t 4099 UnicodeString::indexOf(const UChar *srcChars, 4100 int32_t srcLength, 4101 int32_t start) const { 4102 pinIndex(start); 4103 return indexOf(srcChars, 0, srcLength, start, length() - start); 4104 } 4105 4106 inline int32_t 4107 UnicodeString::indexOf(const UChar *srcChars, 4108 int32_t srcLength, 4109 int32_t start, 4110 int32_t _length) const 4111 { return indexOf(srcChars, 0, srcLength, start, _length); } 4112 4113 inline int32_t 4114 
UnicodeString::indexOf(UChar c, 4115 int32_t start, 4116 int32_t _length) const 4117 { return doIndexOf(c, start, _length); } 4118 4119 inline int32_t 4120 UnicodeString::indexOf(UChar32 c, 4121 int32_t start, 4122 int32_t _length) const 4123 { return doIndexOf(c, start, _length); } 4124 4125 inline int32_t 4126 UnicodeString::indexOf(UChar c) const 4127 { return doIndexOf(c, 0, length()); } 4128 4129 inline int32_t 4130 UnicodeString::indexOf(UChar32 c) const 4131 { return indexOf(c, 0, length()); } 4132 4133 inline int32_t 4134 UnicodeString::indexOf(UChar c, 4135 int32_t start) const { 4136 pinIndex(start); 4137 return doIndexOf(c, start, length() - start); 4138 } 4139 4140 inline int32_t 4141 UnicodeString::indexOf(UChar32 c, 4142 int32_t start) const { 4143 pinIndex(start); 4144 return indexOf(c, start, length() - start); 4145 } 4146 4147 inline int32_t 4148 UnicodeString::lastIndexOf(const UChar *srcChars, 4149 int32_t srcLength, 4150 int32_t start, 4151 int32_t _length) const 4152 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 4153 4154 inline int32_t 4155 UnicodeString::lastIndexOf(const UChar *srcChars, 4156 int32_t srcLength, 4157 int32_t start) const { 4158 pinIndex(start); 4159 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 4160 } 4161 4162 inline int32_t 4163 UnicodeString::lastIndexOf(const UnicodeString& srcText, 4164 int32_t srcStart, 4165 int32_t srcLength, 4166 int32_t start, 4167 int32_t _length) const 4168 { 4169 if(!srcText.isBogus()) { 4170 srcText.pinIndices(srcStart, srcLength); 4171 if(srcLength > 0) { 4172 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 4173 } 4174 } 4175 return -1; 4176 } 4177 4178 inline int32_t 4179 UnicodeString::lastIndexOf(const UnicodeString& text, 4180 int32_t start, 4181 int32_t _length) const 4182 { return lastIndexOf(text, 0, text.length(), start, _length); } 4183 4184 inline int32_t 4185 UnicodeString::lastIndexOf(const UnicodeString& 
text, 4186 int32_t start) const { 4187 pinIndex(start); 4188 return lastIndexOf(text, 0, text.length(), start, length() - start); 4189 } 4190 4191 inline int32_t 4192 UnicodeString::lastIndexOf(const UnicodeString& text) const 4193 { return lastIndexOf(text, 0, text.length(), 0, length()); } 4194 4195 inline int32_t 4196 UnicodeString::lastIndexOf(UChar c, 4197 int32_t start, 4198 int32_t _length) const 4199 { return doLastIndexOf(c, start, _length); } 4200 4201 inline int32_t 4202 UnicodeString::lastIndexOf(UChar32 c, 4203 int32_t start, 4204 int32_t _length) const { 4205 return doLastIndexOf(c, start, _length); 4206 } 4207 4208 inline int32_t 4209 UnicodeString::lastIndexOf(UChar c) const 4210 { return doLastIndexOf(c, 0, length()); } 4211 4212 inline int32_t 4213 UnicodeString::lastIndexOf(UChar32 c) const { 4214 return lastIndexOf(c, 0, length()); 4215 } 4216 4217 inline int32_t 4218 UnicodeString::lastIndexOf(UChar c, 4219 int32_t start) const { 4220 pinIndex(start); 4221 return doLastIndexOf(c, start, length() - start); 4222 } 4223 4224 inline int32_t 4225 UnicodeString::lastIndexOf(UChar32 c, 4226 int32_t start) const { 4227 pinIndex(start); 4228 return lastIndexOf(c, start, length() - start); 4229 } 4230 4231 inline UBool 4232 UnicodeString::startsWith(const UnicodeString& text) const 4233 { return compare(0, text.length(), text, 0, text.length()) == 0; } 4234 4235 inline UBool 4236 UnicodeString::startsWith(const UnicodeString& srcText, 4237 int32_t srcStart, 4238 int32_t srcLength) const 4239 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 4240 4241 inline UBool 4242 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { 4243 if(srcLength < 0) { 4244 srcLength = u_strlen(srcChars); 4245 } 4246 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 4247 } 4248 4249 inline UBool 4250 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { 4251 if(srcLength < 0) { 4252 
srcLength = u_strlen(srcChars); 4253 } 4254 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 4255 } 4256 4257 inline UBool 4258 UnicodeString::endsWith(const UnicodeString& text) const 4259 { return doCompare(length() - text.length(), text.length(), 4260 text, 0, text.length()) == 0; } 4261 4262 inline UBool 4263 UnicodeString::endsWith(const UnicodeString& srcText, 4264 int32_t srcStart, 4265 int32_t srcLength) const { 4266 srcText.pinIndices(srcStart, srcLength); 4267 return doCompare(length() - srcLength, srcLength, 4268 srcText, srcStart, srcLength) == 0; 4269 } 4270 4271 inline UBool 4272 UnicodeString::endsWith(const UChar *srcChars, 4273 int32_t srcLength) const { 4274 if(srcLength < 0) { 4275 srcLength = u_strlen(srcChars); 4276 } 4277 return doCompare(length() - srcLength, srcLength, 4278 srcChars, 0, srcLength) == 0; 4279 } 4280 4281 inline UBool 4282 UnicodeString::endsWith(const UChar *srcChars, 4283 int32_t srcStart, 4284 int32_t srcLength) const { 4285 if(srcLength < 0) { 4286 srcLength = u_strlen(srcChars + srcStart); 4287 } 4288 return doCompare(length() - srcLength, srcLength, 4289 srcChars, srcStart, srcLength) == 0; 4290 } 4291 4292 //======================================== 4293 // replace 4294 //======================================== 4295 inline UnicodeString& 4296 UnicodeString::replace(int32_t start, 4297 int32_t _length, 4298 const UnicodeString& srcText) 4299 { return doReplace(start, _length, srcText, 0, srcText.length()); } 4300 4301 inline UnicodeString& 4302 UnicodeString::replace(int32_t start, 4303 int32_t _length, 4304 const UnicodeString& srcText, 4305 int32_t srcStart, 4306 int32_t srcLength) 4307 { return doReplace(start, _length, srcText, srcStart, srcLength); } 4308 4309 inline UnicodeString& 4310 UnicodeString::replace(int32_t start, 4311 int32_t _length, 4312 const UChar *srcChars, 4313 int32_t srcLength) 4314 { return doReplace(start, _length, srcChars, 0, srcLength); } 4315 4316 inline UnicodeString& 
4317 UnicodeString::replace(int32_t start, 4318 int32_t _length, 4319 const UChar *srcChars, 4320 int32_t srcStart, 4321 int32_t srcLength) 4322 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 4323 4324 inline UnicodeString& 4325 UnicodeString::replace(int32_t start, 4326 int32_t _length, 4327 UChar srcChar) 4328 { return doReplace(start, _length, &srcChar, 0, 1); } 4329 4330 inline UnicodeString& 4331 UnicodeString::replaceBetween(int32_t start, 4332 int32_t limit, 4333 const UnicodeString& srcText) 4334 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 4335 4336 inline UnicodeString& 4337 UnicodeString::replaceBetween(int32_t start, 4338 int32_t limit, 4339 const UnicodeString& srcText, 4340 int32_t srcStart, 4341 int32_t srcLimit) 4342 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 4343 4344 inline UnicodeString& 4345 UnicodeString::findAndReplace(const UnicodeString& oldText, 4346 const UnicodeString& newText) 4347 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 4348 newText, 0, newText.length()); } 4349 4350 inline UnicodeString& 4351 UnicodeString::findAndReplace(int32_t start, 4352 int32_t _length, 4353 const UnicodeString& oldText, 4354 const UnicodeString& newText) 4355 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 4356 newText, 0, newText.length()); } 4357 4358 // ============================ 4359 // extract 4360 // ============================ 4361 inline void 4362 UnicodeString::doExtract(int32_t start, 4363 int32_t _length, 4364 UnicodeString& target) const 4365 { target.replace(0, target.length(), *this, start, _length); } 4366 4367 inline void 4368 UnicodeString::extract(int32_t start, 4369 int32_t _length, 4370 UChar *target, 4371 int32_t targetStart) const 4372 { doExtract(start, _length, target, targetStart); } 4373 4374 inline void 4375 UnicodeString::extract(int32_t start, 4376 int32_t _length, 4377 UnicodeString& target) 
const 4378 { doExtract(start, _length, target); } 4379 4380 #if !UCONFIG_NO_CONVERSION 4381 4382 inline int32_t 4383 UnicodeString::extract(int32_t start, 4384 int32_t _length, 4385 char *dst, 4386 const char *codepage) const 4387 4388 { 4389 // This dstSize value will be checked explicitly 4390 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); 4391 } 4392 4393 #endif 4394 4395 inline void 4396 UnicodeString::extractBetween(int32_t start, 4397 int32_t limit, 4398 UChar *dst, 4399 int32_t dstStart) const { 4400 pinIndex(start); 4401 pinIndex(limit); 4402 doExtract(start, limit - start, dst, dstStart); 4403 } 4404 4405 inline UnicodeString 4406 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 4407 return tempSubString(start, limit - start); 4408 } 4409 4410 inline UChar 4411 UnicodeString::doCharAt(int32_t offset) const 4412 { 4413 if((uint32_t)offset < (uint32_t)length()) { 4414 return getArrayStart()[offset]; 4415 } else { 4416 return kInvalidUChar; 4417 } 4418 } 4419 4420 inline UChar 4421 UnicodeString::charAt(int32_t offset) const 4422 { return doCharAt(offset); } 4423 4424 inline UChar 4425 UnicodeString::operator[] (int32_t offset) const 4426 { return doCharAt(offset); } 4427 4428 inline UBool 4429 UnicodeString::isEmpty() const { 4430 // Arithmetic or logical right shift does not matter: only testing for 0. 
4431 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0; 4432 } 4433 4434 //======================================== 4435 // Write implementation methods 4436 //======================================== 4437 inline void 4438 UnicodeString::setZeroLength() { 4439 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags; 4440 } 4441 4442 inline void 4443 UnicodeString::setShortLength(int32_t len) { 4444 // requires 0 <= len <= kMaxShortLength 4445 fUnion.fFields.fLengthAndFlags = 4446 (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); 4447 } 4448 4449 inline void 4450 UnicodeString::setLength(int32_t len) { 4451 if(len <= kMaxShortLength) { 4452 setShortLength(len); 4453 } else { 4454 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge; 4455 fUnion.fFields.fLength = len; 4456 } 4457 } 4458 4459 inline void 4460 UnicodeString::setToEmpty() { 4461 fUnion.fFields.fLengthAndFlags = kShortString; 4462 } 4463 4464 inline void 4465 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 4466 setLength(len); 4467 fUnion.fFields.fArray = array; 4468 fUnion.fFields.fCapacity = capacity; 4469 } 4470 4471 inline UnicodeString& 4472 UnicodeString::operator= (UChar ch) 4473 { return doReplace(0, length(), &ch, 0, 1); } 4474 4475 inline UnicodeString& 4476 UnicodeString::operator= (UChar32 ch) 4477 { return replace(0, length(), ch); } 4478 4479 inline UnicodeString& 4480 UnicodeString::setTo(const UnicodeString& srcText, 4481 int32_t srcStart, 4482 int32_t srcLength) 4483 { 4484 unBogus(); 4485 return doReplace(0, length(), srcText, srcStart, srcLength); 4486 } 4487 4488 inline UnicodeString& 4489 UnicodeString::setTo(const UnicodeString& srcText, 4490 int32_t srcStart) 4491 { 4492 unBogus(); 4493 srcText.pinIndex(srcStart); 4494 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 4495 } 4496 4497 inline UnicodeString& 4498 UnicodeString::setTo(const UnicodeString& srcText) 4499 { 4500 return 
copyFrom(srcText); 4501 } 4502 4503 inline UnicodeString& 4504 UnicodeString::setTo(const UChar *srcChars, 4505 int32_t srcLength) 4506 { 4507 unBogus(); 4508 return doReplace(0, length(), srcChars, 0, srcLength); 4509 } 4510 4511 inline UnicodeString& 4512 UnicodeString::setTo(UChar srcChar) 4513 { 4514 unBogus(); 4515 return doReplace(0, length(), &srcChar, 0, 1); 4516 } 4517 4518 inline UnicodeString& 4519 UnicodeString::setTo(UChar32 srcChar) 4520 { 4521 unBogus(); 4522 return replace(0, length(), srcChar); 4523 } 4524 4525 inline UnicodeString& 4526 UnicodeString::append(const UnicodeString& srcText, 4527 int32_t srcStart, 4528 int32_t srcLength) 4529 { return doAppend(srcText, srcStart, srcLength); } 4530 4531 inline UnicodeString& 4532 UnicodeString::append(const UnicodeString& srcText) 4533 { return doAppend(srcText, 0, srcText.length()); } 4534 4535 inline UnicodeString& 4536 UnicodeString::append(const UChar *srcChars, 4537 int32_t srcStart, 4538 int32_t srcLength) 4539 { return doAppend(srcChars, srcStart, srcLength); } 4540 4541 inline UnicodeString& 4542 UnicodeString::append(const UChar *srcChars, 4543 int32_t srcLength) 4544 { return doAppend(srcChars, 0, srcLength); } 4545 4546 inline UnicodeString& 4547 UnicodeString::append(UChar srcChar) 4548 { return doAppend(&srcChar, 0, 1); } 4549 4550 inline UnicodeString& 4551 UnicodeString::operator+= (UChar ch) 4552 { return doAppend(&ch, 0, 1); } 4553 4554 inline UnicodeString& 4555 UnicodeString::operator+= (UChar32 ch) { 4556 return append(ch); 4557 } 4558 4559 inline UnicodeString& 4560 UnicodeString::operator+= (const UnicodeString& srcText) 4561 { return doAppend(srcText, 0, srcText.length()); } 4562 4563 inline UnicodeString& 4564 UnicodeString::insert(int32_t start, 4565 const UnicodeString& srcText, 4566 int32_t srcStart, 4567 int32_t srcLength) 4568 { return doReplace(start, 0, srcText, srcStart, srcLength); } 4569 4570 inline UnicodeString& 4571 UnicodeString::insert(int32_t start, 4572 const 
UnicodeString& srcText) 4573 { return doReplace(start, 0, srcText, 0, srcText.length()); } 4574 4575 inline UnicodeString& 4576 UnicodeString::insert(int32_t start, 4577 const UChar *srcChars, 4578 int32_t srcStart, 4579 int32_t srcLength) 4580 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 4581 4582 inline UnicodeString& 4583 UnicodeString::insert(int32_t start, 4584 const UChar *srcChars, 4585 int32_t srcLength) 4586 { return doReplace(start, 0, srcChars, 0, srcLength); } 4587 4588 inline UnicodeString& 4589 UnicodeString::insert(int32_t start, 4590 UChar srcChar) 4591 { return doReplace(start, 0, &srcChar, 0, 1); } 4592 4593 inline UnicodeString& 4594 UnicodeString::insert(int32_t start, 4595 UChar32 srcChar) 4596 { return replace(start, 0, srcChar); } 4597 4598 4599 inline UnicodeString& 4600 UnicodeString::remove() 4601 { 4602 // remove() of a bogus string makes the string empty and non-bogus 4603 if(isBogus()) { 4604 setToEmpty(); 4605 } else { 4606 setZeroLength(); 4607 } 4608 return *this; 4609 } 4610 4611 inline UnicodeString& 4612 UnicodeString::remove(int32_t start, 4613 int32_t _length) 4614 { 4615 if(start <= 0 && _length == INT32_MAX) { 4616 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 4617 return remove(); 4618 } 4619 return doReplace(start, _length, NULL, 0, 0); 4620 } 4621 4622 inline UnicodeString& 4623 UnicodeString::removeBetween(int32_t start, 4624 int32_t limit) 4625 { return doReplace(start, limit - start, NULL, 0, 0); } 4626 4627 inline UnicodeString & 4628 UnicodeString::retainBetween(int32_t start, int32_t limit) { 4629 truncate(limit); 4630 return doReplace(0, start, NULL, 0, 0); 4631 } 4632 4633 inline UBool 4634 UnicodeString::truncate(int32_t targetLength) 4635 { 4636 if(isBogus() && targetLength == 0) { 4637 // truncate(0) of a bogus string makes the string empty and non-bogus 4638 unBogus(); 4639 return FALSE; 4640 } else if((uint32_t)targetLength < (uint32_t)length()) { 4641 
setLength(targetLength); 4642 return TRUE; 4643 } else { 4644 return FALSE; 4645 } 4646 } 4647 4648 inline UnicodeString& 4649 UnicodeString::reverse() 4650 { return doReverse(0, length()); } 4651 4652 inline UnicodeString& 4653 UnicodeString::reverse(int32_t start, 4654 int32_t _length) 4655 { return doReverse(start, _length); } 4656 4657 U_NAMESPACE_END 4658 4659 #endif 4660