1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 1998-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * 9 * File unistr.h 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 09/25/98 stephen Creation. 15 * 11/11/98 stephen Changed per 11/9 code review. 16 * 04/20/99 stephen Overhauled per 4/16 code review. 17 * 11/18/99 aliu Made to inherit from Replaceable. Added method 18 * handleReplaceBetween(); other methods unchanged. 19 * 06/25/01 grhoten Remove dependency on iostream. 20 ****************************************************************************** 21 */ 22 23 #ifndef UNISTR_H 24 #define UNISTR_H 25 26 /** 27 * \file 28 * \brief C++ API: Unicode String 29 */ 30 31 #include <cstddef> 32 #include "unicode/utypes.h" 33 #include "unicode/char16ptr.h" 34 #include "unicode/rep.h" 35 #include "unicode/std_string.h" 36 #include "unicode/stringpiece.h" 37 #include "unicode/bytestream.h" 38 39 struct UConverter; // unicode/ucnv.h 40 41 #ifndef USTRING_H 42 /** 43 * \ingroup ustring_ustrlen 44 */ 45 U_STABLE int32_t U_EXPORT2 46 u_strlen(const UChar *s); 47 #endif 48 49 U_NAMESPACE_BEGIN 50 51 #if !UCONFIG_NO_BREAK_ITERATION 52 class BreakIterator; // unicode/brkiter.h 53 #endif 54 class Edits; 55 56 U_NAMESPACE_END 57 58 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. 59 /** 60 * Internal string case mapping function type. 61 * All error checking must be done. 62 * src and dest must not overlap. 
63 * @internal 64 */ 65 typedef int32_t U_CALLCONV 66 UStringCaseMapper(int32_t caseLocale, uint32_t options, 67 #if !UCONFIG_NO_BREAK_ITERATION 68 icu::BreakIterator *iter, 69 #endif 70 char16_t *dest, int32_t destCapacity, 71 const char16_t *src, int32_t srcLength, 72 icu::Edits *edits, 73 UErrorCode &errorCode); 74 75 U_NAMESPACE_BEGIN 76 77 class Locale; // unicode/locid.h 78 class StringCharacterIterator; 79 class UnicodeStringAppendable; // unicode/appendable.h 80 81 /* The <iostream> include has been moved to unicode/ustream.h */ 82 83 /** 84 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 85 * which constructs a Unicode string from an invariant-character char * string. 86 * About invariant characters see utypes.h. 87 * This constructor has no runtime dependency on conversion code and is 88 * therefore recommended over ones taking a charset name string 89 * (where the empty string "" indicates invariant-character conversion). 90 * 91 * @stable ICU 3.2 92 */ 93 #define US_INV icu::UnicodeString::kInvariant 94 95 /** 96 * Unicode String literals in C++. 97 * 98 * Note: these macros are not recommended for new code. 99 * Prior to the availability of C++11 and u"unicode string literals", 100 * these macros were provided for portability and efficiency when 101 * initializing UnicodeStrings from literals. 102 * 103 * They work only for strings that contain "invariant characters", i.e., 104 * only latin letters, digits, and some punctuation. 105 * See utypes.h for details. 106 * 107 * The string parameter must be a C string literal. 108 * The length of the string, not including the terminating 109 * <code>NUL</code>, must be specified as a constant. 
110 * @stable ICU 2.0 111 */ 112 #if !U_CHAR16_IS_TYPEDEF 113 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length) 114 #else 115 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length) 116 #endif 117 118 /** 119 * Unicode String literals in C++. 120 * Dependent on the platform properties, different UnicodeString 121 * constructors should be used to create a UnicodeString object from 122 * a string literal. 123 * The macros are defined for improved performance. 124 * They work only for strings that contain "invariant characters", i.e., 125 * only latin letters, digits, and some punctuation. 126 * See utypes.h for details. 127 * 128 * The string parameter must be a C string literal. 129 * @stable ICU 2.0 130 */ 131 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 132 133 /** 134 * \def UNISTR_FROM_CHAR_EXPLICIT 135 * This can be defined to be empty or "explicit". 136 * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) 137 * constructors are marked as explicit, preventing their inadvertent use. 138 * @stable ICU 49 139 */ 140 #ifndef UNISTR_FROM_CHAR_EXPLICIT 141 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 142 // Auto-"explicit" in ICU library code. 143 # define UNISTR_FROM_CHAR_EXPLICIT explicit 144 # else 145 // Empty by default for source code compatibility. 146 # define UNISTR_FROM_CHAR_EXPLICIT 147 # endif 148 #endif 149 150 /** 151 * \def UNISTR_FROM_STRING_EXPLICIT 152 * This can be defined to be empty or "explicit". 153 * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) 154 * constructors are marked as explicit, preventing their inadvertent use. 155 * 156 * In particular, this helps prevent accidentally depending on ICU conversion code 157 * by passing a string literal into an API with a const UnicodeString & parameter. 
158 * @stable ICU 49 159 */ 160 #ifndef UNISTR_FROM_STRING_EXPLICIT 161 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 162 // Auto-"explicit" in ICU library code. 163 # define UNISTR_FROM_STRING_EXPLICIT explicit 164 # else 165 // Empty by default for source code compatibility. 166 # define UNISTR_FROM_STRING_EXPLICIT 167 # endif 168 #endif 169 170 /** 171 * \def UNISTR_OBJECT_SIZE 172 * Desired sizeof(UnicodeString) in bytes. 173 * It should be a multiple of sizeof(pointer) to avoid unusable space for padding. 174 * The object size may want to be a multiple of 16 bytes, 175 * which is a common granularity for heap allocation. 176 * 177 * Any space inside the object beyond sizeof(vtable pointer) + 2 178 * is available for storing short strings inside the object. 179 * The bigger the object, the longer a string that can be stored inside the object, 180 * without additional heap allocation. 181 * 182 * Depending on a platform's pointer size, pointer alignment requirements, 183 * and struct padding, the compiler will usually round up sizeof(UnicodeString) 184 * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), 185 * to hold the fields for heap-allocated strings. 186 * Such a minimum size also ensures that the object is easily large enough 187 * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). 188 * 189 * sizeof(UnicodeString) >= 48 should work for all known platforms. 190 * 191 * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, 192 * sizeof(UnicodeString) = 64 would leave space for 193 * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 194 * char16_ts stored inside the object. 195 * 196 * The minimum object size on a 64-bit machine would be 197 * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, 198 * and the internal buffer would hold up to 11 char16_ts in that case. 
199 * 200 * @see U16_MAX_LENGTH 201 * @stable ICU 56 202 */ 203 #ifndef UNISTR_OBJECT_SIZE 204 # define UNISTR_OBJECT_SIZE 64 205 #endif 206 207 /** 208 * UnicodeString is a string class that stores Unicode characters directly and provides 209 * similar functionality as the Java String and StringBuffer/StringBuilder classes. 210 * It is a concrete implementation of the abstract class Replaceable (for transliteration). 211 * 212 * A UnicodeString may also "alias" an external array of characters 213 * (that is, point to it, rather than own the array) 214 * whose lifetime must then at least match the lifetime of the aliasing object. 215 * This aliasing may be preserved when returning a UnicodeString by value, 216 * depending on the compiler and the function implementation, 217 * via Return Value Optimization (RVO) or the move assignment operator. 218 * (However, the copy assignment operator does not preserve aliasing.) 219 * For details see the description of storage models at the end of the class API docs 220 * and in the User Guide chapter linked from there. 221 * 222 * The UnicodeString class is not suitable for subclassing. 223 * 224 * <p>For an overview of Unicode strings in C and C++ see the 225 * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p> 226 * 227 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. 228 * A Unicode character may be stored with either one code unit 229 * (the most common case) or with a matched pair of special code units 230 * ("surrogates"). The data type for code units is char16_t. 231 * For single-character handling, a Unicode character code <em>point</em> is a value 232 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p> 233 * 234 * <p>Indexes and offsets into and lengths of strings always count code units, not code points. 235 * This is the same as with multi-byte char* strings in traditional string handling. 
236 * Operations on partial strings typically do not test for code point boundaries. 237 * If necessary, the user needs to take care of such boundaries by testing for the code unit 238 * values or by using functions like 239 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() 240 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p> 241 * 242 * UnicodeString methods are more lenient with regard to input parameter values 243 * than other ICU APIs. In particular: 244 * - If indexes are out of bounds for a UnicodeString object 245 * (<0 or >length()) then they are "pinned" to the nearest boundary. 246 * - If primitive string pointer values (e.g., const char16_t * or char *) 247 * for input strings are NULL, then those input string parameters are treated 248 * as if they pointed to an empty string. 249 * However, this is <em>not</em> the case for char * parameters for charset names 250 * or other IDs. 251 * - Most UnicodeString methods do not take a UErrorCode parameter because 252 * there are usually very few opportunities for failure other than a shortage 253 * of memory, error codes in low-level C++ string methods would be inconvenient, 254 * and the error code as the last parameter (ICU convention) would prevent 255 * the use of default parameter values. 256 * Instead, such methods set the UnicodeString into a "bogus" state 257 * (see isBogus()) if an error occurs. 258 * 259 * In string comparisons, two UnicodeString objects that are both "bogus" 260 * compare equal (to be transitive and prevent endless loops in sorting), 261 * and a "bogus" string compares less than any non-"bogus" one. 262 * 263 * Const UnicodeString methods are thread-safe. Multiple threads can use 264 * const methods on the same UnicodeString object simultaneously, 265 * but non-const methods must not be called concurrently (in multiple threads) 266 * with any other (const or non-const) methods. 
267 * 268 * Similarly, const UnicodeString & parameters are thread-safe. 269 * One object may be passed in as such a parameter concurrently in multiple threads. 270 * This includes the const UnicodeString & parameters for 271 * copy construction, assignment, and cloning. 272 * 273 * <p>UnicodeString uses several storage methods. 274 * String contents can be stored inside the UnicodeString object itself, 275 * in an allocated and shared buffer, or in an outside buffer that is "aliased". 276 * Most of this is done transparently, but careful aliasing in particular provides 277 * significant performance improvements. 278 * Also, the internal buffer is accessible via special functions. 279 * For details see the 280 * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p> 281 * 282 * @see utf.h 283 * @see CharacterIterator 284 * @stable ICU 2.0 285 */ 286 class U_COMMON_API UnicodeString : public Replaceable 287 { 288 public: 289 290 /** 291 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 292 * which constructs a Unicode string from an invariant-character char * string. 293 * Use the macro US_INV instead of the full qualification for this value. 294 * 295 * @see US_INV 296 * @stable ICU 3.2 297 */ 298 enum EInvariant { 299 /** 300 * @see EInvariant 301 * @stable ICU 3.2 302 */ 303 kInvariant 304 }; 305 306 //======================================== 307 // Read-only operations 308 //======================================== 309 310 /* Comparison - bitwise only - for international comparison use collation */ 311 312 /** 313 * Equality operator. Performs only bitwise comparison. 314 * @param text The UnicodeString to compare to this one. 315 * @return TRUE if <TT>text</TT> contains the same characters as this one, 316 * FALSE otherwise. 
317 * @stable ICU 2.0 318 */ 319 inline UBool operator== (const UnicodeString& text) const; 320 321 /** 322 * Inequality operator. Performs only bitwise comparison. 323 * @param text The UnicodeString to compare to this one. 324 * @return FALSE if <TT>text</TT> contains the same characters as this one, 325 * TRUE otherwise. 326 * @stable ICU 2.0 327 */ 328 inline UBool operator!= (const UnicodeString& text) const; 329 330 /** 331 * Greater than operator. Performs only bitwise comparison. 332 * @param text The UnicodeString to compare to this one. 333 * @return TRUE if the characters in this are bitwise 334 * greater than the characters in <code>text</code>, FALSE otherwise 335 * @stable ICU 2.0 336 */ 337 inline UBool operator> (const UnicodeString& text) const; 338 339 /** 340 * Less than operator. Performs only bitwise comparison. 341 * @param text The UnicodeString to compare to this one. 342 * @return TRUE if the characters in this are bitwise 343 * less than the characters in <code>text</code>, FALSE otherwise 344 * @stable ICU 2.0 345 */ 346 inline UBool operator< (const UnicodeString& text) const; 347 348 /** 349 * Greater than or equal operator. Performs only bitwise comparison. 350 * @param text The UnicodeString to compare to this one. 351 * @return TRUE if the characters in this are bitwise 352 * greater than or equal to the characters in <code>text</code>, FALSE otherwise 353 * @stable ICU 2.0 354 */ 355 inline UBool operator>= (const UnicodeString& text) const; 356 357 /** 358 * Less than or equal operator. Performs only bitwise comparison. 359 * @param text The UnicodeString to compare to this one. 360 * @return TRUE if the characters in this are bitwise 361 * less than or equal to the characters in <code>text</code>, FALSE otherwise 362 * @stable ICU 2.0 363 */ 364 inline UBool operator<= (const UnicodeString& text) const; 365 366 /** 367 * Compare the characters bitwise in this UnicodeString to 368 * the characters in <code>text</code>. 
369 * @param text The UnicodeString to compare to this one. 370 * @return The result of bitwise character comparison: 0 if this 371 * contains the same characters as <code>text</code>, -1 if the characters in 372 * this are bitwise less than the characters in <code>text</code>, +1 if the 373 * characters in this are bitwise greater than the characters 374 * in <code>text</code>. 375 * @stable ICU 2.0 376 */ 377 inline int8_t compare(const UnicodeString& text) const; 378 379 /** 380 * Compare the characters bitwise in the range 381 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 382 * in the <b>entire string</b> <TT>text</TT>. 383 * (The parameters "start" and "length" are not applied to the other text "text".) 384 * @param start the offset at which the compare operation begins 385 * @param length the number of characters of text to compare. 386 * @param text the other text to be compared against this string. 387 * @return The result of bitwise character comparison: 0 if this 388 * contains the same characters as <code>text</code>, -1 if the characters in 389 * this are bitwise less than the characters in <code>text</code>, +1 if the 390 * characters in this are bitwise greater than the characters 391 * in <code>text</code>. 392 * @stable ICU 2.0 393 */ 394 inline int8_t compare(int32_t start, 395 int32_t length, 396 const UnicodeString& text) const; 397 398 /** 399 * Compare the characters bitwise in the range 400 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 401 * in <TT>srcText</TT> in the range 402 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 403 * @param start the offset at which the compare operation begins 404 * @param length the number of characters in this to compare. 
405 * @param srcText the text to be compared 406 * @param srcStart the offset into <TT>srcText</TT> to start comparison 407 * @param srcLength the number of characters in <TT>srcText</TT> to compare 408 * @return The result of bitwise character comparison: 0 if this 409 * contains the same characters as <code>srcText</code>, -1 if the characters in 410 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 411 * characters in this are bitwise greater than the characters 412 * in <code>srcText</code>. 413 * @stable ICU 2.0 414 */ 415 inline int8_t compare(int32_t start, 416 int32_t length, 417 const UnicodeString& srcText, 418 int32_t srcStart, 419 int32_t srcLength) const; 420 421 /** 422 * Compare the characters bitwise in this UnicodeString with the first 423 * <TT>srcLength</TT> characters in <TT>srcChars</TT>. 424 * @param srcChars The characters to compare to this UnicodeString. 425 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 426 * @return The result of bitwise character comparison: 0 if this 427 * contains the same characters as <code>srcChars</code>, -1 if the characters in 428 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 429 * characters in this are bitwise greater than the characters 430 * in <code>srcChars</code>. 431 * @stable ICU 2.0 432 */ 433 inline int8_t compare(ConstChar16Ptr srcChars, 434 int32_t srcLength) const; 435 436 /** 437 * Compare the characters bitwise in the range 438 * [<TT>start</TT>, <TT>start + length</TT>) with the first 439 * <TT>length</TT> characters in <TT>srcChars</TT> 440 * @param start the offset at which the compare operation begins 441 * @param length the number of characters to compare.
442 * @param srcChars the characters to be compared 443 * @return The result of bitwise character comparison: 0 if this 444 * contains the same characters as <code>srcChars</code>, -1 if the characters in 445 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 446 * characters in this are bitwise greater than the characters 447 * in <code>srcChars</code>. 448 * @stable ICU 2.0 449 */ 450 inline int8_t compare(int32_t start, 451 int32_t length, 452 const char16_t *srcChars) const; 453 454 /** 455 * Compare the characters bitwise in the range 456 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 457 * in <TT>srcChars</TT> in the range 458 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 459 * @param start the offset at which the compare operation begins 460 * @param length the number of characters in this to compare 461 * @param srcChars the characters to be compared 462 * @param srcStart the offset into <TT>srcChars</TT> to start comparison 463 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 464 * @return The result of bitwise character comparison: 0 if this 465 * contains the same characters as <code>srcChars</code>, -1 if the characters in 466 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 467 * characters in this are bitwise greater than the characters 468 * in <code>srcChars</code>. 469 * @stable ICU 2.0 470 */ 471 inline int8_t compare(int32_t start, 472 int32_t length, 473 const char16_t *srcChars, 474 int32_t srcStart, 475 int32_t srcLength) const; 476 477 /** 478 * Compare the characters bitwise in the range 479 * [<TT>start</TT>, <TT>limit</TT>) with the characters 480 * in <TT>srcText</TT> in the range 481 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). 
482 * @param start the offset at which the compare operation begins 483 * @param limit the offset immediately following the compare operation 484 * @param srcText the text to be compared 485 * @param srcStart the offset into <TT>srcText</TT> to start comparison 486 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison 487 * @return The result of bitwise character comparison: 0 if this 488 * contains the same characters as <code>srcText</code>, -1 if the characters in 489 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 490 * characters in this are bitwise greater than the characters 491 * in <code>srcText</code>. 492 * @stable ICU 2.0 493 */ 494 inline int8_t compareBetween(int32_t start, 495 int32_t limit, 496 const UnicodeString& srcText, 497 int32_t srcStart, 498 int32_t srcLimit) const; 499 500 /** 501 * Compare two Unicode strings in code point order. 502 * The result may be different from the results of compare(), operator<, etc. 503 * if supplementary characters are present: 504 * 505 * In UTF-16, supplementary characters (with code points U+10000 and above) are 506 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 507 * which means that they compare as less than some other BMP characters like U+feff. 508 * This function compares Unicode strings in code point order. 509 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 510 * 511 * @param text Another string to compare this one to. 512 * @return a negative/zero/positive integer corresponding to whether 513 * this string is less than/equal to/greater than the second one 514 * in code point order 515 * @stable ICU 2.0 516 */ 517 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 518 519 /** 520 * Compare two Unicode strings in code point order. 521 * The result may be different from the results of compare(), operator<, etc. 
522 * if supplementary characters are present: 523 * 524 * In UTF-16, supplementary characters (with code points U+10000 and above) are 525 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 526 * which means that they compare as less than some other BMP characters like U+feff. 527 * This function compares Unicode strings in code point order. 528 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 529 * 530 * @param start The start offset in this string at which the compare operation begins. 531 * @param length The number of code units from this string to compare. 532 * @param srcText Another string to compare this one to. 533 * @return a negative/zero/positive integer corresponding to whether 534 * this string is less than/equal to/greater than the second one 535 * in code point order 536 * @stable ICU 2.0 537 */ 538 inline int8_t compareCodePointOrder(int32_t start, 539 int32_t length, 540 const UnicodeString& srcText) const; 541 542 /** 543 * Compare two Unicode strings in code point order. 544 * The result may be different from the results of compare(), operator<, etc. 545 * if supplementary characters are present: 546 * 547 * In UTF-16, supplementary characters (with code points U+10000 and above) are 548 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 549 * which means that they compare as less than some other BMP characters like U+feff. 550 * This function compares Unicode strings in code point order. 551 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 552 * 553 * @param start The start offset in this string at which the compare operation begins. 554 * @param length The number of code units from this string to compare. 555 * @param srcText Another string to compare this one to. 
556 * @param srcStart The start offset in that string at which the compare operation begins. 557 * @param srcLength The number of code units from that string to compare. 558 * @return a negative/zero/positive integer corresponding to whether 559 * this string is less than/equal to/greater than the second one 560 * in code point order 561 * @stable ICU 2.0 562 */ 563 inline int8_t compareCodePointOrder(int32_t start, 564 int32_t length, 565 const UnicodeString& srcText, 566 int32_t srcStart, 567 int32_t srcLength) const; 568 569 /** 570 * Compare two Unicode strings in code point order. 571 * The result may be different from the results of compare(), operator<, etc. 572 * if supplementary characters are present: 573 * 574 * In UTF-16, supplementary characters (with code points U+10000 and above) are 575 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 576 * which means that they compare as less than some other BMP characters like U+feff. 577 * This function compares Unicode strings in code point order. 578 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 579 * 580 * @param srcChars A pointer to another string to compare this one to. 581 * @param srcLength The number of code units from that string to compare. 582 * @return a negative/zero/positive integer corresponding to whether 583 * this string is less than/equal to/greater than the second one 584 * in code point order 585 * @stable ICU 2.0 586 */ 587 inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, 588 int32_t srcLength) const; 589 590 /** 591 * Compare two Unicode strings in code point order. 592 * The result may be different from the results of compare(), operator<, etc. 593 * if supplementary characters are present: 594 * 595 * In UTF-16, supplementary characters (with code points U+10000 and above) are 596 * stored with pairs of surrogate code units. 
These have values from 0xd800 to 0xdfff, 597 * which means that they compare as less than some other BMP characters like U+feff. 598 * This function compares Unicode strings in code point order. 599 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 600 * 601 * @param start The start offset in this string at which the compare operation begins. 602 * @param length The number of code units from this string to compare. 603 * @param srcChars A pointer to another string to compare this one to. 604 * @return a negative/zero/positive integer corresponding to whether 605 * this string is less than/equal to/greater than the second one 606 * in code point order 607 * @stable ICU 2.0 608 */ 609 inline int8_t compareCodePointOrder(int32_t start, 610 int32_t length, 611 const char16_t *srcChars) const; 612 613 /** 614 * Compare two Unicode strings in code point order. 615 * The result may be different from the results of compare(), operator<, etc. 616 * if supplementary characters are present: 617 * 618 * In UTF-16, supplementary characters (with code points U+10000 and above) are 619 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 620 * which means that they compare as less than some other BMP characters like U+feff. 621 * This function compares Unicode strings in code point order. 622 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 623 * 624 * @param start The start offset in this string at which the compare operation begins. 625 * @param length The number of code units from this string to compare. 626 * @param srcChars A pointer to another string to compare this one to. 627 * @param srcStart The start offset in that string at which the compare operation begins. 628 * @param srcLength The number of code units from that string to compare. 
629 * @return a negative/zero/positive integer corresponding to whether 630 * this string is less than/equal to/greater than the second one 631 * in code point order 632 * @stable ICU 2.0 633 */ 634 inline int8_t compareCodePointOrder(int32_t start, 635 int32_t length, 636 const char16_t *srcChars, 637 int32_t srcStart, 638 int32_t srcLength) const; 639 640 /** 641 * Compare two Unicode strings in code point order. 642 * The result may be different from the results of compare(), operator<, etc. 643 * if supplementary characters are present: 644 * 645 * In UTF-16, supplementary characters (with code points U+10000 and above) are 646 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 647 * which means that they compare as less than some other BMP characters like U+feff. 648 * This function compares Unicode strings in code point order. 649 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 650 * 651 * @param start The start offset in this string at which the compare operation begins. 652 * @param limit The offset after the last code unit from this string to compare. 653 * @param srcText Another string to compare this one to. 654 * @param srcStart The start offset in that string at which the compare operation begins. 655 * @param srcLimit The offset after the last code unit from that string to compare. 656 * @return a negative/zero/positive integer corresponding to whether 657 * this string is less than/equal to/greater than the second one 658 * in code point order 659 * @stable ICU 2.0 660 */ 661 inline int8_t compareCodePointOrderBetween(int32_t start, 662 int32_t limit, 663 const UnicodeString& srcText, 664 int32_t srcStart, 665 int32_t srcLimit) const; 666 667 /** 668 * Compare two strings case-insensitively using full case folding. 669 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). 
670 * 671 * @param text Another string to compare this one to. 672 * @param options A bit set of options: 673 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 674 * Comparison in code unit order with default case folding. 675 * 676 * - U_COMPARE_CODE_POINT_ORDER 677 * Set to choose code point order instead of code unit order 678 * (see u_strCompare for details). 679 * 680 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 681 * 682 * @return A negative, zero, or positive integer indicating the comparison result. 683 * @stable ICU 2.0 684 */ 685 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 686 687 /** 688 * Compare two strings case-insensitively using full case folding. 689 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 690 * 691 * @param start The start offset in this string at which the compare operation begins. 692 * @param length The number of code units from this string to compare. 693 * @param srcText Another string to compare this one to. 694 * @param options A bit set of options: 695 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 696 * Comparison in code unit order with default case folding. 697 * 698 * - U_COMPARE_CODE_POINT_ORDER 699 * Set to choose code point order instead of code unit order 700 * (see u_strCompare for details). 701 * 702 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 703 * 704 * @return A negative, zero, or positive integer indicating the comparison result. 705 * @stable ICU 2.0 706 */ 707 inline int8_t caseCompare(int32_t start, 708 int32_t length, 709 const UnicodeString& srcText, 710 uint32_t options) const; 711 712 /** 713 * Compare two strings case-insensitively using full case folding. 714 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 715 * 716 * @param start The start offset in this string at which the compare operation begins. 717 * @param length The number of code units from this string to compare. 
718 * @param srcText Another string to compare this one to. 719 * @param srcStart The start offset in that string at which the compare operation begins. 720 * @param srcLength The number of code units from that string to compare. 721 * @param options A bit set of options: 722 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 723 * Comparison in code unit order with default case folding. 724 * 725 * - U_COMPARE_CODE_POINT_ORDER 726 * Set to choose code point order instead of code unit order 727 * (see u_strCompare for details). 728 * 729 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 730 * 731 * @return A negative, zero, or positive integer indicating the comparison result. 732 * @stable ICU 2.0 733 */ 734 inline int8_t caseCompare(int32_t start, 735 int32_t length, 736 const UnicodeString& srcText, 737 int32_t srcStart, 738 int32_t srcLength, 739 uint32_t options) const; 740 741 /** 742 * Compare two strings case-insensitively using full case folding. 743 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 744 * 745 * @param srcChars A pointer to another string to compare this one to. 746 * @param srcLength The number of code units from that string to compare. 747 * @param options A bit set of options: 748 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 749 * Comparison in code unit order with default case folding. 750 * 751 * - U_COMPARE_CODE_POINT_ORDER 752 * Set to choose code point order instead of code unit order 753 * (see u_strCompare for details). 754 * 755 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 756 * 757 * @return A negative, zero, or positive integer indicating the comparison result. 758 * @stable ICU 2.0 759 */ 760 inline int8_t caseCompare(ConstChar16Ptr srcChars, 761 int32_t srcLength, 762 uint32_t options) const; 763 764 /** 765 * Compare two strings case-insensitively using full case folding. 766 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 
767 * 768 * @param start The start offset in this string at which the compare operation begins. 769 * @param length The number of code units from this string to compare. 770 * @param srcChars A pointer to another string to compare this one to. 771 * @param options A bit set of options: 772 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 773 * Comparison in code unit order with default case folding. 774 * 775 * - U_COMPARE_CODE_POINT_ORDER 776 * Set to choose code point order instead of code unit order 777 * (see u_strCompare for details). 778 * 779 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 780 * 781 * @return A negative, zero, or positive integer indicating the comparison result. 782 * @stable ICU 2.0 783 */ 784 inline int8_t caseCompare(int32_t start, 785 int32_t length, 786 const char16_t *srcChars, 787 uint32_t options) const; 788 789 /** 790 * Compare two strings case-insensitively using full case folding. 791 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 792 * 793 * @param start The start offset in this string at which the compare operation begins. 794 * @param length The number of code units from this string to compare. 795 * @param srcChars A pointer to another string to compare this one to. 796 * @param srcStart The start offset in that string at which the compare operation begins. 797 * @param srcLength The number of code units from that string to compare. 798 * @param options A bit set of options: 799 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 800 * Comparison in code unit order with default case folding. 801 * 802 * - U_COMPARE_CODE_POINT_ORDER 803 * Set to choose code point order instead of code unit order 804 * (see u_strCompare for details). 805 * 806 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 807 * 808 * @return A negative, zero, or positive integer indicating the comparison result. 
809 * @stable ICU 2.0 810 */ 811 inline int8_t caseCompare(int32_t start, 812 int32_t length, 813 const char16_t *srcChars, 814 int32_t srcStart, 815 int32_t srcLength, 816 uint32_t options) const; 817 818 /** 819 * Compare two strings case-insensitively using full case folding. 820 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)). 821 * 822 * @param start The start offset in this string at which the compare operation begins. 823 * @param limit The offset after the last code unit from this string to compare. 824 * @param srcText Another string to compare this one to. 825 * @param srcStart The start offset in that string at which the compare operation begins. 826 * @param srcLimit The offset after the last code unit from that string to compare. 827 * @param options A bit set of options: 828 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 829 * Comparison in code unit order with default case folding. 830 * 831 * - U_COMPARE_CODE_POINT_ORDER 832 * Set to choose code point order instead of code unit order 833 * (see u_strCompare for details). 834 * 835 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 836 * 837 * @return A negative, zero, or positive integer indicating the comparison result. 838 * @stable ICU 2.0 839 */ 840 inline int8_t caseCompareBetween(int32_t start, 841 int32_t limit, 842 const UnicodeString& srcText, 843 int32_t srcStart, 844 int32_t srcLimit, 845 uint32_t options) const; 846 847 /** 848 * Determine if this starts with the characters in <TT>text</TT> 849 * @param text The text to match. 850 * @return TRUE if this starts with the characters in <TT>text</TT>, 851 * FALSE otherwise 852 * @stable ICU 2.0 853 */ 854 inline UBool startsWith(const UnicodeString& text) const; 855 856 /** 857 * Determine if this starts with the characters in <TT>srcText</TT> 858 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 859 * @param srcText The text to match.
860 * @param srcStart the offset into <TT>srcText</TT> to start matching 861 * @param srcLength the number of characters in <TT>srcText</TT> to match 862 * @return TRUE if this starts with the characters in <TT>text</TT>, 863 * FALSE otherwise 864 * @stable ICU 2.0 865 */ 866 inline UBool startsWith(const UnicodeString& srcText, 867 int32_t srcStart, 868 int32_t srcLength) const; 869 870 /** 871 * Determine if this starts with the characters in <TT>srcChars</TT> 872 * @param srcChars The characters to match. 873 * @param srcLength the number of characters in <TT>srcChars</TT> 874 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, 875 * FALSE otherwise 876 * @stable ICU 2.0 877 */ 878 inline UBool startsWith(ConstChar16Ptr srcChars, 879 int32_t srcLength) const; 880 881 /** 882 * Determine if this starts with the characters in <TT>srcChars</TT> 883 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 884 * @param srcChars The characters to match. 885 * @param srcStart the offset into <TT>srcChars</TT> to start matching 886 * @param srcLength the number of characters in <TT>srcChars</TT> to match 887 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise 888 * @stable ICU 2.0 889 */ 890 inline UBool startsWith(const char16_t *srcChars, 891 int32_t srcStart, 892 int32_t srcLength) const; 893 894 /** 895 * Determine if this ends with the characters in <TT>text</TT> 896 * @param text The text to match. 897 * @return TRUE if this ends with the characters in <TT>text</TT>, 898 * FALSE otherwise 899 * @stable ICU 2.0 900 */ 901 inline UBool endsWith(const UnicodeString& text) const; 902 903 /** 904 * Determine if this ends with the characters in <TT>srcText</TT> 905 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 906 * @param srcText The text to match.
907 * @param srcStart the offset into <TT>srcText</TT> to start matching 908 * @param srcLength the number of characters in <TT>srcText</TT> to match 909 * @return TRUE if this ends with the characters in <TT>text</TT>, 910 * FALSE otherwise 911 * @stable ICU 2.0 912 */ 913 inline UBool endsWith(const UnicodeString& srcText, 914 int32_t srcStart, 915 int32_t srcLength) const; 916 917 /** 918 * Determine if this ends with the characters in <TT>srcChars</TT> 919 * @param srcChars The characters to match. 920 * @param srcLength the number of characters in <TT>srcChars</TT> 921 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 922 * FALSE otherwise 923 * @stable ICU 2.0 924 */ 925 inline UBool endsWith(ConstChar16Ptr srcChars, 926 int32_t srcLength) const; 927 928 /** 929 * Determine if this ends with the characters in <TT>srcChars</TT> 930 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 931 * @param srcChars The characters to match. 932 * @param srcStart the offset into <TT>srcChars</TT> to start matching 933 * @param srcLength the number of characters in <TT>srcChars</TT> to match 934 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 935 * FALSE otherwise 936 * @stable ICU 2.0 937 */ 938 inline UBool endsWith(const char16_t *srcChars, 939 int32_t srcStart, 940 int32_t srcLength) const; 941 942 943 /* Searching - bitwise only */ 944 945 /** 946 * Locate in this the first occurrence of the characters in <TT>text</TT>, 947 * using bitwise comparison. 948 * @param text The text to search for. 949 * @return The offset into this of the start of <TT>text</TT>, 950 * or -1 if not found. 951 * @stable ICU 2.0 952 */ 953 inline int32_t indexOf(const UnicodeString& text) const; 954 955 /** 956 * Locate in this the first occurrence of the characters in <TT>text</TT> 957 * starting at offset <TT>start</TT>, using bitwise comparison. 958 * @param text The text to search for.
959 * @param start The offset at which searching will start. 960 * @return The offset into this of the start of <TT>text</TT>, 961 * or -1 if not found. 962 * @stable ICU 2.0 963 */ 964 inline int32_t indexOf(const UnicodeString& text, 965 int32_t start) const; 966 967 /** 968 * Locate in this the first occurrence in the range 969 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 970 * in <TT>text</TT>, using bitwise comparison. 971 * @param text The text to search for. 972 * @param start The offset at which searching will start. 973 * @param length The number of characters to search 974 * @return The offset into this of the start of <TT>text</TT>, 975 * or -1 if not found. 976 * @stable ICU 2.0 977 */ 978 inline int32_t indexOf(const UnicodeString& text, 979 int32_t start, 980 int32_t length) const; 981 982 /** 983 * Locate in this the first occurrence in the range 984 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 985 * in <TT>srcText</TT> in the range 986 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 987 * using bitwise comparison. 988 * @param srcText The text to search for. 989 * @param srcStart the offset into <TT>srcText</TT> at which 990 * to start matching 991 * @param srcLength the number of characters in <TT>srcText</TT> to match 992 * @param start the offset into this at which to start matching 993 * @param length the number of characters in this to search 994 * @return The offset into this of the start of <TT>srcText</TT>, 995 * or -1 if not found. 996 * @stable ICU 2.0 997 */ 998 inline int32_t indexOf(const UnicodeString& srcText, 999 int32_t srcStart, 1000 int32_t srcLength, 1001 int32_t start, 1002 int32_t length) const; 1003 1004 /** 1005 * Locate in this the first occurrence of the characters in 1006 * <TT>srcChars</TT> 1007 * starting at offset <TT>start</TT>, using bitwise comparison. 1008 * @param srcChars The text to search for.
1009 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1010 * @param start the offset into this at which to start matching 1011 * @return The offset into this of the start of <TT>text</TT>, 1012 * or -1 if not found. 1013 * @stable ICU 2.0 1014 */ 1015 inline int32_t indexOf(const char16_t *srcChars, 1016 int32_t srcLength, 1017 int32_t start) const; 1018 1019 /** 1020 * Locate in this the first occurrence in the range 1021 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1022 * in <TT>srcChars</TT>, using bitwise comparison. 1023 * @param srcChars The text to search for. 1024 * @param srcLength the number of characters in <TT>srcChars</TT> 1025 * @param start The offset at which searching will start. 1026 * @param length The number of characters to search 1027 * @return The offset into this of the start of <TT>srcChars</TT>, 1028 * or -1 if not found. 1029 * @stable ICU 2.0 1030 */ 1031 inline int32_t indexOf(ConstChar16Ptr srcChars, 1032 int32_t srcLength, 1033 int32_t start, 1034 int32_t length) const; 1035 1036 /** 1037 * Locate in this the first occurrence in the range 1038 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1039 * in <TT>srcChars</TT> in the range 1040 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1041 * using bitwise comparison. 1042 * @param srcChars The text to search for. 1043 * @param srcStart the offset into <TT>srcChars</TT> at which 1044 * to start matching 1045 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1046 * @param start the offset into this at which to start matching 1047 * @param length the number of characters in this to search 1048 * @return The offset into this of the start of <TT>text</TT>, 1049 * or -1 if not found. 
1050 * @stable ICU 2.0 1051 */ 1052 int32_t indexOf(const char16_t *srcChars, 1053 int32_t srcStart, 1054 int32_t srcLength, 1055 int32_t start, 1056 int32_t length) const; 1057 1058 /** 1059 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1060 * using bitwise comparison. 1061 * @param c The code unit to search for. 1062 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1063 * @stable ICU 2.0 1064 */ 1065 inline int32_t indexOf(char16_t c) const; 1066 1067 /** 1068 * Locate in this the first occurrence of the code point <TT>c</TT>, 1069 * using bitwise comparison. 1070 * 1071 * @param c The code point to search for. 1072 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1073 * @stable ICU 2.0 1074 */ 1075 inline int32_t indexOf(UChar32 c) const; 1076 1077 /** 1078 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1079 * starting at offset <TT>start</TT>, using bitwise comparison. 1080 * @param c The code unit to search for. 1081 * @param start The offset at which searching will start. 1082 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1083 * @stable ICU 2.0 1084 */ 1085 inline int32_t indexOf(char16_t c, 1086 int32_t start) const; 1087 1088 /** 1089 * Locate in this the first occurrence of the code point <TT>c</TT> 1090 * starting at offset <TT>start</TT>, using bitwise comparison. 1091 * 1092 * @param c The code point to search for. 1093 * @param start The offset at which searching will start. 1094 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1095 * @stable ICU 2.0 1096 */ 1097 inline int32_t indexOf(UChar32 c, 1098 int32_t start) const; 1099 1100 /** 1101 * Locate in this the first occurrence of the BMP code point <code>c</code> 1102 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1103 * using bitwise comparison. 1104 * @param c The code unit to search for. 
1105 * @param start the offset into this at which to start matching 1106 * @param length the number of characters in this to search 1107 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1108 * @stable ICU 2.0 1109 */ 1110 inline int32_t indexOf(char16_t c, 1111 int32_t start, 1112 int32_t length) const; 1113 1114 /** 1115 * Locate in this the first occurrence of the code point <TT>c</TT> 1116 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1117 * using bitwise comparison. 1118 * 1119 * @param c The code point to search for. 1120 * @param start the offset into this at which to start matching 1121 * @param length the number of characters in this to search 1122 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1123 * @stable ICU 2.0 1124 */ 1125 inline int32_t indexOf(UChar32 c, 1126 int32_t start, 1127 int32_t length) const; 1128 1129 /** 1130 * Locate in this the last occurrence of the characters in <TT>text</TT>, 1131 * using bitwise comparison. 1132 * @param text The text to search for. 1133 * @return The offset into this of the start of <TT>text</TT>, 1134 * or -1 if not found. 1135 * @stable ICU 2.0 1136 */ 1137 inline int32_t lastIndexOf(const UnicodeString& text) const; 1138 1139 /** 1140 * Locate in this the last occurrence of the characters in <TT>text</TT> 1141 * starting at offset <TT>start</TT>, using bitwise comparison. 1142 * @param text The text to search for. 1143 * @param start The offset at which searching will start. 1144 * @return The offset into this of the start of <TT>text</TT>, 1145 * or -1 if not found. 1146 * @stable ICU 2.0 1147 */ 1148 inline int32_t lastIndexOf(const UnicodeString& text, 1149 int32_t start) const; 1150 1151 /** 1152 * Locate in this the last occurrence in the range 1153 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1154 * in <TT>text</TT>, using bitwise comparison. 1155 * @param text The text to search for. 
1156 * @param start The offset at which searching will start. 1157 * @param length The number of characters to search 1158 * @return The offset into this of the start of <TT>text</TT>, 1159 * or -1 if not found. 1160 * @stable ICU 2.0 1161 */ 1162 inline int32_t lastIndexOf(const UnicodeString& text, 1163 int32_t start, 1164 int32_t length) const; 1165 1166 /** 1167 * Locate in this the last occurrence in the range 1168 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1169 * in <TT>srcText</TT> in the range 1170 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1171 * using bitwise comparison. 1172 * @param srcText The text to search for. 1173 * @param srcStart the offset into <TT>srcText</TT> at which 1174 * to start matching 1175 * @param srcLength the number of characters in <TT>srcText</TT> to match 1176 * @param start the offset into this at which to start matching 1177 * @param length the number of characters in this to search 1178 * @return The offset into this of the start of <TT>srcText</TT>, 1179 * or -1 if not found. 1180 * @stable ICU 2.0 1181 */ 1182 inline int32_t lastIndexOf(const UnicodeString& srcText, 1183 int32_t srcStart, 1184 int32_t srcLength, 1185 int32_t start, 1186 int32_t length) const; 1187 1188 /** 1189 * Locate in this the last occurrence of the characters in <TT>srcChars</TT> 1190 * starting at offset <TT>start</TT>, using bitwise comparison. 1191 * @param srcChars The text to search for. 1192 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1193 * @param start the offset into this at which to start matching 1194 * @return The offset into this of the start of <TT>text</TT>, 1195 * or -1 if not found.
1196 * @stable ICU 2.0 1197 */ 1198 inline int32_t lastIndexOf(const char16_t *srcChars, 1199 int32_t srcLength, 1200 int32_t start) const; 1201 1202 /** 1203 * Locate in this the last occurrence in the range 1204 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1205 * in <TT>srcChars</TT>, using bitwise comparison. 1206 * @param srcChars The text to search for. 1207 * @param srcLength the number of characters in <TT>srcChars</TT> 1208 * @param start The offset at which searching will start. 1209 * @param length The number of characters to search 1210 * @return The offset into this of the start of <TT>srcChars</TT>, 1211 * or -1 if not found. 1212 * @stable ICU 2.0 1213 */ 1214 inline int32_t lastIndexOf(ConstChar16Ptr srcChars, 1215 int32_t srcLength, 1216 int32_t start, 1217 int32_t length) const; 1218 1219 /** 1220 * Locate in this the last occurrence in the range 1221 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1222 * in <TT>srcChars</TT> in the range 1223 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1224 * using bitwise comparison. 1225 * @param srcChars The text to search for. 1226 * @param srcStart the offset into <TT>srcChars</TT> at which 1227 * to start matching 1228 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1229 * @param start the offset into this at which to start matching 1230 * @param length the number of characters in this to search 1231 * @return The offset into this of the start of <TT>srcChars</TT>, 1232 * or -1 if not found. 1233 * @stable ICU 2.0 1234 */ 1235 int32_t lastIndexOf(const char16_t *srcChars, 1236 int32_t srcStart, 1237 int32_t srcLength, 1238 int32_t start, 1239 int32_t length) const; 1240 1241 /** 1242 * Locate in this the last occurrence of the BMP code point <code>c</code>, 1243 * using bitwise comparison. 1244 * @param c The code unit to search for. 1245 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1246 * @stable ICU 2.0 1247 */ 1248 inline int32_t lastIndexOf(char16_t c) const; 1249 1250 /** 1251 * Locate in this the last occurrence of the code point <TT>c</TT>, 1252 * using bitwise comparison. 1253 * 1254 * @param c The code point to search for. 1255 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1256 * @stable ICU 2.0 1257 */ 1258 inline int32_t lastIndexOf(UChar32 c) const; 1259 1260 /** 1261 * Locate in this the last occurrence of the BMP code point <code>c</code> 1262 * starting at offset <TT>start</TT>, using bitwise comparison. 1263 * @param c The code unit to search for. 1264 * @param start The offset at which searching will start. 1265 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1266 * @stable ICU 2.0 1267 */ 1268 inline int32_t lastIndexOf(char16_t c, 1269 int32_t start) const; 1270 1271 /** 1272 * Locate in this the last occurrence of the code point <TT>c</TT> 1273 * starting at offset <TT>start</TT>, using bitwise comparison. 1274 * 1275 * @param c The code point to search for. 1276 * @param start The offset at which searching will start. 1277 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1278 * @stable ICU 2.0 1279 */ 1280 inline int32_t lastIndexOf(UChar32 c, 1281 int32_t start) const; 1282 1283 /** 1284 * Locate in this the last occurrence of the BMP code point <code>c</code> 1285 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1286 * using bitwise comparison. 1287 * @param c The code unit to search for. 1288 * @param start the offset into this at which to start matching 1289 * @param length the number of characters in this to search 1290 * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1291 * @stable ICU 2.0 1292 */ 1293 inline int32_t lastIndexOf(char16_t c, 1294 int32_t start, 1295 int32_t length) const; 1296 1297 /** 1298 * Locate in this the last occurrence of the code point <TT>c</TT> 1299 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1300 * using bitwise comparison. 1301 * 1302 * @param c The code point to search for. 1303 * @param start the offset into this at which to start matching 1304 * @param length the number of characters in this to search 1305 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1306 * @stable ICU 2.0 1307 */ 1308 inline int32_t lastIndexOf(UChar32 c, 1309 int32_t start, 1310 int32_t length) const; 1311 1312 1313 /* Character access */ 1314 1315 /** 1316 * Return the code unit at offset <tt>offset</tt>. 1317 * If the offset is not valid (i.e., not in 0..length()-1) then U+ffff is returned. 1318 * @param offset a valid offset into the text 1319 * @return the code unit at offset <tt>offset</tt> 1320 * or 0xffff if the offset is not valid for this string 1321 * @stable ICU 2.0 1322 */ 1323 inline char16_t charAt(int32_t offset) const; 1324 1325 /** 1326 * Return the code unit at offset <tt>offset</tt>. 1327 * If the offset is not valid (i.e., not in 0..length()-1) then U+ffff is returned. 1328 * @param offset a valid offset into the text 1329 * @return the code unit at offset <tt>offset</tt> or 0xffff if the offset is not valid for this string 1330 * @stable ICU 2.0 1331 */ 1332 inline char16_t operator[] (int32_t offset) const; 1333 1334 /** 1335 * Return the code point that contains the code unit 1336 * at offset <tt>offset</tt>. 1337 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1338 * @param offset a valid offset into the text 1339 * that indicates the text offset of any of the code units 1340 * that will be assembled into a code point (21-bit value) and returned 1341 * @return the code point of text at <tt>offset</tt> 1342 * or 0xffff if the offset is not valid for this string 1343 * @stable ICU 2.0 1344 */ 1345 UChar32 char32At(int32_t offset) const; 1346 1347 /** 1348 * Adjust a random-access offset so that 1349 * it points to the beginning of a Unicode character. 1350 * The offset that is passed in points to 1351 * any code unit of a code point, 1352 * while the returned offset will point to the first code unit 1353 * of the same code point. 1354 * In UTF-16, if the input offset points to a second surrogate 1355 * of a surrogate pair, then the returned offset will point 1356 * to the first surrogate. 1357 * @param offset a valid offset into one code point of the text 1358 * @return offset of the first code unit of the same code point 1359 * @see U16_SET_CP_START 1360 * @stable ICU 2.0 1361 */ 1362 int32_t getChar32Start(int32_t offset) const; 1363 1364 /** 1365 * Adjust a random-access offset so that 1366 * it points behind a Unicode character. 1367 * The offset that is passed in points behind 1368 * any code unit of a code point, 1369 * while the returned offset will point behind the last code unit 1370 * of the same code point. 1371 * In UTF-16, if the input offset points behind the first surrogate 1372 * (i.e., to the second surrogate) 1373 * of a surrogate pair, then the returned offset will point 1374 * behind the second surrogate (i.e., to the first code unit after the pair). 1375 * @param offset a valid offset after any code unit of a code point of the text 1376 * @return offset of the first code unit after the same code point 1377 * @see U16_SET_CP_LIMIT 1378 * @stable ICU 2.0 1379 */ 1380 int32_t getChar32Limit(int32_t offset) const; 1381 1382 /** 1383 * Move the code unit index along the string by delta code points.
1384 * Interpret the input index as a code unit-based offset into the string, 1385 * move the index forward or backward by delta code points, and 1386 * return the resulting index. 1387 * The input index should point to the first code unit of a code point, 1388 * if there is more than one. 1389 * 1390 * Both input and output indexes are code unit-based as for all 1391 * string indexes/offsets in ICU (and other libraries, like MBCS char*). 1392 * If delta<0 then the index is moved backward (toward the start of the string). 1393 * If delta>0 then the index is moved forward (toward the end of the string). 1394 * 1395 * This behaves like CharacterIterator::move32(delta, kCurrent). 1396 * 1397 * Behavior for out-of-bounds indexes: 1398 * <code>moveIndex32</code> pins the input index to 0..length(), i.e., 1399 * if the input index<0 then it is pinned to 0; 1400 * if it is index>length() then it is pinned to length(). 1401 * Afterwards, the index is moved by <code>delta</code> code points 1402 * forward or backward, 1403 * but no further backward than to 0 and no further forward than to length(). 1404 * The resulting index return value will be in between 0 and length(), inclusively. 
1405 * 1406 * Examples: 1407 * <pre> 1408 * // s has code points 'a' U+10000 'b' U+10ffff U+2029 1409 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); 1410 * 1411 * // initial index: position of U+10000 1412 * int32_t index=1; 1413 * 1414 * // the following examples will all result in index==4, position of U+10ffff 1415 * 1416 * // skip 2 code points from some position in the string 1417 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' 1418 * 1419 * // go to the 3rd code point from the start of s (0-based) 1420 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' 1421 * 1422 * // go to the next-to-last code point of s 1423 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff 1424 * </pre> 1425 * 1426 * @param index input code unit index 1427 * @param delta (signed) code point count to move the index forward or backward 1428 * in the string 1429 * @return the resulting code unit index 1430 * @stable ICU 2.0 1431 */ 1432 int32_t moveIndex32(int32_t index, int32_t delta) const; 1433 1434 /* Substring extraction */ 1435 1436 /** 1437 * Copy the characters in the range 1438 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, 1439 * beginning at <tt>dstStart</tt>. 1440 * If the string aliases to <code>dst</code> itself as an external buffer, 1441 * then extract() will not copy the contents. 1442 * 1443 * @param start offset of first character which will be copied into the array 1444 * @param length the number of characters to extract 1445 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1446 * must be at least (<tt>dstStart + length</tt>). 1447 * @param dstStart the offset in <TT>dst</TT> where the first character 1448 * will be extracted 1449 * @stable ICU 2.0 1450 */ 1451 inline void extract(int32_t start, 1452 int32_t length, 1453 Char16Ptr dst, 1454 int32_t dstStart = 0) const; 1455 1456 /** 1457 * Copy the contents of the string into dest. 
1458 * This is a convenience function that 1459 * checks if there is enough space in dest, 1460 * extracts the entire string if possible, 1461 * and NUL-terminates dest if possible. 1462 * 1463 * If the string fits into dest but cannot be NUL-terminated 1464 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. 1465 * If the string itself does not fit into dest 1466 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. 1467 * 1468 * If the string aliases to <code>dest</code> itself as an external buffer, 1469 * then extract() will not copy the contents. 1470 * 1471 * @param dest Destination string buffer. 1472 * @param destCapacity Number of char16_ts available at dest. 1473 * @param errorCode ICU error code. 1474 * @return length() 1475 * @stable ICU 2.0 1476 */ 1477 int32_t 1478 extract(Char16Ptr dest, int32_t destCapacity, 1479 UErrorCode &errorCode) const; 1480 1481 /** 1482 * Copy the characters in the range 1483 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString 1484 * <tt>target</tt>. 1485 * @param start offset of first character which will be copied 1486 * @param length the number of characters to extract 1487 * @param target UnicodeString into which to copy characters. 1488 * This function returns nothing; the copied characters are stored in <TT>target</TT>. 1489 * @stable ICU 2.0 1490 */ 1491 inline void extract(int32_t start, 1492 int32_t length, 1493 UnicodeString& target) const; 1494 1495 /** 1496 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1497 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. 1498 * @param start offset of first character which will be copied into the array 1499 * @param limit offset immediately following the last character to be copied 1500 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1501 * must be at least (<tt>dstStart + (limit - start)</tt>).
1502 * @param dstStart the offset in <TT>dst</TT> where the first character 1503 * will be extracted 1504 * @stable ICU 2.0 1505 */ 1506 inline void extractBetween(int32_t start, 1507 int32_t limit, 1508 char16_t *dst, 1509 int32_t dstStart = 0) const; 1510 1511 /** 1512 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1513 * into the UnicodeString <tt>target</tt>. Replaceable API. 1514 * @param start offset of first character which will be copied 1515 * @param limit offset immediately following the last character to be copied 1516 * @param target UnicodeString into which to copy characters. 1517 * This function returns nothing; the copied characters are stored in <TT>target</TT>. 1518 * @stable ICU 2.0 1519 */ 1520 virtual void extractBetween(int32_t start, 1521 int32_t limit, 1522 UnicodeString& target) const; 1523 1524 /** 1525 * Copy the characters in the range 1526 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters. 1527 * All characters must be invariant (see utypes.h). 1528 * Use US_INV as the last, signature-distinguishing parameter. 1529 * 1530 * This function does not write any more than <code>targetCapacity</code> 1531 * characters but returns the length of the entire output string 1532 * so that one can allocate a larger buffer and call the function again 1533 * if necessary. 1534 * The output string is NUL-terminated if possible. 1535 * 1536 * @param start offset of first character which will be copied 1537 * @param startLength the number of characters to extract 1538 * @param target the target buffer for extraction, can be NULL 1539 * if targetLength is 0 1540 * @param targetCapacity the length of the target buffer 1541 * @param inv Signature-distinguishing paramater, use US_INV.
1542 * @return the output string length, not including the terminating NUL 1543 * @stable ICU 3.2 1544 */ 1545 int32_t extract(int32_t start, 1546 int32_t startLength, 1547 char *target, 1548 int32_t targetCapacity, 1549 enum EInvariant inv) const; 1550 1551 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 1552 1553 /** 1554 * Copy the characters in the range 1555 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1556 * in the platform's default codepage. 1557 * This function does not write any more than <code>targetLength</code> 1558 * characters but returns the length of the entire output string 1559 * so that one can allocate a larger buffer and call the function again 1560 * if necessary. 1561 * The output string is NUL-terminated if possible. 1562 * 1563 * @param start offset of first character which will be copied 1564 * @param startLength the number of characters to extract 1565 * @param target the target buffer for extraction 1566 * @param targetLength the length of the target buffer 1567 * If <TT>target</TT> is NULL, then the number of bytes required for 1568 * <TT>target</TT> is returned. 1569 * @return the output string length, not including the terminating NUL 1570 * @stable ICU 2.0 1571 */ 1572 int32_t extract(int32_t start, 1573 int32_t startLength, 1574 char *target, 1575 uint32_t targetLength) const; 1576 1577 #endif 1578 1579 #if !UCONFIG_NO_CONVERSION 1580 1581 /** 1582 * Copy the characters in the range 1583 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1584 * in a specified codepage. 1585 * The output string is NUL-terminated. 1586 * 1587 * Recommendation: For invariant-character strings use 1588 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1589 * because it avoids object code dependencies of UnicodeString on 1590 * the conversion code. 
1591 * 1592 * @param start offset of first character which will be copied 1593 * @param startLength the number of characters to extract 1594 * @param target the target buffer for extraction 1595 * @param codepage the desired codepage for the characters. 0 has 1596 * the special meaning of the default codepage 1597 * If <code>codepage</code> is an empty string (<code>""</code>), 1598 * then a simple conversion is performed on the codepage-invariant 1599 * subset ("invariant characters") of the platform encoding. See utypes.h. 1600 * If <TT>target</TT> is NULL, then the number of bytes required for 1601 * <TT>target</TT> is returned. It is assumed that the target is big enough 1602 * to fit all of the characters. 1603 * @return the output string length, not including the terminating NUL 1604 * @stable ICU 2.0 1605 */ 1606 inline int32_t extract(int32_t start, 1607 int32_t startLength, 1608 char *target, 1609 const char *codepage = 0) const; 1610 1611 /** 1612 * Copy the characters in the range 1613 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters 1614 * in a specified codepage. 1615 * This function does not write any more than <code>targetLength</code> 1616 * characters but returns the length of the entire output string 1617 * so that one can allocate a larger buffer and call the function again 1618 * if necessary. 1619 * The output string is NUL-terminated if possible. 1620 * 1621 * Recommendation: For invariant-character strings use 1622 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const 1623 * because it avoids object code dependencies of UnicodeString on 1624 * the conversion code. 
1625 * 1626 * @param start offset of first character which will be copied 1627 * @param startLength the number of characters to extract 1628 * @param target the target buffer for extraction 1629 * @param targetLength the length of the target buffer 1630 * @param codepage the desired codepage for the characters. 0 has 1631 * the special meaning of the default codepage 1632 * If <code>codepage</code> is an empty string (<code>""</code>), 1633 * then a simple conversion is performed on the codepage-invariant 1634 * subset ("invariant characters") of the platform encoding. See utypes.h. 1635 * If <TT>target</TT> is NULL, then the number of bytes required for 1636 * <TT>target</TT> is returned. 1637 * @return the output string length, not including the terminating NUL 1638 * @stable ICU 2.0 1639 */ 1640 int32_t extract(int32_t start, 1641 int32_t startLength, 1642 char *target, 1643 uint32_t targetLength, 1644 const char *codepage) const; 1645 1646 /** 1647 * Convert the UnicodeString into a codepage string using an existing UConverter. 1648 * The output string is NUL-terminated if possible. 1649 * 1650 * This function avoids the overhead of opening and closing a converter if 1651 * multiple strings are extracted. 
1652 * 1653 * @param dest destination string buffer, can be NULL if destCapacity==0 1654 * @param destCapacity the number of chars available at dest 1655 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), 1656 * or NULL for the default converter 1657 * @param errorCode normal ICU error code 1658 * @return the length of the output string, not counting the terminating NUL; 1659 * if the length is greater than destCapacity, then the string will not fit 1660 * and a buffer of the indicated length would need to be passed in 1661 * @stable ICU 2.0 1662 */ 1663 int32_t extract(char *dest, int32_t destCapacity, 1664 UConverter *cnv, 1665 UErrorCode &errorCode) const; 1666 1667 #endif 1668 1669 /** 1670 * Create a temporary substring for the specified range. 1671 * Unlike the substring constructor and setTo() functions, 1672 * the object returned here will be a read-only alias (using getBuffer()) 1673 * rather than copying the text. 1674 * As a result, this substring operation is much faster but requires 1675 * that the original string not be modified or deleted during the lifetime 1676 * of the returned substring object. 1677 * @param start offset of the first character visible in the substring 1678 * @param length length of the substring 1679 * @return a read-only alias UnicodeString object for the substring 1680 * @stable ICU 4.4 1681 */ 1682 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 1683 1684 /** 1685 * Create a temporary substring for the specified range. 1686 * Same as tempSubString(start, length) except that the substring range 1687 * is specified as a (start, limit) pair (with an exclusive limit index) 1688 * rather than a (start, length) pair. 
1689 * @param start offset of the first character visible in the substring 1690 * @param limit offset immediately following the last character visible in the substring 1691 * @return a read-only alias UnicodeString object for the substring 1692 * @stable ICU 4.4 1693 */ 1694 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 1695 1696 /** 1697 * Convert the UnicodeString to UTF-8 and write the result 1698 * to a ByteSink. This is called by toUTF8String(). 1699 * Unpaired surrogates are replaced with U+FFFD. 1700 * Calls u_strToUTF8WithSub(). 1701 * 1702 * @param sink A ByteSink to which the UTF-8 version of the string is written. 1703 * sink.Flush() is called at the end. 1704 * @stable ICU 4.2 1705 * @see toUTF8String 1706 */ 1707 void toUTF8(ByteSink &sink) const; 1708 1709 /** 1710 * Convert the UnicodeString to UTF-8 and append the result 1711 * to a standard string. 1712 * Unpaired surrogates are replaced with U+FFFD. 1713 * Calls toUTF8(). 1714 * 1715 * @param result A standard string (or a compatible object) 1716 * to which the UTF-8 version of the string is appended. 1717 * @return The string object. 1718 * @stable ICU 4.2 1719 * @see toUTF8 1720 */ 1721 template<typename StringClass> 1722 StringClass &toUTF8String(StringClass &result) const { 1723 StringByteSink<StringClass> sbs(&result, length()); 1724 toUTF8(sbs); 1725 return result; 1726 } 1727 1728 /** 1729 * Convert the UnicodeString to UTF-32. 1730 * Unpaired surrogates are replaced with U+FFFD. 1731 * Calls u_strToUTF32WithSub(). 1732 * 1733 * @param utf32 destination string buffer, can be NULL if capacity==0 1734 * @param capacity the number of UChar32s available at utf32 1735 * @param errorCode Standard ICU error code. Its input value must 1736 * pass the U_SUCCESS() test, or else the function returns 1737 * immediately. Check for U_FAILURE() on output or use with 1738 * function chaining. (See User Guide for details.)
1739 * @return The length of the UTF-32 string. 1740 * @see fromUTF32 1741 * @stable ICU 4.2 1742 */ 1743 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 1744 1745 /* Length operations */ 1746 1747 /** 1748 * Return the length of the UnicodeString object. 1749 * The length is the number of char16_t code units that are in the UnicodeString. 1750 * If you want the number of code points, please use countChar32(). 1751 * @return the length of the UnicodeString object 1752 * @see countChar32 1753 * @stable ICU 2.0 1754 */ 1755 inline int32_t length(void) const; 1756 1757 /** 1758 * Count Unicode code points in the length char16_t code units of the string. 1759 * A code point may occupy either one or two char16_t code units. 1760 * Counting code points involves reading all code units. 1761 * 1762 * This function is basically the inverse of moveIndex32(). 1763 * 1764 * @param start the index of the first code unit to check 1765 * @param length the number of char16_t code units to check 1766 * @return the number of code points in the specified code units 1767 * @see length 1768 * @stable ICU 2.0 1769 */ 1770 int32_t 1771 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 1772 1773 /** 1774 * Check if the length char16_t code units of the string 1775 * contain more Unicode code points than a certain number. 1776 * This is more efficient than counting all code points in this part of the string 1777 * and comparing that number with a threshold. 1778 * This function may not need to scan the string at all if the length 1779 * falls within a certain range, and 1780 * never needs to count more than 'number+1' code points. 1781 * Logically equivalent to (countChar32(start, length)>number). 1782 * A Unicode code point may occupy either one or two char16_t code units.
1783 * 1784 * @param start the index of the first code unit to check (0 for the entire string) 1785 * @param length the number of char16_t code units to check 1786 * (use INT32_MAX for the entire string; remember that start/length 1787 * values are pinned) 1788 * @param number The number of code points in the (sub)string is compared against 1789 * the 'number' parameter. 1790 * @return Boolean value for whether the string contains more Unicode code points 1791 * than 'number'. Same as (u_countChar32(s, length)>number). 1792 * @see countChar32 1793 * @see u_strHasMoreChar32Than 1794 * @stable ICU 2.4 1795 */ 1796 UBool 1797 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 1798 1799 /** 1800 * Determine if this string is empty. 1801 * @return TRUE if this string contains 0 characters, FALSE otherwise. 1802 * @stable ICU 2.0 1803 */ 1804 inline UBool isEmpty(void) const; 1805 1806 /** 1807 * Return the capacity of the internal buffer of the UnicodeString object. 1808 * This is useful together with the getBuffer functions. 1809 * See there for details. 1810 * 1811 * @return the number of char16_ts available in the internal buffer 1812 * @see getBuffer 1813 * @stable ICU 2.0 1814 */ 1815 inline int32_t getCapacity(void) const; 1816 1817 /* Other operations */ 1818 1819 /** 1820 * Generate a hash code for this object. 1821 * @return The hash code of this UnicodeString. 1822 * @stable ICU 2.0 1823 */ 1824 inline int32_t hashCode(void) const; 1825 1826 /** 1827 * Determine if this object contains a valid string. 1828 * A bogus string has no value. It is different from an empty string, 1829 * although in both cases isEmpty() returns TRUE and length() returns 0. 1830 * setToBogus() and isBogus() can be used to indicate that no string value is available. 1831 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and 1832 * length() returns 0.
1833 * 1834 * @return TRUE if the string is bogus/invalid, FALSE otherwise 1835 * @see setToBogus() 1836 * @stable ICU 2.0 1837 */ 1838 inline UBool isBogus(void) const; 1839 1840 1841 //======================================== 1842 // Write operations 1843 //======================================== 1844 1845 /* Assignment operations */ 1846 1847 /** 1848 * Assignment operator. Replace the characters in this UnicodeString 1849 * with the characters from <TT>srcText</TT>. 1850 * 1851 * Starting with ICU 2.4, the assignment operator and the copy constructor 1852 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1853 * By contrast, the fastCopyFrom() function implements the old, 1854 * more efficient but less safe behavior 1855 * of making this string also a readonly alias to the same buffer. 1856 * 1857 * If the source object has an "open" buffer from getBuffer(minCapacity), 1858 * then the copy is an empty string. 1859 * 1860 * @param srcText The text containing the characters to replace 1861 * @return a reference to this 1862 * @stable ICU 2.0 1863 * @see fastCopyFrom 1864 */ 1865 UnicodeString &operator=(const UnicodeString &srcText); 1866 1867 /** 1868 * Almost the same as the assignment operator. 1869 * Replace the characters in this UnicodeString 1870 * with the characters from <code>srcText</code>. 1871 * 1872 * This function works the same as the assignment operator 1873 * for all strings except for ones that are readonly aliases. 1874 * 1875 * Starting with ICU 2.4, the assignment operator and the copy constructor 1876 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1877 * This function implements the old, more efficient but less safe behavior 1878 * of making this string also a readonly alias to the same buffer.
1879 * 1880 * The fastCopyFrom function must be used only if it is known that the lifetime of 1881 * this UnicodeString does not exceed the lifetime of the aliased buffer 1882 * including its contents, for example for strings from resource bundles 1883 * or aliases to string constants. 1884 * 1885 * If the source object has an "open" buffer from getBuffer(minCapacity), 1886 * then the copy is an empty string. 1887 * 1888 * @param src The text containing the characters to replace. 1889 * @return a reference to this 1890 * @stable ICU 2.4 1891 */ 1892 UnicodeString &fastCopyFrom(const UnicodeString &src); 1893 1894 /** 1895 * Move assignment operator, might leave src in bogus state. 1896 * This string will have the same contents and state that the source string had. 1897 * The behavior is undefined if *this and src are the same object. 1898 * @param src source string 1899 * @return *this 1900 * @stable ICU 56 1901 */ 1902 UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT { 1903 return moveFrom(src); 1904 } 1905 1906 // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API 1907 /** 1908 * Move assignment, might leave src in bogus state. 1909 * This string will have the same contents and state that the source string had. 1910 * The behavior is undefined if *this and src are the same object. 1911 * 1912 * Can be called explicitly, does not need C++11 support. 1913 * @param src source string 1914 * @return *this 1915 * @draft ICU 56 1916 */ 1917 UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT; 1918 1919 /** 1920 * Swap strings. 1921 * @param other other string 1922 * @stable ICU 56 1923 */ 1924 void swap(UnicodeString &other) U_NOEXCEPT; 1925 1926 /** 1927 * Non-member UnicodeString swap function.
1928 * @param s1 will get s2's contents and state 1929 * @param s2 will get s1's contents and state 1930 * @stable ICU 56 1931 */ 1932 friend U_COMMON_API inline void U_EXPORT2 1933 swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT { 1934 s1.swap(s2); 1935 } 1936 1937 /** 1938 * Assignment operator. Replace the characters in this UnicodeString 1939 * with the code unit <TT>ch</TT>. 1940 * @param ch the code unit that replaces the current contents 1941 * @return a reference to this 1942 * @stable ICU 2.0 1943 */ 1944 inline UnicodeString& operator= (char16_t ch); 1945 1946 /** 1947 * Assignment operator. Replace the characters in this UnicodeString 1948 * with the code point <TT>ch</TT>. 1949 * @param ch the code point that replaces the current contents 1950 * @return a reference to this 1951 * @stable ICU 2.0 1952 */ 1953 inline UnicodeString& operator= (UChar32 ch); 1954 1955 /** 1956 * Set the text in the UnicodeString object to the characters 1957 * in <TT>srcText</TT> in the range 1958 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). 1959 * <TT>srcText</TT> is not modified. 1960 * @param srcText the source for the new characters 1961 * @param srcStart the offset into <TT>srcText</TT> where new characters 1962 * will be obtained 1963 * @return a reference to this 1964 * @stable ICU 2.2 1965 */ 1966 inline UnicodeString& setTo(const UnicodeString& srcText, 1967 int32_t srcStart); 1968 1969 /** 1970 * Set the text in the UnicodeString object to the characters 1971 * in <TT>srcText</TT> in the range 1972 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1973 * <TT>srcText</TT> is not modified. 1974 * @param srcText the source for the new characters 1975 * @param srcStart the offset into <TT>srcText</TT> where new characters 1976 * will be obtained 1977 * @param srcLength the number of characters in <TT>srcText</TT> in the 1978 * replace string.
1979 * @return a reference to this 1980 * @stable ICU 2.0 1981 */ 1982 inline UnicodeString& setTo(const UnicodeString& srcText, 1983 int32_t srcStart, 1984 int32_t srcLength); 1985 1986 /** 1987 * Set the text in the UnicodeString object to the characters in 1988 * <TT>srcText</TT>. 1989 * <TT>srcText</TT> is not modified. 1990 * @param srcText the source for the new characters 1991 * @return a reference to this 1992 * @stable ICU 2.0 1993 */ 1994 inline UnicodeString& setTo(const UnicodeString& srcText); 1995 1996 /** 1997 * Set the characters in the UnicodeString object to the characters 1998 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 1999 * @param srcChars the source for the new characters 2000 * @param srcLength the number of Unicode characters in srcChars. 2001 * @return a reference to this 2002 * @stable ICU 2.0 2003 */ 2004 inline UnicodeString& setTo(const char16_t *srcChars, 2005 int32_t srcLength); 2006 2007 /** 2008 * Set the characters in the UnicodeString object to the code unit 2009 * <TT>srcChar</TT>. 2010 * @param srcChar the code unit which becomes the UnicodeString's character 2011 * content 2012 * @return a reference to this 2013 * @stable ICU 2.0 2014 */ 2015 UnicodeString& setTo(char16_t srcChar); 2016 2017 /** 2018 * Set the characters in the UnicodeString object to the code point 2019 * <TT>srcChar</TT>. 2020 * @param srcChar the code point which becomes the UnicodeString's character 2021 * content 2022 * @return a reference to this 2023 * @stable ICU 2.0 2024 */ 2025 UnicodeString& setTo(UChar32 srcChar); 2026 2027 /** 2028 * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. 2029 * The text will be used for the UnicodeString object, but 2030 * it will not be released when the UnicodeString is destroyed. 2031 * This has copy-on-write semantics: 2032 * When the string is modified, then the buffer is first copied into 2033 * newly allocated memory. 2034 * The aliased buffer is never modified.
2035 * 2036 * In an assignment to another UnicodeString, when using the copy constructor 2037 * or the assignment operator, the text will be copied. 2038 * When using fastCopyFrom(), the text will be aliased again, 2039 * so that both strings then alias the same readonly-text. 2040 * 2041 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 2042 * This must be true if <code>textLength==-1</code>. 2043 * @param text The characters to alias for the UnicodeString. 2044 * @param textLength The number of Unicode characters in <code>text</code> to alias. 2045 * If -1, then this function will determine the length 2046 * by calling <code>u_strlen()</code>. 2047 * @return a reference to this 2048 * @stable ICU 2.0 2049 */ 2050 UnicodeString &setTo(UBool isTerminated, 2051 ConstChar16Ptr text, 2052 int32_t textLength); 2053 2054 /** 2055 * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. 2056 * The text will be used for the UnicodeString object, but 2057 * it will not be released when the UnicodeString is destroyed. 2058 * This has write-through semantics: 2059 * For as long as the capacity of the buffer is sufficient, write operations 2060 * will directly affect the buffer. When more capacity is necessary, then 2061 * a new buffer will be allocated and the contents copied as with regularly 2062 * constructed strings. 2063 * In an assignment to another UnicodeString, the buffer will be copied. 2064 * The extract(Char16Ptr dst) function detects whether the dst pointer is the same 2065 * as the string buffer itself and will in this case not copy the contents. 2066 * 2067 * @param buffer The characters to alias for the UnicodeString. 2068 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 2069 * @param buffCapacity The size of <code>buffer</code> in char16_ts.
2070 * @return a reference to this 2071 * @stable ICU 2.0 2072 */ 2073 UnicodeString &setTo(char16_t *buffer, 2074 int32_t buffLength, 2075 int32_t buffCapacity); 2076 2077 /** 2078 * Make this UnicodeString object invalid. 2079 * The string will test TRUE with isBogus(). 2080 * 2081 * A bogus string has no value. It is different from an empty string. 2082 * It can be used to indicate that no string value is available. 2083 * getBuffer() and getTerminatedBuffer() return NULL, and 2084 * length() returns 0. 2085 * 2086 * This utility function is used throughout the UnicodeString 2087 * implementation to indicate that a UnicodeString operation failed, 2088 * and may be used in other functions, 2089 * especially but not exclusively when such functions do not 2090 * take a UErrorCode for simplicity. 2091 * 2092 * The following methods, and no others, will clear a string object's bogus flag: 2093 * - remove() 2094 * - remove(0, INT32_MAX) 2095 * - truncate(0) 2096 * - operator=() (assignment operator) 2097 * - setTo(...) 2098 * 2099 * The simplest way to turn a bogus string into an empty one 2100 * is to use the remove() function. 2101 * Examples for other functions that are equivalent to "set to empty string": 2102 * \code 2103 * if(s.isBogus()) { 2104 * s.remove(); // set to an empty string (remove all), or 2105 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or 2106 * s.truncate(0); // set to an empty string (complete truncation), or 2107 * s=UnicodeString(); // assign an empty string, or 2108 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or 2109 * static const char16_t nul=0; 2110 * s.setTo(&nul, 0); // set to an empty C Unicode string 2111 * } 2112 * \endcode 2113 * 2114 * @see isBogus() 2115 * @stable ICU 2.0 2116 */ 2117 void setToBogus(); 2118 2119 /** 2120 * Set the character at the specified offset to the specified character.
2121 * @param offset A valid offset into the text of the character to set 2122 * @param ch The new character 2123 * @return A reference to this 2124 * @stable ICU 2.0 2125 */ 2126 UnicodeString& setCharAt(int32_t offset, 2127 char16_t ch); 2128 2129 2130 /* Append operations */ 2131 2132 /** 2133 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString 2134 * object. 2135 * @param ch the code unit to be appended 2136 * @return a reference to this 2137 * @stable ICU 2.0 2138 */ 2139 inline UnicodeString& operator+= (char16_t ch); 2140 2141 /** 2142 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString 2143 * object. 2144 * @param ch the code point to be appended 2145 * @return a reference to this 2146 * @stable ICU 2.0 2147 */ 2148 inline UnicodeString& operator+= (UChar32 ch); 2149 2150 /** 2151 * Append operator. Append the characters in <TT>srcText</TT> to the 2152 * UnicodeString object. <TT>srcText</TT> is not modified. 2153 * @param srcText the source for the new characters 2154 * @return a reference to this 2155 * @stable ICU 2.0 2156 */ 2157 inline UnicodeString& operator+= (const UnicodeString& srcText); 2158 2159 /** 2160 * Append the characters 2161 * in <TT>srcText</TT> in the range 2162 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the 2163 * UnicodeString object. <TT>srcText</TT> 2164 * is not modified. 2165 * @param srcText the source for the new characters 2166 * @param srcStart the offset into <TT>srcText</TT> where new characters 2167 * will be obtained 2168 * @param srcLength the number of characters in <TT>srcText</TT> in 2169 * the append string 2170 * @return a reference to this 2171 * @stable ICU 2.0 2172 */ 2173 inline UnicodeString& append(const UnicodeString& srcText, 2174 int32_t srcStart, 2175 int32_t srcLength); 2176 2177 /** 2178 * Append the characters in <TT>srcText</TT> to the UnicodeString object. 2179 * <TT>srcText</TT> is not modified.
2180 * @param srcText the source for the new characters 2181 * @return a reference to this 2182 * @stable ICU 2.0 2183 */ 2184 inline UnicodeString& append(const UnicodeString& srcText); 2185 2186 /** 2187 * Append the characters in <TT>srcChars</TT> in the range 2188 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString 2189 * object. 2190 * <TT>srcChars</TT> is not modified. 2191 * @param srcChars the source for the new characters 2192 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2193 * will be obtained 2194 * @param srcLength the number of characters in <TT>srcChars</TT> in 2195 * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated 2196 * @return a reference to this 2197 * @stable ICU 2.0 2198 */ 2199 inline UnicodeString& append(const char16_t *srcChars, 2200 int32_t srcStart, 2201 int32_t srcLength); 2202 2203 /** 2204 * Append the characters in <TT>srcChars</TT> to the UnicodeString object. 2205 * <TT>srcChars</TT> is not modified. 2206 * @param srcChars the source for the new characters 2207 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; 2208 * can be -1 if <TT>srcChars</TT> is NUL-terminated 2209 * @return a reference to this 2210 * @stable ICU 2.0 2211 */ 2212 inline UnicodeString& append(ConstChar16Ptr srcChars, 2213 int32_t srcLength); 2214 2215 /** 2216 * Append the code unit <TT>srcChar</TT> to the UnicodeString object. 2217 * @param srcChar the code unit to append 2218 * @return a reference to this 2219 * @stable ICU 2.0 2220 */ 2221 inline UnicodeString& append(char16_t srcChar); 2222 2223 /** 2224 * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2225 * @param srcChar the code point to append 2226 * @return a reference to this 2227 * @stable ICU 2.0 2228 */ 2229 UnicodeString& append(UChar32 srcChar); 2230 2231 2232 /* Insert operations */ 2233 2234 /** 2235 * Insert the characters in <TT>srcText</TT> in the range 2236 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2237 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 2238 * @param start the offset where the insertion begins 2239 * @param srcText the source for the new characters 2240 * @param srcStart the offset into <TT>srcText</TT> where new characters 2241 * will be obtained 2242 * @param srcLength the number of characters in <TT>srcText</TT> in 2243 * the insert string 2244 * @return a reference to this 2245 * @stable ICU 2.0 2246 */ 2247 inline UnicodeString& insert(int32_t start, 2248 const UnicodeString& srcText, 2249 int32_t srcStart, 2250 int32_t srcLength); 2251 2252 /** 2253 * Insert the characters in <TT>srcText</TT> into the UnicodeString object 2254 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 2255 * @param start the offset where the insertion begins 2256 * @param srcText the source for the new characters 2257 * @return a reference to this 2258 * @stable ICU 2.0 2259 */ 2260 inline UnicodeString& insert(int32_t start, 2261 const UnicodeString& srcText); 2262 2263 /** 2264 * Insert the characters in <TT>srcChars</TT> in the range 2265 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2266 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2267 * @param start the offset at which the insertion begins 2268 * @param srcChars the source for the new characters 2269 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2270 * will be obtained 2271 * @param srcLength the number of characters in <TT>srcChars</TT> 2272 * in the insert string 2273 * @return a reference to this 2274 * @stable ICU 2.0 2275 */ 2276 inline UnicodeString& insert(int32_t start, 2277 const char16_t *srcChars, 2278 int32_t srcStart, 2279 int32_t srcLength); 2280 2281 /** 2282 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object 2283 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 2284 * @param start the offset where the insertion begins 2285 * @param srcChars the source for the new characters 2286 * @param srcLength the number of Unicode characters in srcChars. 2287 * @return a reference to this 2288 * @stable ICU 2.0 2289 */ 2290 inline UnicodeString& insert(int32_t start, 2291 ConstChar16Ptr srcChars, 2292 int32_t srcLength); 2293 2294 /** 2295 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at 2296 * offset <TT>start</TT>. 2297 * @param start the offset at which the insertion occurs 2298 * @param srcChar the code unit to insert 2299 * @return a reference to this 2300 * @stable ICU 2.0 2301 */ 2302 inline UnicodeString& insert(int32_t start, 2303 char16_t srcChar); 2304 2305 /** 2306 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at 2307 * offset <TT>start</TT>.
2308 * @param start the offset at which the insertion occurs 2309 * @param srcChar the code point to insert 2310 * @return a reference to this 2311 * @stable ICU 2.0 2312 */ 2313 inline UnicodeString& insert(int32_t start, 2314 UChar32 srcChar); 2315 2316 2317 /* Replace operations */ 2318 2319 /** 2320 * Replace the characters in the range 2321 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2322 * <TT>srcText</TT> in the range 2323 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 2324 * <TT>srcText</TT> is not modified. 2325 * @param start the offset at which the replace operation begins 2326 * @param length the number of characters to replace. The character at 2327 * <TT>start + length</TT> is not modified. 2328 * @param srcText the source for the new characters 2329 * @param srcStart the offset into <TT>srcText</TT> where new characters 2330 * will be obtained 2331 * @param srcLength the number of characters in <TT>srcText</TT> in 2332 * the replace string 2333 * @return a reference to this 2334 * @stable ICU 2.0 2335 */ 2336 UnicodeString& replace(int32_t start, 2337 int32_t length, 2338 const UnicodeString& srcText, 2339 int32_t srcStart, 2340 int32_t srcLength); 2341 2342 /** 2343 * Replace the characters in the range 2344 * [<TT>start</TT>, <TT>start + length</TT>) 2345 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is 2346 * not modified. 2347 * @param start the offset at which the replace operation begins 2348 * @param length the number of characters to replace. The character at 2349 * <TT>start + length</TT> is not modified. 
2350 * @param srcText the source for the new characters 2351 * @return a reference to this 2352 * @stable ICU 2.0 2353 */ 2354 UnicodeString& replace(int32_t start, 2355 int32_t length, 2356 const UnicodeString& srcText); 2357 2358 /** 2359 * Replace the characters in the range 2360 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2361 * <TT>srcChars</TT> in the range 2362 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> 2363 * is not modified. 2364 * @param start the offset at which the replace operation begins 2365 * @param length the number of characters to replace. The character at 2366 * <TT>start + length</TT> is not modified. 2367 * @param srcChars the source for the new characters 2368 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2369 * will be obtained 2370 * @param srcLength the number of characters in <TT>srcChars</TT> 2371 * in the replace string 2372 * @return a reference to this 2373 * @stable ICU 2.0 2374 */ 2375 UnicodeString& replace(int32_t start, 2376 int32_t length, 2377 const char16_t *srcChars, 2378 int32_t srcStart, 2379 int32_t srcLength); 2380 2381 /** 2382 * Replace the characters in the range 2383 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2384 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 2385 * @param start the offset at which the replace operation begins 2386 * @param length number of characters to replace. The character at 2387 * <TT>start + length</TT> is not modified. 2388 * @param srcChars the source for the new characters 2389 * @param srcLength the number of Unicode characters in srcChars 2390 * @return a reference to this 2391 * @stable ICU 2.0 2392 */ 2393 inline UnicodeString& replace(int32_t start, 2394 int32_t length, 2395 ConstChar16Ptr srcChars, 2396 int32_t srcLength); 2397 2398 /** 2399 * Replace the characters in the range 2400 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit 2401 * <TT>srcChar</TT>. 
2402 * @param start the offset at which the replace operation begins 2403 * @param length the number of characters to replace. The character at 2404 * <TT>start + length</TT> is not modified. 2405 * @param srcChar the new code unit 2406 * @return a reference to this 2407 * @stable ICU 2.0 2408 */ 2409 inline UnicodeString& replace(int32_t start, 2410 int32_t length, 2411 char16_t srcChar); 2412 2413 /** 2414 * Replace the characters in the range 2415 * [<TT>start</TT>, <TT>start + length</TT>) with the code point 2416 * <TT>srcChar</TT>. 2417 * @param start the offset at which the replace operation begins 2418 * @param length the number of characters to replace. The character at 2419 * <TT>start + length</TT> is not modified. 2420 * @param srcChar the new code point 2421 * @return a reference to this 2422 * @stable ICU 2.0 2423 */ 2424 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 2425 2426 /** 2427 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2428 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. 2429 * @param start the offset at which the replace operation begins 2430 * @param limit the offset immediately following the replace range 2431 * @param srcText the source for the new characters 2432 * @return a reference to this 2433 * @stable ICU 2.0 2434 */ 2435 inline UnicodeString& replaceBetween(int32_t start, 2436 int32_t limit, 2437 const UnicodeString& srcText); 2438 2439 /** 2440 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2441 * with the characters in <TT>srcText</TT> in the range 2442 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2443 * @param start the offset at which the replace operation begins 2444 * @param limit the offset immediately following the replace range 2445 * @param srcText the source for the new characters 2446 * @param srcStart the offset into <TT>srcText</TT> where new characters 2447 * will be obtained 2448 * @param srcLimit the offset immediately following the range to copy 2449 * in <TT>srcText</TT> 2450 * @return a reference to this 2451 * @stable ICU 2.0 2452 */ 2453 inline UnicodeString& replaceBetween(int32_t start, 2454 int32_t limit, 2455 const UnicodeString& srcText, 2456 int32_t srcStart, 2457 int32_t srcLimit); 2458 2459 /** 2460 * Replace a substring of this object with the given text. 2461 * @param start the beginning index, inclusive; <code>0 <= start 2462 * <= limit</code>. 2463 * @param limit the ending index, exclusive; <code>start <= limit 2464 * <= length()</code>. 2465 * @param text the text to replace characters <code>start</code> 2466 * to <code>limit - 1</code> 2467 * @stable ICU 2.0 2468 */ 2469 virtual void handleReplaceBetween(int32_t start, 2470 int32_t limit, 2471 const UnicodeString& text); 2472 2473 /** 2474 * Replaceable API 2475 * @return TRUE if it has MetaData 2476 * @stable ICU 2.4 2477 */ 2478 virtual UBool hasMetaData() const; 2479 2480 /** 2481 * Copy a substring of this object, retaining attribute (out-of-band) 2482 * information. This method is used to duplicate or reorder substrings. 2483 * The destination index must not overlap the source range. 2484 * 2485 * @param start the beginning index, inclusive; <code>0 <= start <= 2486 * limit</code>. 2487 * @param limit the ending index, exclusive; <code>start <= limit <= 2488 * length()</code>. 2489 * @param dest the destination index. The characters from 2490 * <code>start..limit-1</code> will be copied to <code>dest</code>. 2491 * Implementations of this method may assume that <code>dest <= start || 2492 * dest >= limit</code>.
2493 * @stable ICU 2.0 2494 */ 2495 virtual void copy(int32_t start, int32_t limit, int32_t dest); 2496 2497 /* Search and replace operations */ 2498 2499 /** 2500 * Replace all occurrences of characters in oldText with the characters 2501 * in newText 2502 * @param oldText the text containing the search text 2503 * @param newText the text containing the replacement text 2504 * @return a reference to this 2505 * @stable ICU 2.0 2506 */ 2507 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 2508 const UnicodeString& newText); 2509 2510 /** 2511 * Replace all occurrences of characters in oldText with characters 2512 * in newText 2513 * in the range [<TT>start</TT>, <TT>start + length</TT>). 2514 * @param start the start of the range in which replace will be performed 2515 * @param length the length of the range in which replace will be performed 2516 * @param oldText the text containing the search text 2517 * @param newText the text containing the replacement text 2518 * @return a reference to this 2519 * @stable ICU 2.0 2520 */ 2521 inline UnicodeString& findAndReplace(int32_t start, 2522 int32_t length, 2523 const UnicodeString& oldText, 2524 const UnicodeString& newText); 2525 2526 /** 2527 * Replace all occurrences of characters in oldText in the range 2528 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters 2529 * in newText in the range 2530 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) 2531 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2532 * @param start the start of the range in which replace will be performed 2533 * @param length the length of the range in which replace will be performed 2534 * @param oldText the text containing the search text 2535 * @param oldStart the start of the search range in <TT>oldText</TT> 2536 * @param oldLength the length of the search range in <TT>oldText</TT> 2537 * @param newText the text containing the replacement text 2538 * @param newStart the start of the replacement range in <TT>newText</TT> 2539 * @param newLength the length of the replacement range in <TT>newText</TT> 2540 * @return a reference to this 2541 * @stable ICU 2.0 2542 */ 2543 UnicodeString& findAndReplace(int32_t start, 2544 int32_t length, 2545 const UnicodeString& oldText, 2546 int32_t oldStart, 2547 int32_t oldLength, 2548 const UnicodeString& newText, 2549 int32_t newStart, 2550 int32_t newLength); 2551 2552 2553 /* Remove operations */ 2554 2555 /** 2556 * Remove all characters from the UnicodeString object. 2557 * @return a reference to this 2558 * @stable ICU 2.0 2559 */ 2560 inline UnicodeString& remove(void); 2561 2562 /** 2563 * Remove the characters in the range 2564 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. 2565 * @param start the offset of the first character to remove 2566 * @param length the number of characters to remove 2567 * @return a reference to this 2568 * @stable ICU 2.0 2569 */ 2570 inline UnicodeString& remove(int32_t start, 2571 int32_t length = (int32_t)INT32_MAX); 2572 2573 /** 2574 * Remove the characters in the range 2575 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2576 * @param start the offset of the first character to remove 2577 * @param limit the offset immediately following the range to remove 2578 * @return a reference to this 2579 * @stable ICU 2.0 2580 */ 2581 inline UnicodeString& removeBetween(int32_t start, 2582 int32_t limit = (int32_t)INT32_MAX); 2583 2584 /** 2585 * Retain only the characters in the range 2586 * [<code>start</code>, <code>limit</code>) from the UnicodeString object. 2587 * Removes characters before <code>start</code> and at and after <code>limit</code>. 2588 * @param start the offset of the first character to retain 2589 * @param limit the offset immediately following the range to retain 2590 * @return a reference to this 2591 * @stable ICU 4.4 2592 */ 2593 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 2594 2595 /* Length operations */ 2596 2597 /** 2598 * Pad the start of this UnicodeString with the character <TT>padChar</TT>. 2599 * If the length of this UnicodeString is less than targetLength, 2600 * targetLength - length() copies of padChar will be added to the 2601 * beginning of this UnicodeString. 2602 * @param targetLength the desired length of the string 2603 * @param padChar the character to use for padding. Defaults to 2604 * space (U+0020) 2605 * @return TRUE if the text was padded, FALSE otherwise. 2606 * @stable ICU 2.0 2607 */ 2608 UBool padLeading(int32_t targetLength, 2609 char16_t padChar = 0x0020); 2610 2611 /** 2612 * Pad the end of this UnicodeString with the character <TT>padChar</TT>. 2613 * If the length of this UnicodeString is less than targetLength, 2614 * targetLength - length() copies of padChar will be added to the 2615 * end of this UnicodeString. 2616 * @param targetLength the desired length of the string 2617 * @param padChar the character to use for padding. Defaults to 2618 * space (U+0020) 2619 * @return TRUE if the text was padded, FALSE otherwise.
2620 * @stable ICU 2.0 2621 */ 2622 UBool padTrailing(int32_t targetLength, 2623 char16_t padChar = 0x0020); 2624 2625 /** 2626 * Truncate this UnicodeString to the <TT>targetLength</TT>. 2627 * @param targetLength the desired length of this UnicodeString. 2628 * @return TRUE if the text was truncated, FALSE otherwise 2629 * @stable ICU 2.0 2630 */ 2631 inline UBool truncate(int32_t targetLength); 2632 2633 /** 2634 * Trims leading and trailing whitespace from this UnicodeString. 2635 * @return a reference to this 2636 * @stable ICU 2.0 2637 */ 2638 UnicodeString& trim(void); 2639 2640 2641 /* Miscellaneous operations */ 2642 2643 /** 2644 * Reverse this UnicodeString in place. 2645 * @return a reference to this 2646 * @stable ICU 2.0 2647 */ 2648 inline UnicodeString& reverse(void); 2649 2650 /** 2651 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in 2652 * this UnicodeString. 2653 * @param start the start of the range to reverse 2654 * @param length the number of characters to reverse 2655 * @return a reference to this 2656 * @stable ICU 2.0 2657 */ 2658 inline UnicodeString& reverse(int32_t start, 2659 int32_t length); 2660 2661 /** 2662 * Convert the characters in this to UPPER CASE following the conventions of 2663 * the default locale. 2664 * @return A reference to this. 2665 * @stable ICU 2.0 2666 */ 2667 UnicodeString& toUpper(void); 2668 2669 /** 2670 * Convert the characters in this to UPPER CASE following the conventions of 2671 * a specific locale. 2672 * @param locale The locale containing the conventions to use. 2673 * @return A reference to this. 2674 * @stable ICU 2.0 2675 */ 2676 UnicodeString& toUpper(const Locale& locale); 2677 2678 /** 2679 * Convert the characters in this to lower case following the conventions of 2680 * the default locale. 2681 * @return A reference to this.
2682 * @stable ICU 2.0 2683 */ 2684 UnicodeString& toLower(void); 2685 2686 /** 2687 * Convert the characters in this to lower case following the conventions of 2688 * a specific locale. 2689 * @param locale The locale containing the conventions to use. 2690 * @return A reference to this. 2691 * @stable ICU 2.0 2692 */ 2693 UnicodeString& toLower(const Locale& locale); 2694 2695 #if !UCONFIG_NO_BREAK_ITERATION 2696 2697 /** 2698 * Titlecase this string, convenience function using the default locale. 2699 * 2700 * Casing is locale-dependent and context-sensitive. 2701 * Titlecasing uses a break iterator to find the first characters of words 2702 * that are to be titlecased. It titlecases those characters and lowercases 2703 * all others. 2704 * 2705 * The titlecase break iterator can be provided to customize for arbitrary 2706 * styles, using rules and dictionaries beyond the standard iterators. 2707 * It may be more efficient to always provide an iterator to avoid 2708 * opening and closing one for each string. 2709 * The standard titlecase iterator for the root locale implements the 2710 * algorithm of Unicode TR 21. 2711 * 2712 * This function uses only the setText(), first() and next() methods of the 2713 * provided break iterator. 2714 * 2715 * @param titleIter A break iterator to find the first characters of words 2716 * that are to be titlecased. 2717 * If none is provided (0), then a standard titlecase 2718 * break iterator is opened. 2719 * Otherwise the provided iterator is set to the string's text. 2720 * @return A reference to this. 2721 * @stable ICU 2.1 2722 */ 2723 UnicodeString &toTitle(BreakIterator *titleIter); 2724 2725 /** 2726 * Titlecase this string. 2727 * 2728 * Casing is locale-dependent and context-sensitive. 2729 * Titlecasing uses a break iterator to find the first characters of words 2730 * that are to be titlecased. It titlecases those characters and lowercases 2731 * all others. 
2732 * 2733 * The titlecase break iterator can be provided to customize for arbitrary 2734 * styles, using rules and dictionaries beyond the standard iterators. 2735 * It may be more efficient to always provide an iterator to avoid 2736 * opening and closing one for each string. 2737 * The standard titlecase iterator for the root locale implements the 2738 * algorithm of Unicode TR 21. 2739 * 2740 * This function uses only the setText(), first() and next() methods of the 2741 * provided break iterator. 2742 * 2743 * @param titleIter A break iterator to find the first characters of words 2744 * that are to be titlecased. 2745 * If none is provided (0), then a standard titlecase 2746 * break iterator is opened. 2747 * Otherwise the provided iterator is set to the string's text. 2748 * @param locale The locale to consider. 2749 * @return A reference to this. 2750 * @stable ICU 2.1 2751 */ 2752 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 2753 2754 /** 2755 * Titlecase this string, with options. 2756 * 2757 * Casing is locale-dependent and context-sensitive. 2758 * Titlecasing uses a break iterator to find the first characters of words 2759 * that are to be titlecased. It titlecases those characters and lowercases 2760 * all others. (This can be modified with options.) 2761 * 2762 * The titlecase break iterator can be provided to customize for arbitrary 2763 * styles, using rules and dictionaries beyond the standard iterators. 2764 * It may be more efficient to always provide an iterator to avoid 2765 * opening and closing one for each string. 2766 * The standard titlecase iterator for the root locale implements the 2767 * algorithm of Unicode TR 21. 2768 * 2769 * This function uses only the setText(), first() and next() methods of the 2770 * provided break iterator. 2771 * 2772 * @param titleIter A break iterator to find the first characters of words 2773 * that are to be titlecased.
2774 * If none is provided (0), then a standard titlecase 2775 * break iterator is opened. 2776 * Otherwise the provided iterator is set to the string's text. 2777 * @param locale The locale to consider. 2778 * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE, 2779 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, 2780 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. 2781 * See also ucasemap_open(). 2782 * @return A reference to this. 2783 * @stable ICU 3.8 2784 */ 2785 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 2786 2787 #endif 2788 2789 /** 2790 * Case-folds the characters in this string. 2791 * 2792 * Case-folding is locale-independent and not context-sensitive, 2793 * but there is an option for whether to include or exclude mappings for dotted I 2794 * and dotless i that are marked with 'T' in CaseFolding.txt. 2795 * 2796 * The result may be longer or shorter than the original. 2797 * 2798 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 2799 * @return A reference to this. 2800 * @stable ICU 2.0 2801 */ 2802 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 2803 2804 //======================================== 2805 // Access to the internal buffer 2806 //======================================== 2807 2808 /** 2809 * Get a read/write pointer to the internal buffer. 2810 * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, 2811 * writable, and is still owned by the UnicodeString object. 2812 * Calls to getBuffer(minCapacity) must not be nested, and 2813 * must be matched with calls to releaseBuffer(newLength). 2814 * If the string buffer was read-only or shared, 2815 * then it will be reallocated and copied. 2816 * 2817 * An attempted nested call will return 0, and will not further modify the 2818 * state of the UnicodeString object.
2819 * It also returns 0 if the string is bogus. 2820 * 2821 * The actual capacity of the string buffer may be larger than minCapacity. 2822 * getCapacity() returns the actual capacity. 2823 * For many operations, the full capacity should be used to avoid reallocations. 2824 * 2825 * While the buffer is "open" between getBuffer(minCapacity) 2826 * and releaseBuffer(newLength), the following applies: 2827 * - The string length is set to 0. 2828 * - Any read API call on the UnicodeString object will behave like on a 0-length string. 2829 * - Any write API call on the UnicodeString object is disallowed and will have no effect. 2830 * - You can read from and write to the returned buffer. 2831 * - The previous string contents will still be in the buffer; 2832 * if you want to use it, then you need to call length() before getBuffer(minCapacity). 2833 * If the length() was greater than minCapacity, then any contents after minCapacity 2834 * may be lost. 2835 * The buffer contents is not NUL-terminated by getBuffer(). 2836 * If length()<getCapacity() then you can terminate it by writing a NUL 2837 * at index length(). 2838 * - You must call releaseBuffer(newLength) before and in order to 2839 * return to normal UnicodeString operation. 2840 * 2841 * @param minCapacity the minimum number of char16_ts that are to be available 2842 * in the buffer, starting at the returned pointer; 2843 * default to the current string capacity if minCapacity==-1 2844 * @return a writable pointer to the internal string buffer, 2845 * or nullptr if an error occurs (nested calls, out of memory) 2846 * 2847 * @see releaseBuffer 2848 * @see getTerminatedBuffer() 2849 * @stable ICU 2.0 2850 */ 2851 char16_t *getBuffer(int32_t minCapacity); 2852 2853 /** 2854 * Release a read/write buffer on a UnicodeString object with an 2855 * "open" getBuffer(minCapacity). 2856 * This function must be called in a matched pair with getBuffer(minCapacity). 
2857 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". 2858 * 2859 * It will set the string length to newLength, at most to the current capacity. 2860 * If newLength==-1 then it will set the length according to the 2861 * first NUL in the buffer, or to the capacity if there is no NUL. 2862 * 2863 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. 2864 * 2865 * @param newLength the new length of the UnicodeString object; 2866 * defaults to the current capacity if newLength is greater than that; 2867 * if newLength==-1, it defaults to u_strlen(buffer) but not more than 2868 * the current capacity of the string 2869 * 2870 * @see getBuffer(int32_t minCapacity) 2871 * @stable ICU 2.0 2872 */ 2873 void releaseBuffer(int32_t newLength=-1); 2874 2875 /** 2876 * Get a read-only pointer to the internal buffer. 2877 * This can be called at any time on a valid UnicodeString. 2878 * 2879 * It returns 0 if the string is bogus, or 2880 * during an "open" getBuffer(minCapacity). 2881 * 2882 * It can be called as many times as desired. 2883 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2884 * at which time the pointer is semantically invalidated and must not be used any more. 2885 * 2886 * The capacity of the buffer can be determined with getCapacity(). 2887 * The part after length() may or may not be initialized and valid, 2888 * depending on the history of the UnicodeString object. 2889 * 2890 * The buffer contents is (probably) not NUL-terminated. 2891 * You can check if it is with 2892 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. 2893 * (See getTerminatedBuffer().) 2894 * 2895 * The buffer may reside in read-only memory. Its contents must not 2896 * be modified. 
2897 * 2898 * @return a read-only pointer to the internal string buffer, 2899 * or nullptr if the string is empty or bogus 2900 * 2901 * @see getBuffer(int32_t minCapacity) 2902 * @see getTerminatedBuffer() 2903 * @stable ICU 2.0 2904 */ 2905 inline const char16_t *getBuffer() const; 2906 2907 /** 2908 * Get a read-only pointer to the internal buffer, 2909 * making sure that it is NUL-terminated. 2910 * This can be called at any time on a valid UnicodeString. 2911 * 2912 * It returns 0 if the string is bogus, or 2913 * during an "open" getBuffer(minCapacity), or if the buffer cannot 2914 * be NUL-terminated (because memory allocation failed). 2915 * 2916 * It can be called as many times as desired. 2917 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2918 * at which time the pointer is semantically invalidated and must not be used any more. 2919 * 2920 * The capacity of the buffer can be determined with getCapacity(). 2921 * The part after length()+1 may or may not be initialized and valid, 2922 * depending on the history of the UnicodeString object. 2923 * 2924 * The buffer contents is guaranteed to be NUL-terminated. 2925 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL 2926 * is written. 2927 * For this reason, this function is not const, unlike getBuffer(). 2928 * Note that a UnicodeString may also contain NUL characters as part of its contents. 2929 * 2930 * The buffer may reside in read-only memory. Its contents must not 2931 * be modified. 2932 * 2933 * @return a read-only pointer to the internal string buffer, 2934 * or 0 if the string is empty or bogus 2935 * 2936 * @see getBuffer(int32_t minCapacity) 2937 * @see getBuffer() 2938 * @stable ICU 2.2 2939 */ 2940 const char16_t *getTerminatedBuffer(); 2941 2942 //======================================== 2943 // Constructors 2944 //======================================== 2945 2946 /** Construct an empty UnicodeString. 
2947 * @stable ICU 2.0 2948 */ 2949 inline UnicodeString(); 2950 2951 /** 2952 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> char16_ts 2953 * @param capacity the number of char16_ts this UnicodeString should hold 2954 * before a resize is necessary; if count is greater than 0 and count 2955 * code points c take up more space than capacity, then capacity is adjusted 2956 * accordingly. 2957 * @param c is used to initially fill the string 2958 * @param count specifies how many code points c are to be written in the 2959 * string 2960 * @stable ICU 2.0 2961 */ 2962 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 2963 2964 /** 2965 * Single char16_t (code unit) constructor. 2966 * 2967 * It is recommended to mark this constructor "explicit" by 2968 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2969 * on the compiler command line or similar. 2970 * @param ch the character to place in the UnicodeString 2971 * @stable ICU 2.0 2972 */ 2973 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); 2974 2975 /** 2976 * Single UChar32 (code point) constructor. 2977 * 2978 * It is recommended to mark this constructor "explicit" by 2979 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2980 * on the compiler command line or similar. 2981 * @param ch the character to place in the UnicodeString 2982 * @stable ICU 2.0 2983 */ 2984 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 2985 2986 /** 2987 * char16_t* constructor. 2988 * 2989 * It is recommended to mark this constructor "explicit" by 2990 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 2991 * on the compiler command line or similar. 2992 * @param text The characters to place in the UnicodeString. <TT>text</TT> 2993 * must be NUL (U+0000) terminated.
2994 * @stable ICU 2.0 2995 */ 2996 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); 2997 2998 /* 2999 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, 3000 * it should always be available regardless of U_HIDE_DRAFT_API status 3001 */ 3002 #if !U_CHAR16_IS_TYPEDEF 3003 /** 3004 * uint16_t * constructor. 3005 * Delegates to UnicodeString(const char16_t *). 3006 * 3007 * It is recommended to mark this constructor "explicit" by 3008 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3009 * on the compiler command line or similar. 3010 * @param text NUL-terminated UTF-16 string 3011 * @draft ICU 59 3012 */ 3013 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : 3014 UnicodeString(ConstChar16Ptr(text)) {} 3015 #endif 3016 3017 /* 3018 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, 3019 * it should always be available regardless of U_HIDE_DRAFT_API status 3020 */ 3021 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3022 /** 3023 * wchar_t * constructor. 3024 * (Only defined if U_SIZEOF_WCHAR_T==2.) 3025 * Delegates to UnicodeString(const char16_t *). 3026 * 3027 * It is recommended to mark this constructor "explicit" by 3028 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3029 * on the compiler command line or similar. 3030 * @param text NUL-terminated UTF-16 string 3031 * @draft ICU 59 3032 */ 3033 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : 3034 UnicodeString(ConstChar16Ptr(text)) {} 3035 #endif 3036 3037 /* 3038 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, 3039 * it should always be available regardless of U_HIDE_DRAFT_API status 3040 */ 3041 /** 3042 * nullptr_t constructor. 3043 * Effectively the same as the default constructor, makes an empty string object. 3044 * 3045 * It is recommended to mark this constructor "explicit" by 3046 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3047 * on the compiler command line or similar.
3048 * @param text nullptr 3049 * @draft ICU 59 3050 */ 3051 UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); 3052 3053 /** 3054 * char16_t* constructor. 3055 * @param text The characters to place in the UnicodeString. 3056 * @param textLength The number of Unicode characters in <TT>text</TT> 3057 * to copy. 3058 * @stable ICU 2.0 3059 */ 3060 UnicodeString(const char16_t *text, 3061 int32_t textLength); 3062 3063 /* 3064 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, 3065 * it should always be available regardless of U_HIDE_DRAFT_API status 3066 */ 3067 #if !U_CHAR16_IS_TYPEDEF 3068 /** 3069 * uint16_t * constructor. 3070 * Delegates to UnicodeString(const char16_t *, int32_t). 3071 * @param text UTF-16 string 3072 * @param length string length 3073 * @draft ICU 59 3074 */ 3075 UnicodeString(const uint16_t *text, int32_t length) : 3076 UnicodeString(ConstChar16Ptr(text), length) {} 3077 #endif 3078 3079 /* 3080 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, 3081 * it should always be available regardless of U_HIDE_DRAFT_API status 3082 */ 3083 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3084 /** 3085 * wchar_t * constructor. 3086 * (Only defined if U_SIZEOF_WCHAR_T==2.) 3087 * Delegates to UnicodeString(const char16_t *, int32_t). 3088 * @param text UTF-16 string 3089 * @param length string length 3090 * @draft ICU 59 3091 */ 3092 UnicodeString(const wchar_t *text, int32_t length) : 3093 UnicodeString(ConstChar16Ptr(text), length) {} 3094 #endif 3095 3096 /* 3097 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, 3098 * it should always be available regardless of U_HIDE_DRAFT_API status 3099 */ 3100 /** 3101 * nullptr_t constructor. 3102 * Effectively the same as the default constructor, makes an empty string object.
3103 * @param text nullptr 3104 * @param length ignored 3105 * @draft ICU 59 3106 */ 3107 inline UnicodeString(const std::nullptr_t text, int32_t length); 3108 3109 /** 3110 * Readonly-aliasing char16_t* constructor. 3111 * The text will be used for the UnicodeString object, but 3112 * it will not be released when the UnicodeString is destroyed. 3113 * This has copy-on-write semantics: 3114 * When the string is modified, then the buffer is first copied into 3115 * newly allocated memory. 3116 * The aliased buffer is never modified. 3117 * 3118 * In an assignment to another UnicodeString, when using the copy constructor 3119 * or the assignment operator, the text will be copied. 3120 * When using fastCopyFrom(), the text will be aliased again, 3121 * so that both strings then alias the same readonly-text. 3122 * 3123 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 3124 * This must be true if <code>textLength==-1</code>. 3125 * @param text The characters to alias for the UnicodeString. 3126 * @param textLength The number of Unicode characters in <code>text</code> to alias. 3127 * If -1, then this constructor will determine the length 3128 * by calling <code>u_strlen()</code>. 3129 * @stable ICU 2.0 3130 */ 3131 UnicodeString(UBool isTerminated, 3132 ConstChar16Ptr text, 3133 int32_t textLength); 3134 3135 /** 3136 * Writable-aliasing char16_t* constructor. 3137 * The text will be used for the UnicodeString object, but 3138 * it will not be released when the UnicodeString is destroyed. 3139 * This has write-through semantics: 3140 * For as long as the capacity of the buffer is sufficient, write operations 3141 * will directly affect the buffer. When more capacity is necessary, then 3142 * a new buffer will be allocated and the contents copied as with regularly 3143 * constructed strings. 3144 * In an assignment to another UnicodeString, the buffer will be copied. 
3145 * The extract(Char16Ptr dst) function detects whether the dst pointer is the same 3146 * as the string buffer itself and will in this case not copy the contents. 3147 * 3148 * @param buffer The characters to alias for the UnicodeString. 3149 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 3150 * @param buffCapacity The size of <code>buffer</code> in char16_ts. 3151 * @stable ICU 2.0 3152 */ 3153 UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); 3154 3155 /* 3156 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, 3157 * it should always be available regardless of U_HIDE_DRAFT_API status 3158 */ 3159 #if !U_CHAR16_IS_TYPEDEF 3160 /** 3161 * Writable-aliasing uint16_t * constructor. 3162 * Delegates to UnicodeString(char16_t *, int32_t, int32_t). 3163 * @param buffer writable buffer of/for UTF-16 text 3164 * @param buffLength length of the current buffer contents 3165 * @param buffCapacity buffer capacity 3166 * @draft ICU 59 3167 */ 3168 UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : 3169 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} 3170 #endif 3171 3172 /* 3173 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, 3174 * it should always be available regardless of U_HIDE_DRAFT_API status 3175 */ 3176 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3177 /** 3178 * Writable-aliasing wchar_t * constructor. 3179 * (Only defined if U_SIZEOF_WCHAR_T==2.) 3180 * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
3181 * @param buffer writable buffer of/for UTF-16 text 3182 * @param buffLength length of the current buffer contents 3183 * @param buffCapacity buffer capacity 3184 * @draft ICU 59 3185 */ 3186 UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : 3187 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} 3188 #endif 3189 3190 /* 3191 * Do not use #ifndef U_HIDE_DRAFT_API for the following constructor, 3192 * it should always be available regardless of U_HIDE_DRAFT_API status 3193 */ 3194 /** 3195 * Writable-aliasing nullptr_t constructor. 3196 * Effectively the same as the default constructor, makes an empty string object. 3197 * @param buffer nullptr 3198 * @param buffLength ignored 3199 * @param buffCapacity ignored 3200 * @draft ICU 59 3201 */ 3202 inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); 3203 3204 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 3205 3206 /** 3207 * char* constructor. 3208 * Uses the default converter (and thus depends on the ICU conversion code) 3209 * unless U_CHARSET_IS_UTF8 is set to 1. 3210 * 3211 * For ASCII (really "invariant character") strings it is more efficient to use 3212 * the constructor that takes a US_INV (for its enum EInvariant). 3213 * For ASCII (invariant-character) string literals, see UNICODE_STRING and 3214 * UNICODE_STRING_SIMPLE. 3215 * 3216 * It is recommended to mark this constructor "explicit" by 3217 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3218 * on the compiler command line or similar. 3219 * @param codepageData an array of bytes, null-terminated, 3220 * in the platform's default codepage. 3221 * @stable ICU 2.0 3222 * @see UNICODE_STRING 3223 * @see UNICODE_STRING_SIMPLE 3224 */ 3225 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 3226 3227 /** 3228 * char* constructor. 3229 * Uses the default converter (and thus depends on the ICU conversion code) 3230 * unless U_CHARSET_IS_UTF8 is set to 1.
3231 * @param codepageData an array of bytes in the platform's default codepage. 3232 * @param dataLength The number of bytes in <TT>codepageData</TT>. 3233 * @stable ICU 2.0 3234 */ 3235 UnicodeString(const char *codepageData, int32_t dataLength); 3236 3237 #endif 3238 3239 #if !UCONFIG_NO_CONVERSION 3240 3241 /** 3242 * char* constructor. 3243 * @param codepageData an array of bytes, null-terminated 3244 * @param codepage the encoding of <TT>codepageData</TT>. The special 3245 * value 0 for <TT>codepage</TT> indicates that the text is in the 3246 * platform's default codepage. 3247 * 3248 * If <code>codepage</code> is an empty string (<code>""</code>), 3249 * then a simple conversion is performed on the codepage-invariant 3250 * subset ("invariant characters") of the platform encoding. See utypes.h. 3251 * Recommendation: For invariant-character strings use the constructor 3252 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3253 * because it avoids object code dependencies of UnicodeString on 3254 * the conversion code. 3255 * 3256 * @stable ICU 2.0 3257 */ 3258 UnicodeString(const char *codepageData, const char *codepage); 3259 3260 /** 3261 * char* constructor. 3262 * @param codepageData an array of bytes. 3263 * @param dataLength The number of bytes in <TT>codepageData</TT>. 3264 * @param codepage the encoding of <TT>codepageData</TT>. The special 3265 * value 0 for <TT>codepage</TT> indicates that the text is in the 3266 * platform's default codepage. 3267 * If <code>codepage</code> is an empty string (<code>""</code>), 3268 * then a simple conversion is performed on the codepage-invariant 3269 * subset ("invariant characters") of the platform encoding. See utypes.h. 3270 * Recommendation: For invariant-character strings use the constructor 3271 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3272 * because it avoids object code dependencies of UnicodeString on 3273 * the conversion code. 
3274 * 3275 * @stable ICU 2.0 3276 */ 3277 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 3278 3279 /** 3280 * char * / UConverter constructor. 3281 * This constructor uses an existing UConverter object to 3282 * convert the codepage string to Unicode and construct a UnicodeString 3283 * from that. 3284 * 3285 * The converter is reset at first. 3286 * If the error code indicates a failure before this constructor is called, 3287 * or if an error occurs during conversion or construction, 3288 * then the string will be bogus. 3289 * 3290 * This function avoids the overhead of opening and closing a converter if 3291 * multiple strings are constructed. 3292 * 3293 * @param src input codepage string 3294 * @param srcLength length of the input string, can be -1 for NUL-terminated strings 3295 * @param cnv converter object (ucnv_resetToUnicode() will be called), 3296 * can be NULL for the default converter 3297 * @param errorCode normal ICU error code 3298 * @stable ICU 2.0 3299 */ 3300 UnicodeString( 3301 const char *src, int32_t srcLength, 3302 UConverter *cnv, 3303 UErrorCode &errorCode); 3304 3305 #endif 3306 3307 /** 3308 * Constructs a Unicode string from an invariant-character char * string. 3309 * About invariant characters see utypes.h. 3310 * This constructor has no runtime dependency on conversion code and is 3311 * therefore recommended over ones taking a charset name string 3312 * (where the empty string "" indicates invariant-character conversion). 3313 * 3314 * Use the macro US_INV as the third, signature-distinguishing parameter. 3315 * 3316 * For example: 3317 * \code 3318 * void fn(const char *s) { 3319 * UnicodeString ustr(s, -1, US_INV); 3320 * // use ustr ... 3321 * } 3322 * \endcode 3323 * 3324 * @param src String using only invariant characters. 3325 * @param length Length of src, or -1 if NUL-terminated. 3326 * @param inv Signature-distinguishing paramater, use US_INV. 
3327 * 3328 * @see US_INV 3329 * @stable ICU 3.2 3330 */ 3331 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 3332 3333 3334 /** 3335 * Copy constructor. 3336 * 3337 * Starting with ICU 2.4, the assignment operator and the copy constructor 3338 * allocate a new buffer and copy the buffer contents even for readonly aliases. 3339 * By contrast, the fastCopyFrom() function implements the old, 3340 * more efficient but less safe behavior 3341 * of making this string also a readonly alias to the same buffer. 3342 * 3343 * If the source object has an "open" buffer from getBuffer(minCapacity), 3344 * then the copy is an empty string. 3345 * 3346 * @param that The UnicodeString object to copy. 3347 * @stable ICU 2.0 3348 * @see fastCopyFrom 3349 */ 3350 UnicodeString(const UnicodeString& that); 3351 3352 /** 3353 * Move constructor, might leave src in bogus state. 3354 * This string will have the same contents and state that the source string had. 3355 * @param src source string 3356 * @stable ICU 56 3357 */ 3358 UnicodeString(UnicodeString &&src) U_NOEXCEPT; 3359 3360 /** 3361 * 'Substring' constructor from tail of source string. 3362 * @param src The UnicodeString object to copy. 3363 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3364 * @stable ICU 2.2 3365 */ 3366 UnicodeString(const UnicodeString& src, int32_t srcStart); 3367 3368 /** 3369 * 'Substring' constructor from subrange of source string. 3370 * @param src The UnicodeString object to copy. 3371 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3372 * @param srcLength The number of characters from <tt>src</tt> to copy. 3373 * @stable ICU 2.2 3374 */ 3375 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3376 3377 /** 3378 * Clone this object, an instance of a subclass of Replaceable. 3379 * Clones can be used concurrently in multiple threads. 
* If a subclass does not implement clone(), or if an error occurs,
   * then NULL is returned.
   * The clone functions in all subclasses return a pointer to a Replaceable
   * because some compilers do not support covariant (same-as-this)
   * return types; cast to the appropriate subclass if necessary.
   * The caller must delete the clone.
   *
   * @return a clone of this object
   *
   * @see Replaceable::clone
   * @see getDynamicClassID
   * @stable ICU 2.6
   */
  virtual Replaceable *clone() const;

  /** Destructor.
   * @stable ICU 2.0
   */
  virtual ~UnicodeString();

  /**
   * Create a UnicodeString from a UTF-8 string.
   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   * Calls u_strFromUTF8WithSub().
   *
   * @param utf8 UTF-8 input string.
   *             Note that a StringPiece can be implicitly constructed
   *             from a std::string or a NUL-terminated const char * string.
   * @return A UnicodeString with equivalent UTF-16 contents.
   * @see toUTF8
   * @see toUTF8String
   * @stable ICU 4.2
   */
  static UnicodeString fromUTF8(StringPiece utf8);

  /**
   * Create a UnicodeString from a UTF-32 string.
   * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   * Calls u_strFromUTF32WithSub().
   *
   * @param utf32 UTF-32 input string. Must not be NULL.
   * @param length Length of the input string, or -1 if NUL-terminated.
   * @return A UnicodeString with equivalent UTF-16 contents.
   * @see toUTF32
   * @stable ICU 4.2
   */
  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);

  /* Miscellaneous operations */

  /**
   * Unescape a string of characters and return a string containing
   * the result.  The following escape sequences are recognized:
   *
   * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
   * \\Uhhhhhhhh   8 hex digits
   * \\xhh         1-2 hex digits
   * \\ooo         1-3 octal digits; o in [0-7]
   * \\cX          control-X; X is masked with 0x1F
   *
   * as well as the standard ANSI C escapes:
   *
   * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
   * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
   * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
   *
   * Anything else following a backslash is generically escaped.  For
   * example, "[a\\-z]" returns "[a-z]".
   *
   * If an escape sequence is ill-formed, this method returns an empty
   * string.  An example of an ill-formed sequence is "\\u" followed by
   * fewer than 4 hex digits.
   *
   * This function is similar to u_unescape() but not identical to it.
   * The latter takes a source char*, so it does escape recognition
   * and also invariant conversion.
   *
   * @return a string with backslash escapes interpreted, or an
   * empty string on error.
   * @see UnicodeString#unescapeAt()
   * @see u_unescape()
   * @see u_unescapeAt()
   * @stable ICU 2.0
   */
  UnicodeString unescape() const;

  /**
   * Unescape a single escape sequence and return the represented
   * character.  See unescape() for a listing of the recognized escape
   * sequences.  The character at offset-1 is assumed (without
   * checking) to be a backslash.  If the escape sequence is
   * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
   * returned.
   *
   * @param offset an input/output parameter.  On input, it is the
   * offset into this string where the escape sequence is located,
   * after the initial backslash.  On output, it is advanced after the
   * last character parsed.  On error, it is not advanced at all.
   * @return the character represented by the escape sequence at
   * offset, or U_SENTINEL=-1 on error.
   * @see UnicodeString#unescape()
   * @see u_unescape()
   * @see u_unescapeAt()
   * @stable ICU 2.0
   */
  UChar32 unescapeAt(int32_t &offset) const;

  /**
   * ICU "poor man's RTTI", returns a UClassID for this class.
   *
   * @stable ICU 2.2
   */
  static UClassID U_EXPORT2 getStaticClassID();

  /**
   * ICU "poor man's RTTI", returns a UClassID for the actual class.
   *
   * @stable ICU 2.2
   */
  virtual UClassID getDynamicClassID() const;

  //========================================
  // Implementation methods
  //========================================

protected:
  /**
   * Implement Replaceable::getLength() (see jitterbug 1027).
   * @stable ICU 2.4
   */
  virtual int32_t getLength() const;

  /**
   * The change in Replaceable to use virtual getCharAt() allows
   * UnicodeString::charAt() to be inline again (see jitterbug 709).
   * @stable ICU 2.4
   */
  virtual char16_t getCharAt(int32_t offset) const;

  /**
   * The change in Replaceable to use virtual getChar32At() allows
   * UnicodeString::char32At() to be inline again (see jitterbug 709).
   * @stable ICU 2.4
   */
  virtual UChar32 getChar32At(int32_t offset) const;

private:
  // For char* constructors. Could be made public.
  UnicodeString &setToUTF8(StringPiece utf8);
  // For extract(char*).
  // We could make a toUTF8(target, capacity, errorCode) public but not
  // this version: New API will be cleaner if we make callers create substrings
  // rather than having start+length on every method,
  // and it should take a UErrorCode&.
  int32_t
  toUTF8(int32_t start, int32_t len,
         char *target, int32_t capacity) const;

  /**
   * Internal string contents comparison, called by operator==.
3540 * Requires: this & text not bogus and have same lengths. 3541 */ 3542 UBool doEquals(const UnicodeString &text, int32_t len) const; 3543 3544 inline int8_t 3545 doCompare(int32_t start, 3546 int32_t length, 3547 const UnicodeString& srcText, 3548 int32_t srcStart, 3549 int32_t srcLength) const; 3550 3551 int8_t doCompare(int32_t start, 3552 int32_t length, 3553 const char16_t *srcChars, 3554 int32_t srcStart, 3555 int32_t srcLength) const; 3556 3557 inline int8_t 3558 doCompareCodePointOrder(int32_t start, 3559 int32_t length, 3560 const UnicodeString& srcText, 3561 int32_t srcStart, 3562 int32_t srcLength) const; 3563 3564 int8_t doCompareCodePointOrder(int32_t start, 3565 int32_t length, 3566 const char16_t *srcChars, 3567 int32_t srcStart, 3568 int32_t srcLength) const; 3569 3570 inline int8_t 3571 doCaseCompare(int32_t start, 3572 int32_t length, 3573 const UnicodeString &srcText, 3574 int32_t srcStart, 3575 int32_t srcLength, 3576 uint32_t options) const; 3577 3578 int8_t 3579 doCaseCompare(int32_t start, 3580 int32_t length, 3581 const char16_t *srcChars, 3582 int32_t srcStart, 3583 int32_t srcLength, 3584 uint32_t options) const; 3585 3586 int32_t doIndexOf(char16_t c, 3587 int32_t start, 3588 int32_t length) const; 3589 3590 int32_t doIndexOf(UChar32 c, 3591 int32_t start, 3592 int32_t length) const; 3593 3594 int32_t doLastIndexOf(char16_t c, 3595 int32_t start, 3596 int32_t length) const; 3597 3598 int32_t doLastIndexOf(UChar32 c, 3599 int32_t start, 3600 int32_t length) const; 3601 3602 void doExtract(int32_t start, 3603 int32_t length, 3604 char16_t *dst, 3605 int32_t dstStart) const; 3606 3607 inline void doExtract(int32_t start, 3608 int32_t length, 3609 UnicodeString& target) const; 3610 3611 inline char16_t doCharAt(int32_t offset) const; 3612 3613 UnicodeString& doReplace(int32_t start, 3614 int32_t length, 3615 const UnicodeString& srcText, 3616 int32_t srcStart, 3617 int32_t srcLength); 3618 3619 UnicodeString& doReplace(int32_t start, 3620 
int32_t length, 3621 const char16_t *srcChars, 3622 int32_t srcStart, 3623 int32_t srcLength); 3624 3625 UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3626 UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); 3627 3628 UnicodeString& doReverse(int32_t start, 3629 int32_t length); 3630 3631 // calculate hash code 3632 int32_t doHashCode(void) const; 3633 3634 // get pointer to start of array 3635 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 3636 inline char16_t* getArrayStart(void); 3637 inline const char16_t* getArrayStart(void) const; 3638 3639 inline UBool hasShortLength() const; 3640 inline int32_t getShortLength() const; 3641 3642 // A UnicodeString object (not necessarily its current buffer) 3643 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 3644 inline UBool isWritable() const; 3645 3646 // Is the current buffer writable? 3647 inline UBool isBufferWritable() const; 3648 3649 // None of the following does releaseArray(). 3650 inline void setZeroLength(); 3651 inline void setShortLength(int32_t len); 3652 inline void setLength(int32_t len); 3653 inline void setToEmpty(); 3654 inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags 3655 3656 // allocate the array; result may be the stack buffer 3657 // sets refCount to 1 if appropriate 3658 // sets fArray, fCapacity, and flags 3659 // sets length to 0 3660 // returns boolean for success or failure 3661 UBool allocate(int32_t capacity); 3662 3663 // release the array if owned 3664 void releaseArray(void); 3665 3666 // turn a bogus string into an empty one 3667 void unBogus(); 3668 3669 // implements assigment operator, copy constructor, and fastCopyFrom() 3670 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE); 3671 3672 // Copies just the fields without memory management. 
3673 void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT; 3674 3675 // Pin start and limit to acceptable values. 3676 inline void pinIndex(int32_t& start) const; 3677 inline void pinIndices(int32_t& start, 3678 int32_t& length) const; 3679 3680 #if !UCONFIG_NO_CONVERSION 3681 3682 /* Internal extract() using UConverter. */ 3683 int32_t doExtract(int32_t start, int32_t length, 3684 char *dest, int32_t destCapacity, 3685 UConverter *cnv, 3686 UErrorCode &errorCode) const; 3687 3688 /* 3689 * Real constructor for converting from codepage data. 3690 * It assumes that it is called with !fRefCounted. 3691 * 3692 * If <code>codepage==0</code>, then the default converter 3693 * is used for the platform encoding. 3694 * If <code>codepage</code> is an empty string (<code>""</code>), 3695 * then a simple conversion is performed on the codepage-invariant 3696 * subset ("invariant characters") of the platform encoding. See utypes.h. 3697 */ 3698 void doCodepageCreate(const char *codepageData, 3699 int32_t dataLength, 3700 const char *codepage); 3701 3702 /* 3703 * Worker function for creating a UnicodeString from 3704 * a codepage string using a UConverter. 3705 */ 3706 void 3707 doCodepageCreate(const char *codepageData, 3708 int32_t dataLength, 3709 UConverter *converter, 3710 UErrorCode &status); 3711 3712 #endif 3713 3714 /* 3715 * This function is called when write access to the array 3716 * is necessary. 3717 * 3718 * We need to make a copy of the array if 3719 * the buffer is read-only, or 3720 * the buffer is refCounted (shared), and refCount>1, or 3721 * the buffer is too small. 3722 * 3723 * Return FALSE if memory could not be allocated. 3724 */ 3725 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 3726 int32_t growCapacity = -1, 3727 UBool doCopyArray = TRUE, 3728 int32_t **pBufferToDelete = 0, 3729 UBool forceClone = FALSE); 3730 3731 /** 3732 * Common function for UnicodeString case mappings. 
* The stringCaseMapper has the same type UStringCaseMapper
   * as in ustr_imp.h for ustrcase_map().
   */
  UnicodeString &
  caseMap(int32_t caseLocale, uint32_t options,
#if !UCONFIG_NO_BREAK_ITERATION
          BreakIterator *iter,
#endif
          UStringCaseMapper *stringCaseMapper);

  // ref counting
  void addRef(void);
  int32_t removeRef(void);
  int32_t refCount(void) const;

  // constants
  enum {
    /**
     * Size of stack buffer for short strings.
     * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
     * @see UNISTR_OBJECT_SIZE
     */
    US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
    kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
    kInvalidHashCode=0, // invalid hash code
    kEmptyHashCode=1, // hash code for empty string

    // bit flag values for fLengthAndFlags
    kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
    kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
    kRefCounted=4,      // there is a refCount field before the characters in fArray
    kBufferIsReadonly=8,// do not write to this buffer
    kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
                        // and releaseBuffer(newLength) must be called
    kAllStorageFlags=0x1f,

    kLengthShift=5,     // remaining 11 bits for non-negative short length, or negative if long
    kLength1=1<<kLengthShift,
    kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
    kLengthIsLarge=0xffe0,  // short length < 0, real length is in fUnion.fFields.fLength

    // combined values for convenience
    kShortString=kUsingStackBuffer,
    kLongString=kRefCounted,
    kReadonlyAlias=kBufferIsReadonly,
    kWritableAlias=0
  };

  friend class UnicodeStringAppendable;

  union StackBufferOrFields;        // forward declaration necessary before friend declaration
  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion

  /*
   * The following are all the class fields that are stored
   * in each UnicodeString object.
   * Note that UnicodeString has virtual functions,
   * therefore there is an implicit vtable pointer
   * as the first real field.
   * The fields should be aligned such that no padding is necessary.
   * On 32-bit machines, the size should be 32 bytes,
   * on 64-bit machines (8-byte pointers), it should be 40 bytes.
   *
   * We use a hack to achieve this.
   *
   * With at least some compilers, each of the following is forced to
   * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
   * rounded up with additional padding if the fields do not already fit that requirement:
   * - sizeof(class UnicodeString)
   * - offsetof(UnicodeString, fUnion)
   * - sizeof(fUnion)
   * - sizeof(fStackFields)
   *
   * We optimize for the longest possible internal buffer for short strings.
   * fUnion.fStackFields begins with 2 bytes for storage flags
   * and the length of relatively short strings,
   * followed by the buffer for short string contents.
   * There is no padding inside fStackFields.
   *
   * Heap-allocated and aliased strings use fUnion.fFields.
   * Both fStackFields and fFields must begin with the same fields for flags and short length,
   * that is, those must have the same memory offsets inside the object,
   * because the flags must be inspected in order to decide which half of fUnion is being used.
   * We assume that the compiler does not reorder the fields.
   *
   * (Padding at the end of fFields is ok:
   * As long as it is no larger than fStackFields, it is not wasted space.)
   *
   * For some of the history of the UnicodeString class fields layout, see
   * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
   * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
   * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
   */
  // (implicit) *vtable;
  union StackBufferOrFields {
    // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
    // Each struct of the union must begin with fLengthAndFlags.
    struct {
      int16_t fLengthAndFlags;          // bit fields: see constants above
      char16_t fBuffer[US_STACKBUF_SIZE];  // buffer for short strings
    } fStackFields;
    struct {
      int16_t fLengthAndFlags;          // bit fields: see constants above
      // Number of characters in fArray; read by length() only when
      // fLengthAndFlags carries no short length (fLengthAndFlags<0), else undefined.
      // NOTE(review): the earlier note said "if >127", but kMaxShortLength is 0x3ff -
      // presumably the threshold is kMaxShortLength; confirm against setLength().
      int32_t fLength;
      int32_t fCapacity;  // capacity of fArray (in char16_ts)
      // array pointer last to minimize padding for machines with P128 data model
      // or pointer sizes that are not a power of 2
      char16_t   *fArray;    // the Unicode data
    } fFields;
  } fUnion;
};

/**
 * Create a new UnicodeString with the concatenation of two others.
 *
 * @param s1 The first string to be copied to the new one.
 * @param s2 The second string to be copied to the new one, after s1.
3850 * @return UnicodeString(s1).append(s2) 3851 * @stable ICU 2.8 3852 */ 3853 U_COMMON_API UnicodeString U_EXPORT2 3854 operator+ (const UnicodeString &s1, const UnicodeString &s2); 3855 3856 //======================================== 3857 // Inline members 3858 //======================================== 3859 3860 //======================================== 3861 // Privates 3862 //======================================== 3863 3864 inline void 3865 UnicodeString::pinIndex(int32_t& start) const 3866 { 3867 // pin index 3868 if(start < 0) { 3869 start = 0; 3870 } else if(start > length()) { 3871 start = length(); 3872 } 3873 } 3874 3875 inline void 3876 UnicodeString::pinIndices(int32_t& start, 3877 int32_t& _length) const 3878 { 3879 // pin indices 3880 int32_t len = length(); 3881 if(start < 0) { 3882 start = 0; 3883 } else if(start > len) { 3884 start = len; 3885 } 3886 if(_length < 0) { 3887 _length = 0; 3888 } else if(_length > (len - start)) { 3889 _length = (len - start); 3890 } 3891 } 3892 3893 inline char16_t* 3894 UnicodeString::getArrayStart() { 3895 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 3896 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 3897 } 3898 3899 inline const char16_t* 3900 UnicodeString::getArrayStart() const { 3901 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
3902 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 3903 } 3904 3905 //======================================== 3906 // Default constructor 3907 //======================================== 3908 3909 inline 3910 UnicodeString::UnicodeString() { 3911 fUnion.fStackFields.fLengthAndFlags=kShortString; 3912 } 3913 3914 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { 3915 fUnion.fStackFields.fLengthAndFlags=kShortString; 3916 } 3917 3918 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { 3919 fUnion.fStackFields.fLengthAndFlags=kShortString; 3920 } 3921 3922 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { 3923 fUnion.fStackFields.fLengthAndFlags=kShortString; 3924 } 3925 3926 //======================================== 3927 // Read-only implementation methods 3928 //======================================== 3929 inline UBool 3930 UnicodeString::hasShortLength() const { 3931 return fUnion.fFields.fLengthAndFlags>=0; 3932 } 3933 3934 inline int32_t 3935 UnicodeString::getShortLength() const { 3936 // fLengthAndFlags must be non-negative -> short length >= 0 3937 // and arithmetic or logical shift does not matter. 3938 return fUnion.fFields.fLengthAndFlags>>kLengthShift; 3939 } 3940 3941 inline int32_t 3942 UnicodeString::length() const { 3943 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength; 3944 } 3945 3946 inline int32_t 3947 UnicodeString::getCapacity() const { 3948 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
3949 US_STACKBUF_SIZE : fUnion.fFields.fCapacity; 3950 } 3951 3952 inline int32_t 3953 UnicodeString::hashCode() const 3954 { return doHashCode(); } 3955 3956 inline UBool 3957 UnicodeString::isBogus() const 3958 { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); } 3959 3960 inline UBool 3961 UnicodeString::isWritable() const 3962 { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); } 3963 3964 inline UBool 3965 UnicodeString::isBufferWritable() const 3966 { 3967 return (UBool)( 3968 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 3969 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); 3970 } 3971 3972 inline const char16_t * 3973 UnicodeString::getBuffer() const { 3974 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { 3975 return nullptr; 3976 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { 3977 return fUnion.fStackFields.fBuffer; 3978 } else { 3979 return fUnion.fFields.fArray; 3980 } 3981 } 3982 3983 //======================================== 3984 // Read-only alias methods 3985 //======================================== 3986 inline int8_t 3987 UnicodeString::doCompare(int32_t start, 3988 int32_t thisLength, 3989 const UnicodeString& srcText, 3990 int32_t srcStart, 3991 int32_t srcLength) const 3992 { 3993 if(srcText.isBogus()) { 3994 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3995 } else { 3996 srcText.pinIndices(srcStart, srcLength); 3997 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3998 } 3999 } 4000 4001 inline UBool 4002 UnicodeString::operator== (const UnicodeString& text) const 4003 { 4004 if(isBogus()) { 4005 return text.isBogus(); 4006 } else { 4007 int32_t len = length(), textLength = text.length(); 4008 return !text.isBogus() && len == textLength && doEquals(text, len); 4009 } 4010 } 4011 4012 inline UBool 4013 UnicodeString::operator!= (const UnicodeString& text) const 4014 { return (! 
operator==(text)); } 4015 4016 inline UBool 4017 UnicodeString::operator> (const UnicodeString& text) const 4018 { return doCompare(0, length(), text, 0, text.length()) == 1; } 4019 4020 inline UBool 4021 UnicodeString::operator< (const UnicodeString& text) const 4022 { return doCompare(0, length(), text, 0, text.length()) == -1; } 4023 4024 inline UBool 4025 UnicodeString::operator>= (const UnicodeString& text) const 4026 { return doCompare(0, length(), text, 0, text.length()) != -1; } 4027 4028 inline UBool 4029 UnicodeString::operator<= (const UnicodeString& text) const 4030 { return doCompare(0, length(), text, 0, text.length()) != 1; } 4031 4032 inline int8_t 4033 UnicodeString::compare(const UnicodeString& text) const 4034 { return doCompare(0, length(), text, 0, text.length()); } 4035 4036 inline int8_t 4037 UnicodeString::compare(int32_t start, 4038 int32_t _length, 4039 const UnicodeString& srcText) const 4040 { return doCompare(start, _length, srcText, 0, srcText.length()); } 4041 4042 inline int8_t 4043 UnicodeString::compare(ConstChar16Ptr srcChars, 4044 int32_t srcLength) const 4045 { return doCompare(0, length(), srcChars, 0, srcLength); } 4046 4047 inline int8_t 4048 UnicodeString::compare(int32_t start, 4049 int32_t _length, 4050 const UnicodeString& srcText, 4051 int32_t srcStart, 4052 int32_t srcLength) const 4053 { return doCompare(start, _length, srcText, srcStart, srcLength); } 4054 4055 inline int8_t 4056 UnicodeString::compare(int32_t start, 4057 int32_t _length, 4058 const char16_t *srcChars) const 4059 { return doCompare(start, _length, srcChars, 0, _length); } 4060 4061 inline int8_t 4062 UnicodeString::compare(int32_t start, 4063 int32_t _length, 4064 const char16_t *srcChars, 4065 int32_t srcStart, 4066 int32_t srcLength) const 4067 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 4068 4069 inline int8_t 4070 UnicodeString::compareBetween(int32_t start, 4071 int32_t limit, 4072 const UnicodeString& srcText, 4073 
int32_t srcStart, 4074 int32_t srcLimit) const 4075 { return doCompare(start, limit - start, 4076 srcText, srcStart, srcLimit - srcStart); } 4077 4078 inline int8_t 4079 UnicodeString::doCompareCodePointOrder(int32_t start, 4080 int32_t thisLength, 4081 const UnicodeString& srcText, 4082 int32_t srcStart, 4083 int32_t srcLength) const 4084 { 4085 if(srcText.isBogus()) { 4086 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 4087 } else { 4088 srcText.pinIndices(srcStart, srcLength); 4089 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 4090 } 4091 } 4092 4093 inline int8_t 4094 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 4095 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 4096 4097 inline int8_t 4098 UnicodeString::compareCodePointOrder(int32_t start, 4099 int32_t _length, 4100 const UnicodeString& srcText) const 4101 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 4102 4103 inline int8_t 4104 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, 4105 int32_t srcLength) const 4106 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 4107 4108 inline int8_t 4109 UnicodeString::compareCodePointOrder(int32_t start, 4110 int32_t _length, 4111 const UnicodeString& srcText, 4112 int32_t srcStart, 4113 int32_t srcLength) const 4114 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 4115 4116 inline int8_t 4117 UnicodeString::compareCodePointOrder(int32_t start, 4118 int32_t _length, 4119 const char16_t *srcChars) const 4120 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 4121 4122 inline int8_t 4123 UnicodeString::compareCodePointOrder(int32_t start, 4124 int32_t _length, 4125 const char16_t *srcChars, 4126 int32_t srcStart, 4127 int32_t srcLength) const 4128 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 
4129 4130 inline int8_t 4131 UnicodeString::compareCodePointOrderBetween(int32_t start, 4132 int32_t limit, 4133 const UnicodeString& srcText, 4134 int32_t srcStart, 4135 int32_t srcLimit) const 4136 { return doCompareCodePointOrder(start, limit - start, 4137 srcText, srcStart, srcLimit - srcStart); } 4138 4139 inline int8_t 4140 UnicodeString::doCaseCompare(int32_t start, 4141 int32_t thisLength, 4142 const UnicodeString &srcText, 4143 int32_t srcStart, 4144 int32_t srcLength, 4145 uint32_t options) const 4146 { 4147 if(srcText.isBogus()) { 4148 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 4149 } else { 4150 srcText.pinIndices(srcStart, srcLength); 4151 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 4152 } 4153 } 4154 4155 inline int8_t 4156 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 4157 return doCaseCompare(0, length(), text, 0, text.length(), options); 4158 } 4159 4160 inline int8_t 4161 UnicodeString::caseCompare(int32_t start, 4162 int32_t _length, 4163 const UnicodeString &srcText, 4164 uint32_t options) const { 4165 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 4166 } 4167 4168 inline int8_t 4169 UnicodeString::caseCompare(ConstChar16Ptr srcChars, 4170 int32_t srcLength, 4171 uint32_t options) const { 4172 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 4173 } 4174 4175 inline int8_t 4176 UnicodeString::caseCompare(int32_t start, 4177 int32_t _length, 4178 const UnicodeString &srcText, 4179 int32_t srcStart, 4180 int32_t srcLength, 4181 uint32_t options) const { 4182 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 4183 } 4184 4185 inline int8_t 4186 UnicodeString::caseCompare(int32_t start, 4187 int32_t _length, 4188 const char16_t *srcChars, 4189 uint32_t options) const { 4190 return doCaseCompare(start, _length, srcChars, 0, _length, options); 4191 } 4192 4193 inline int8_t 
4194 UnicodeString::caseCompare(int32_t start, 4195 int32_t _length, 4196 const char16_t *srcChars, 4197 int32_t srcStart, 4198 int32_t srcLength, 4199 uint32_t options) const { 4200 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 4201 } 4202 4203 inline int8_t 4204 UnicodeString::caseCompareBetween(int32_t start, 4205 int32_t limit, 4206 const UnicodeString &srcText, 4207 int32_t srcStart, 4208 int32_t srcLimit, 4209 uint32_t options) const { 4210 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 4211 } 4212 4213 inline int32_t 4214 UnicodeString::indexOf(const UnicodeString& srcText, 4215 int32_t srcStart, 4216 int32_t srcLength, 4217 int32_t start, 4218 int32_t _length) const 4219 { 4220 if(!srcText.isBogus()) { 4221 srcText.pinIndices(srcStart, srcLength); 4222 if(srcLength > 0) { 4223 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 4224 } 4225 } 4226 return -1; 4227 } 4228 4229 inline int32_t 4230 UnicodeString::indexOf(const UnicodeString& text) const 4231 { return indexOf(text, 0, text.length(), 0, length()); } 4232 4233 inline int32_t 4234 UnicodeString::indexOf(const UnicodeString& text, 4235 int32_t start) const { 4236 pinIndex(start); 4237 return indexOf(text, 0, text.length(), start, length() - start); 4238 } 4239 4240 inline int32_t 4241 UnicodeString::indexOf(const UnicodeString& text, 4242 int32_t start, 4243 int32_t _length) const 4244 { return indexOf(text, 0, text.length(), start, _length); } 4245 4246 inline int32_t 4247 UnicodeString::indexOf(const char16_t *srcChars, 4248 int32_t srcLength, 4249 int32_t start) const { 4250 pinIndex(start); 4251 return indexOf(srcChars, 0, srcLength, start, length() - start); 4252 } 4253 4254 inline int32_t 4255 UnicodeString::indexOf(ConstChar16Ptr srcChars, 4256 int32_t srcLength, 4257 int32_t start, 4258 int32_t _length) const 4259 { return indexOf(srcChars, 0, srcLength, start, _length); } 4260 4261 inline 
int32_t 4262 UnicodeString::indexOf(char16_t c, 4263 int32_t start, 4264 int32_t _length) const 4265 { return doIndexOf(c, start, _length); } 4266 4267 inline int32_t 4268 UnicodeString::indexOf(UChar32 c, 4269 int32_t start, 4270 int32_t _length) const 4271 { return doIndexOf(c, start, _length); } 4272 4273 inline int32_t 4274 UnicodeString::indexOf(char16_t c) const 4275 { return doIndexOf(c, 0, length()); } 4276 4277 inline int32_t 4278 UnicodeString::indexOf(UChar32 c) const 4279 { return indexOf(c, 0, length()); } 4280 4281 inline int32_t 4282 UnicodeString::indexOf(char16_t c, 4283 int32_t start) const { 4284 pinIndex(start); 4285 return doIndexOf(c, start, length() - start); 4286 } 4287 4288 inline int32_t 4289 UnicodeString::indexOf(UChar32 c, 4290 int32_t start) const { 4291 pinIndex(start); 4292 return indexOf(c, start, length() - start); 4293 } 4294 4295 inline int32_t 4296 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, 4297 int32_t srcLength, 4298 int32_t start, 4299 int32_t _length) const 4300 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 4301 4302 inline int32_t 4303 UnicodeString::lastIndexOf(const char16_t *srcChars, 4304 int32_t srcLength, 4305 int32_t start) const { 4306 pinIndex(start); 4307 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 4308 } 4309 4310 inline int32_t 4311 UnicodeString::lastIndexOf(const UnicodeString& srcText, 4312 int32_t srcStart, 4313 int32_t srcLength, 4314 int32_t start, 4315 int32_t _length) const 4316 { 4317 if(!srcText.isBogus()) { 4318 srcText.pinIndices(srcStart, srcLength); 4319 if(srcLength > 0) { 4320 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 4321 } 4322 } 4323 return -1; 4324 } 4325 4326 inline int32_t 4327 UnicodeString::lastIndexOf(const UnicodeString& text, 4328 int32_t start, 4329 int32_t _length) const 4330 { return lastIndexOf(text, 0, text.length(), start, _length); } 4331 4332 inline int32_t 4333 
UnicodeString::lastIndexOf(const UnicodeString& text, 4334 int32_t start) const { 4335 pinIndex(start); 4336 return lastIndexOf(text, 0, text.length(), start, length() - start); 4337 } 4338 4339 inline int32_t 4340 UnicodeString::lastIndexOf(const UnicodeString& text) const 4341 { return lastIndexOf(text, 0, text.length(), 0, length()); } 4342 4343 inline int32_t 4344 UnicodeString::lastIndexOf(char16_t c, 4345 int32_t start, 4346 int32_t _length) const 4347 { return doLastIndexOf(c, start, _length); } 4348 4349 inline int32_t 4350 UnicodeString::lastIndexOf(UChar32 c, 4351 int32_t start, 4352 int32_t _length) const { 4353 return doLastIndexOf(c, start, _length); 4354 } 4355 4356 inline int32_t 4357 UnicodeString::lastIndexOf(char16_t c) const 4358 { return doLastIndexOf(c, 0, length()); } 4359 4360 inline int32_t 4361 UnicodeString::lastIndexOf(UChar32 c) const { 4362 return lastIndexOf(c, 0, length()); 4363 } 4364 4365 inline int32_t 4366 UnicodeString::lastIndexOf(char16_t c, 4367 int32_t start) const { 4368 pinIndex(start); 4369 return doLastIndexOf(c, start, length() - start); 4370 } 4371 4372 inline int32_t 4373 UnicodeString::lastIndexOf(UChar32 c, 4374 int32_t start) const { 4375 pinIndex(start); 4376 return lastIndexOf(c, start, length() - start); 4377 } 4378 4379 inline UBool 4380 UnicodeString::startsWith(const UnicodeString& text) const 4381 { return compare(0, text.length(), text, 0, text.length()) == 0; } 4382 4383 inline UBool 4384 UnicodeString::startsWith(const UnicodeString& srcText, 4385 int32_t srcStart, 4386 int32_t srcLength) const 4387 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 4388 4389 inline UBool 4390 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { 4391 if(srcLength < 0) { 4392 srcLength = u_strlen(toUCharPtr(srcChars)); 4393 } 4394 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 4395 } 4396 4397 inline UBool 4398 UnicodeString::startsWith(const char16_t *srcChars, 
int32_t srcStart, int32_t srcLength) const { 4399 if(srcLength < 0) { 4400 srcLength = u_strlen(toUCharPtr(srcChars)); 4401 } 4402 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 4403 } 4404 4405 inline UBool 4406 UnicodeString::endsWith(const UnicodeString& text) const 4407 { return doCompare(length() - text.length(), text.length(), 4408 text, 0, text.length()) == 0; } 4409 4410 inline UBool 4411 UnicodeString::endsWith(const UnicodeString& srcText, 4412 int32_t srcStart, 4413 int32_t srcLength) const { 4414 srcText.pinIndices(srcStart, srcLength); 4415 return doCompare(length() - srcLength, srcLength, 4416 srcText, srcStart, srcLength) == 0; 4417 } 4418 4419 inline UBool 4420 UnicodeString::endsWith(ConstChar16Ptr srcChars, 4421 int32_t srcLength) const { 4422 if(srcLength < 0) { 4423 srcLength = u_strlen(toUCharPtr(srcChars)); 4424 } 4425 return doCompare(length() - srcLength, srcLength, 4426 srcChars, 0, srcLength) == 0; 4427 } 4428 4429 inline UBool 4430 UnicodeString::endsWith(const char16_t *srcChars, 4431 int32_t srcStart, 4432 int32_t srcLength) const { 4433 if(srcLength < 0) { 4434 srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); 4435 } 4436 return doCompare(length() - srcLength, srcLength, 4437 srcChars, srcStart, srcLength) == 0; 4438 } 4439 4440 //======================================== 4441 // replace 4442 //======================================== 4443 inline UnicodeString& 4444 UnicodeString::replace(int32_t start, 4445 int32_t _length, 4446 const UnicodeString& srcText) 4447 { return doReplace(start, _length, srcText, 0, srcText.length()); } 4448 4449 inline UnicodeString& 4450 UnicodeString::replace(int32_t start, 4451 int32_t _length, 4452 const UnicodeString& srcText, 4453 int32_t srcStart, 4454 int32_t srcLength) 4455 { return doReplace(start, _length, srcText, srcStart, srcLength); } 4456 4457 inline UnicodeString& 4458 UnicodeString::replace(int32_t start, 4459 int32_t _length, 4460 ConstChar16Ptr srcChars, 4461 
int32_t srcLength) 4462 { return doReplace(start, _length, srcChars, 0, srcLength); } 4463 4464 inline UnicodeString& 4465 UnicodeString::replace(int32_t start, 4466 int32_t _length, 4467 const char16_t *srcChars, 4468 int32_t srcStart, 4469 int32_t srcLength) 4470 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 4471 4472 inline UnicodeString& 4473 UnicodeString::replace(int32_t start, 4474 int32_t _length, 4475 char16_t srcChar) 4476 { return doReplace(start, _length, &srcChar, 0, 1); } 4477 4478 inline UnicodeString& 4479 UnicodeString::replaceBetween(int32_t start, 4480 int32_t limit, 4481 const UnicodeString& srcText) 4482 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 4483 4484 inline UnicodeString& 4485 UnicodeString::replaceBetween(int32_t start, 4486 int32_t limit, 4487 const UnicodeString& srcText, 4488 int32_t srcStart, 4489 int32_t srcLimit) 4490 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 4491 4492 inline UnicodeString& 4493 UnicodeString::findAndReplace(const UnicodeString& oldText, 4494 const UnicodeString& newText) 4495 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 4496 newText, 0, newText.length()); } 4497 4498 inline UnicodeString& 4499 UnicodeString::findAndReplace(int32_t start, 4500 int32_t _length, 4501 const UnicodeString& oldText, 4502 const UnicodeString& newText) 4503 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 4504 newText, 0, newText.length()); } 4505 4506 // ============================ 4507 // extract 4508 // ============================ 4509 inline void 4510 UnicodeString::doExtract(int32_t start, 4511 int32_t _length, 4512 UnicodeString& target) const 4513 { target.replace(0, target.length(), *this, start, _length); } 4514 4515 inline void 4516 UnicodeString::extract(int32_t start, 4517 int32_t _length, 4518 Char16Ptr target, 4519 int32_t targetStart) const 4520 { doExtract(start, _length, target, 
targetStart); } 4521 4522 inline void 4523 UnicodeString::extract(int32_t start, 4524 int32_t _length, 4525 UnicodeString& target) const 4526 { doExtract(start, _length, target); } 4527 4528 #if !UCONFIG_NO_CONVERSION 4529 4530 inline int32_t 4531 UnicodeString::extract(int32_t start, 4532 int32_t _length, 4533 char *dst, 4534 const char *codepage) const 4535 4536 { 4537 // This dstSize value will be checked explicitly 4538 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); 4539 } 4540 4541 #endif 4542 4543 inline void 4544 UnicodeString::extractBetween(int32_t start, 4545 int32_t limit, 4546 char16_t *dst, 4547 int32_t dstStart) const { 4548 pinIndex(start); 4549 pinIndex(limit); 4550 doExtract(start, limit - start, dst, dstStart); 4551 } 4552 4553 inline UnicodeString 4554 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 4555 return tempSubString(start, limit - start); 4556 } 4557 4558 inline char16_t 4559 UnicodeString::doCharAt(int32_t offset) const 4560 { 4561 if((uint32_t)offset < (uint32_t)length()) { 4562 return getArrayStart()[offset]; 4563 } else { 4564 return kInvalidUChar; 4565 } 4566 } 4567 4568 inline char16_t 4569 UnicodeString::charAt(int32_t offset) const 4570 { return doCharAt(offset); } 4571 4572 inline char16_t 4573 UnicodeString::operator[] (int32_t offset) const 4574 { return doCharAt(offset); } 4575 4576 inline UBool 4577 UnicodeString::isEmpty() const { 4578 // Arithmetic or logical right shift does not matter: only testing for 0. 
4579 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0; 4580 } 4581 4582 //======================================== 4583 // Write implementation methods 4584 //======================================== 4585 inline void 4586 UnicodeString::setZeroLength() { 4587 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags; 4588 } 4589 4590 inline void 4591 UnicodeString::setShortLength(int32_t len) { 4592 // requires 0 <= len <= kMaxShortLength 4593 fUnion.fFields.fLengthAndFlags = 4594 (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); 4595 } 4596 4597 inline void 4598 UnicodeString::setLength(int32_t len) { 4599 if(len <= kMaxShortLength) { 4600 setShortLength(len); 4601 } else { 4602 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge; 4603 fUnion.fFields.fLength = len; 4604 } 4605 } 4606 4607 inline void 4608 UnicodeString::setToEmpty() { 4609 fUnion.fFields.fLengthAndFlags = kShortString; 4610 } 4611 4612 inline void 4613 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { 4614 setLength(len); 4615 fUnion.fFields.fArray = array; 4616 fUnion.fFields.fCapacity = capacity; 4617 } 4618 4619 inline UnicodeString& 4620 UnicodeString::operator= (char16_t ch) 4621 { return doReplace(0, length(), &ch, 0, 1); } 4622 4623 inline UnicodeString& 4624 UnicodeString::operator= (UChar32 ch) 4625 { return replace(0, length(), ch); } 4626 4627 inline UnicodeString& 4628 UnicodeString::setTo(const UnicodeString& srcText, 4629 int32_t srcStart, 4630 int32_t srcLength) 4631 { 4632 unBogus(); 4633 return doReplace(0, length(), srcText, srcStart, srcLength); 4634 } 4635 4636 inline UnicodeString& 4637 UnicodeString::setTo(const UnicodeString& srcText, 4638 int32_t srcStart) 4639 { 4640 unBogus(); 4641 srcText.pinIndex(srcStart); 4642 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 4643 } 4644 4645 inline UnicodeString& 4646 UnicodeString::setTo(const UnicodeString& srcText) 4647 { 4648 return 
copyFrom(srcText); 4649 } 4650 4651 inline UnicodeString& 4652 UnicodeString::setTo(const char16_t *srcChars, 4653 int32_t srcLength) 4654 { 4655 unBogus(); 4656 return doReplace(0, length(), srcChars, 0, srcLength); 4657 } 4658 4659 inline UnicodeString& 4660 UnicodeString::setTo(char16_t srcChar) 4661 { 4662 unBogus(); 4663 return doReplace(0, length(), &srcChar, 0, 1); 4664 } 4665 4666 inline UnicodeString& 4667 UnicodeString::setTo(UChar32 srcChar) 4668 { 4669 unBogus(); 4670 return replace(0, length(), srcChar); 4671 } 4672 4673 inline UnicodeString& 4674 UnicodeString::append(const UnicodeString& srcText, 4675 int32_t srcStart, 4676 int32_t srcLength) 4677 { return doAppend(srcText, srcStart, srcLength); } 4678 4679 inline UnicodeString& 4680 UnicodeString::append(const UnicodeString& srcText) 4681 { return doAppend(srcText, 0, srcText.length()); } 4682 4683 inline UnicodeString& 4684 UnicodeString::append(const char16_t *srcChars, 4685 int32_t srcStart, 4686 int32_t srcLength) 4687 { return doAppend(srcChars, srcStart, srcLength); } 4688 4689 inline UnicodeString& 4690 UnicodeString::append(ConstChar16Ptr srcChars, 4691 int32_t srcLength) 4692 { return doAppend(srcChars, 0, srcLength); } 4693 4694 inline UnicodeString& 4695 UnicodeString::append(char16_t srcChar) 4696 { return doAppend(&srcChar, 0, 1); } 4697 4698 inline UnicodeString& 4699 UnicodeString::operator+= (char16_t ch) 4700 { return doAppend(&ch, 0, 1); } 4701 4702 inline UnicodeString& 4703 UnicodeString::operator+= (UChar32 ch) { 4704 return append(ch); 4705 } 4706 4707 inline UnicodeString& 4708 UnicodeString::operator+= (const UnicodeString& srcText) 4709 { return doAppend(srcText, 0, srcText.length()); } 4710 4711 inline UnicodeString& 4712 UnicodeString::insert(int32_t start, 4713 const UnicodeString& srcText, 4714 int32_t srcStart, 4715 int32_t srcLength) 4716 { return doReplace(start, 0, srcText, srcStart, srcLength); } 4717 4718 inline UnicodeString& 4719 UnicodeString::insert(int32_t 
start, 4720 const UnicodeString& srcText) 4721 { return doReplace(start, 0, srcText, 0, srcText.length()); } 4722 4723 inline UnicodeString& 4724 UnicodeString::insert(int32_t start, 4725 const char16_t *srcChars, 4726 int32_t srcStart, 4727 int32_t srcLength) 4728 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 4729 4730 inline UnicodeString& 4731 UnicodeString::insert(int32_t start, 4732 ConstChar16Ptr srcChars, 4733 int32_t srcLength) 4734 { return doReplace(start, 0, srcChars, 0, srcLength); } 4735 4736 inline UnicodeString& 4737 UnicodeString::insert(int32_t start, 4738 char16_t srcChar) 4739 { return doReplace(start, 0, &srcChar, 0, 1); } 4740 4741 inline UnicodeString& 4742 UnicodeString::insert(int32_t start, 4743 UChar32 srcChar) 4744 { return replace(start, 0, srcChar); } 4745 4746 4747 inline UnicodeString& 4748 UnicodeString::remove() 4749 { 4750 // remove() of a bogus string makes the string empty and non-bogus 4751 if(isBogus()) { 4752 setToEmpty(); 4753 } else { 4754 setZeroLength(); 4755 } 4756 return *this; 4757 } 4758 4759 inline UnicodeString& 4760 UnicodeString::remove(int32_t start, 4761 int32_t _length) 4762 { 4763 if(start <= 0 && _length == INT32_MAX) { 4764 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 4765 return remove(); 4766 } 4767 return doReplace(start, _length, NULL, 0, 0); 4768 } 4769 4770 inline UnicodeString& 4771 UnicodeString::removeBetween(int32_t start, 4772 int32_t limit) 4773 { return doReplace(start, limit - start, NULL, 0, 0); } 4774 4775 inline UnicodeString & 4776 UnicodeString::retainBetween(int32_t start, int32_t limit) { 4777 truncate(limit); 4778 return doReplace(0, start, NULL, 0, 0); 4779 } 4780 4781 inline UBool 4782 UnicodeString::truncate(int32_t targetLength) 4783 { 4784 if(isBogus() && targetLength == 0) { 4785 // truncate(0) of a bogus string makes the string empty and non-bogus 4786 unBogus(); 4787 return FALSE; 4788 } else if((uint32_t)targetLength < 
(uint32_t)length()) { 4789 setLength(targetLength); 4790 return TRUE; 4791 } else { 4792 return FALSE; 4793 } 4794 } 4795 4796 inline UnicodeString& 4797 UnicodeString::reverse() 4798 { return doReverse(0, length()); } 4799 4800 inline UnicodeString& 4801 UnicodeString::reverse(int32_t start, 4802 int32_t _length) 4803 { return doReverse(start, _length); } 4804 4805 U_NAMESPACE_END 4806 4807 #endif 4808