1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 1998-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * 9 * File unistr.h 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 09/25/98 stephen Creation. 15 * 11/11/98 stephen Changed per 11/9 code review. 16 * 04/20/99 stephen Overhauled per 4/16 code review. 17 * 11/18/99 aliu Made to inherit from Replaceable. Added method 18 * handleReplaceBetween(); other methods unchanged. 19 * 06/25/01 grhoten Remove dependency on iostream. 20 ****************************************************************************** 21 */ 22 23 #ifndef UNISTR_H 24 #define UNISTR_H 25 26 /** 27 * \file 28 * \brief C++ API: Unicode String 29 */ 30 31 #include <cstddef> 32 #include "unicode/utypes.h" 33 #include "unicode/char16ptr.h" 34 #include "unicode/rep.h" 35 #include "unicode/std_string.h" 36 #include "unicode/stringpiece.h" 37 #include "unicode/bytestream.h" 38 39 struct UConverter; // unicode/ucnv.h 40 41 #ifndef USTRING_H 42 /** 43 * \ingroup ustring_ustrlen 44 */ 45 U_STABLE int32_t U_EXPORT2 46 u_strlen(const UChar *s); 47 #endif 48 49 U_NAMESPACE_BEGIN 50 51 #if !UCONFIG_NO_BREAK_ITERATION 52 class BreakIterator; // unicode/brkiter.h 53 #endif 54 class Edits; 55 56 U_NAMESPACE_END 57 58 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. 59 /** 60 * Internal string case mapping function type. 61 * All error checking must be done. 62 * src and dest must not overlap. 
63 * @internal 64 */ 65 typedef int32_t U_CALLCONV 66 UStringCaseMapper(int32_t caseLocale, uint32_t options, 67 #if !UCONFIG_NO_BREAK_ITERATION 68 icu::BreakIterator *iter, 69 #endif 70 char16_t *dest, int32_t destCapacity, 71 const char16_t *src, int32_t srcLength, 72 icu::Edits *edits, 73 UErrorCode &errorCode); 74 75 U_NAMESPACE_BEGIN 76 77 class Locale; // unicode/locid.h 78 class StringCharacterIterator; 79 class UnicodeStringAppendable; // unicode/appendable.h 80 81 /* The <iostream> include has been moved to unicode/ustream.h */ 82 83 /** 84 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 85 * which constructs a Unicode string from an invariant-character char * string. 86 * About invariant characters see utypes.h. 87 * This constructor has no runtime dependency on conversion code and is 88 * therefore recommended over ones taking a charset name string 89 * (where the empty string "" indicates invariant-character conversion). 90 * 91 * @stable ICU 3.2 92 */ 93 #define US_INV icu::UnicodeString::kInvariant 94 95 /** 96 * Unicode String literals in C++. 97 * 98 * Note: these macros are not recommended for new code. 99 * Prior to the availability of C++11 and u"unicode string literals", 100 * these macros were provided for portability and efficiency when 101 * initializing UnicodeStrings from literals. 102 * 103 * They work only for strings that contain "invariant characters", i.e., 104 * only latin letters, digits, and some punctuation. 105 * See utypes.h for details. 106 * 107 * The string parameter must be a C string literal. 108 * The length of the string, not including the terminating 109 * <code>NUL</code>, must be specified as a constant. 
110 * @stable ICU 2.0 111 */ 112 #if !U_CHAR16_IS_TYPEDEF 113 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length) 114 #else 115 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length) 116 #endif 117 118 /** 119 * Unicode String literals in C++. 120 * Dependent on the platform properties, different UnicodeString 121 * constructors should be used to create a UnicodeString object from 122 * a string literal. 123 * The macros are defined for improved performance. 124 * They work only for strings that contain "invariant characters", i.e., 125 * only latin letters, digits, and some punctuation. 126 * See utypes.h for details. 127 * 128 * The string parameter must be a C string literal. 129 * @stable ICU 2.0 130 */ 131 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 132 133 /** 134 * \def UNISTR_FROM_CHAR_EXPLICIT 135 * This can be defined to be empty or "explicit". 136 * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) 137 * constructors are marked as explicit, preventing their inadvertent use. 138 * @stable ICU 49 139 */ 140 #ifndef UNISTR_FROM_CHAR_EXPLICIT 141 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 142 // Auto-"explicit" in ICU library code. 143 # define UNISTR_FROM_CHAR_EXPLICIT explicit 144 # else 145 // Empty by default for source code compatibility. 146 # define UNISTR_FROM_CHAR_EXPLICIT 147 # endif 148 #endif 149 150 /** 151 * \def UNISTR_FROM_STRING_EXPLICIT 152 * This can be defined to be empty or "explicit". 153 * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) 154 * constructors are marked as explicit, preventing their inadvertent use. 155 * 156 * In particular, this helps prevent accidentally depending on ICU conversion code 157 * by passing a string literal into an API with a const UnicodeString & parameter. 
158 * @stable ICU 49 159 */ 160 #ifndef UNISTR_FROM_STRING_EXPLICIT 161 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) 162 // Auto-"explicit" in ICU library code. 163 # define UNISTR_FROM_STRING_EXPLICIT explicit 164 # else 165 // Empty by default for source code compatibility. 166 # define UNISTR_FROM_STRING_EXPLICIT 167 # endif 168 #endif 169 170 /** 171 * \def UNISTR_OBJECT_SIZE 172 * Desired sizeof(UnicodeString) in bytes. 173 * It should be a multiple of sizeof(pointer) to avoid unusable space for padding. 174 * The object size may want to be a multiple of 16 bytes, 175 * which is a common granularity for heap allocation. 176 * 177 * Any space inside the object beyond sizeof(vtable pointer) + 2 178 * is available for storing short strings inside the object. 179 * The bigger the object, the longer a string that can be stored inside the object, 180 * without additional heap allocation. 181 * 182 * Depending on a platform's pointer size, pointer alignment requirements, 183 * and struct padding, the compiler will usually round up sizeof(UnicodeString) 184 * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), 185 * to hold the fields for heap-allocated strings. 186 * Such a minimum size also ensures that the object is easily large enough 187 * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). 188 * 189 * sizeof(UnicodeString) >= 48 should work for all known platforms. 190 * 191 * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, 192 * sizeof(UnicodeString) = 64 would leave space for 193 * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 194 * char16_ts stored inside the object. 195 * 196 * The minimum object size on a 64-bit machine would be 197 * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, 198 * and the internal buffer would hold up to 11 char16_ts in that case. 
199 * 200 * @see U16_MAX_LENGTH 201 * @stable ICU 56 202 */ 203 #ifndef UNISTR_OBJECT_SIZE 204 # define UNISTR_OBJECT_SIZE 64 205 #endif 206 207 /** 208 * UnicodeString is a string class that stores Unicode characters directly and provides 209 * similar functionality as the Java String and StringBuffer/StringBuilder classes. 210 * It is a concrete implementation of the abstract class Replaceable (for transliteration). 211 * 212 * A UnicodeString may also "alias" an external array of characters 213 * (that is, point to it, rather than own the array) 214 * whose lifetime must then at least match the lifetime of the aliasing object. 215 * This aliasing may be preserved when returning a UnicodeString by value, 216 * depending on the compiler and the function implementation, 217 * via Return Value Optimization (RVO) or the move assignment operator. 218 * (However, the copy assignment operator does not preserve aliasing.) 219 * For details see the description of storage models at the end of the class API docs 220 * and in the User Guide chapter linked from there. 221 * 222 * The UnicodeString class is not suitable for subclassing. 223 * 224 * <p>For an overview of Unicode strings in C and C++ see the 225 * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p> 226 * 227 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. 228 * A Unicode character may be stored with either one code unit 229 * (the most common case) or with a matched pair of special code units 230 * ("surrogates"). The data type for code units is char16_t. 231 * For single-character handling, a Unicode character code <em>point</em> is a value 232 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p> 233 * 234 * <p>Indexes and offsets into and lengths of strings always count code units, not code points. 235 * This is the same as with multi-byte char* strings in traditional string handling. 
236 * Operations on partial strings typically do not test for code point boundaries. 237 * If necessary, the user needs to take care of such boundaries by testing for the code unit 238 * values or by using functions like 239 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() 240 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p> 241 * 242 * UnicodeString methods are more lenient with regard to input parameter values 243 * than other ICU APIs. In particular: 244 * - If indexes are out of bounds for a UnicodeString object 245 * (<0 or >length()) then they are "pinned" to the nearest boundary. 246 * - If the buffer passed to an insert/append/replace operation is owned by the 247 * target object, e.g., calling str.append(str), an extra copy may take place 248 * to ensure safety. 249 * - If primitive string pointer values (e.g., const char16_t * or char *) 250 * for input strings are NULL, then those input string parameters are treated 251 * as if they pointed to an empty string. 252 * However, this is <em>not</em> the case for char * parameters for charset names 253 * or other IDs. 254 * - Most UnicodeString methods do not take a UErrorCode parameter because 255 * there are usually very few opportunities for failure other than a shortage 256 * of memory, error codes in low-level C++ string methods would be inconvenient, 257 * and the error code as the last parameter (ICU convention) would prevent 258 * the use of default parameter values. 259 * Instead, such methods set the UnicodeString into a "bogus" state 260 * (see isBogus()) if an error occurs. 261 * 262 * In string comparisons, two UnicodeString objects that are both "bogus" 263 * compare equal (to be transitive and prevent endless loops in sorting), 264 * and a "bogus" string compares less than any non-"bogus" one. 265 * 266 * Const UnicodeString methods are thread-safe. 
Multiple threads can use 267 * const methods on the same UnicodeString object simultaneously, 268 * but non-const methods must not be called concurrently (in multiple threads) 269 * with any other (const or non-const) methods. 270 * 271 * Similarly, const UnicodeString & parameters are thread-safe. 272 * One object may be passed in as such a parameter concurrently in multiple threads. 273 * This includes the const UnicodeString & parameters for 274 * copy construction, assignment, and cloning. 275 * 276 * <p>UnicodeString uses several storage methods. 277 * String contents can be stored inside the UnicodeString object itself, 278 * in an allocated and shared buffer, or in an outside buffer that is "aliased". 279 * Most of this is done transparently, but careful aliasing in particular provides 280 * significant performance improvements. 281 * Also, the internal buffer is accessible via special functions. 282 * For details see the 283 * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p> 284 * 285 * @see utf.h 286 * @see CharacterIterator 287 * @stable ICU 2.0 288 */ 289 class U_COMMON_API UnicodeString : public Replaceable 290 { 291 public: 292 293 /** 294 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor 295 * which constructs a Unicode string from an invariant-character char * string. 296 * Use the macro US_INV instead of the full qualification for this value. 297 * 298 * @see US_INV 299 * @stable ICU 3.2 300 */ 301 enum EInvariant { 302 /** 303 * @see EInvariant 304 * @stable ICU 3.2 305 */ 306 kInvariant 307 }; 308 309 //======================================== 310 // Read-only operations 311 //======================================== 312 313 /* Comparison - bitwise only - for international comparison use collation */ 314 315 /** 316 * Equality operator. Performs only bitwise comparison. 
317 * @param text The UnicodeString to compare to this one. 318 * @return TRUE if <TT>text</TT> contains the same characters as this one, 319 * FALSE otherwise. 320 * @stable ICU 2.0 321 */ 322 inline UBool operator== (const UnicodeString& text) const; 323 324 /** 325 * Inequality operator. Performs only bitwise comparison. 326 * @param text The UnicodeString to compare to this one. 327 * @return FALSE if <TT>text</TT> contains the same characters as this one, 328 * TRUE otherwise. 329 * @stable ICU 2.0 330 */ 331 inline UBool operator!= (const UnicodeString& text) const; 332 333 /** 334 * Greater than operator. Performs only bitwise comparison. 335 * @param text The UnicodeString to compare to this one. 336 * @return TRUE if the characters in this are bitwise 337 * greater than the characters in <code>text</code>, FALSE otherwise 338 * @stable ICU 2.0 339 */ 340 inline UBool operator> (const UnicodeString& text) const; 341 342 /** 343 * Less than operator. Performs only bitwise comparison. 344 * @param text The UnicodeString to compare to this one. 345 * @return TRUE if the characters in this are bitwise 346 * less than the characters in <code>text</code>, FALSE otherwise 347 * @stable ICU 2.0 348 */ 349 inline UBool operator< (const UnicodeString& text) const; 350 351 /** 352 * Greater than or equal operator. Performs only bitwise comparison. 353 * @param text The UnicodeString to compare to this one. 354 * @return TRUE if the characters in this are bitwise 355 * greater than or equal to the characters in <code>text</code>, FALSE otherwise 356 * @stable ICU 2.0 357 */ 358 inline UBool operator>= (const UnicodeString& text) const; 359 360 /** 361 * Less than or equal operator. Performs only bitwise comparison. 362 * @param text The UnicodeString to compare to this one. 
363 * @return TRUE if the characters in this are bitwise 364 * less than or equal to the characters in <code>text</code>, FALSE otherwise 365 * @stable ICU 2.0 366 */ 367 inline UBool operator<= (const UnicodeString& text) const; 368 369 /** 370 * Compare the characters bitwise in this UnicodeString to 371 * the characters in <code>text</code>. 372 * @param text The UnicodeString to compare to this one. 373 * @return The result of bitwise character comparison: 0 if this 374 * contains the same characters as <code>text</code>, -1 if the characters in 375 * this are bitwise less than the characters in <code>text</code>, +1 if the 376 * characters in this are bitwise greater than the characters 377 * in <code>text</code>. 378 * @stable ICU 2.0 379 */ 380 inline int8_t compare(const UnicodeString& text) const; 381 382 /** 383 * Compare the characters bitwise in the range 384 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 385 * in the <b>entire string</b> <TT>text</TT>. 386 * (The parameters "start" and "length" are not applied to the other text "text".) 387 * @param start the offset at which the compare operation begins 388 * @param length the number of characters of text to compare. 389 * @param text the other text to be compared against this string. 390 * @return The result of bitwise character comparison: 0 if this 391 * contains the same characters as <code>text</code>, -1 if the characters in 392 * this are bitwise less than the characters in <code>text</code>, +1 if the 393 * characters in this are bitwise greater than the characters 394 * in <code>text</code>. 395 * @stable ICU 2.0 396 */ 397 inline int8_t compare(int32_t start, 398 int32_t length, 399 const UnicodeString& text) const; 400 401 /** 402 * Compare the characters bitwise in the range 403 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 404 * in <TT>srcText</TT> in the range 405 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 
406 * @param start the offset at which the compare operation begins 407 * @param length the number of characters in this to compare. 408 * @param srcText the text to be compared 409 * @param srcStart the offset into <TT>srcText</TT> to start comparison 410 * @param srcLength the number of characters in <TT>src</TT> to compare 411 * @return The result of bitwise character comparison: 0 if this 412 * contains the same characters as <code>srcText</code>, -1 if the characters in 413 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 414 * characters in this are bitwise greater than the characters 415 * in <code>srcText</code>. 416 * @stable ICU 2.0 417 */ 418 inline int8_t compare(int32_t start, 419 int32_t length, 420 const UnicodeString& srcText, 421 int32_t srcStart, 422 int32_t srcLength) const; 423 424 /** 425 * Compare the characters bitwise in this UnicodeString with the first 426 * <TT>srcLength</TT> characters in <TT>srcChars</TT>. 427 * @param srcChars The characters to compare to this UnicodeString. 428 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 429 * @return The result of bitwise character comparison: 0 if this 430 * contains the same characters as <code>srcChars</code>, -1 if the characters in 431 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 432 * characters in this are bitwise greater than the characters 433 * in <code>srcChars</code>. 434 * @stable ICU 2.0 435 */ 436 inline int8_t compare(ConstChar16Ptr srcChars, 437 int32_t srcLength) const; 438 439 /** 440 * Compare the characters bitwise in the range 441 * [<TT>start</TT>, <TT>start + length</TT>) with the first 442 * <TT>length</TT> characters in <TT>srcChars</TT> 443 * @param start the offset at which the compare operation begins 444 * @param length the number of characters to compare. 
445 * @param srcChars the characters to be compared 446 * @return The result of bitwise character comparison: 0 if this 447 * contains the same characters as <code>srcChars</code>, -1 if the characters in 448 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 449 * characters in this are bitwise greater than the characters 450 * in <code>srcChars</code>. 451 * @stable ICU 2.0 452 */ 453 inline int8_t compare(int32_t start, 454 int32_t length, 455 const char16_t *srcChars) const; 456 457 /** 458 * Compare the characters bitwise in the range 459 * [<TT>start</TT>, <TT>start + length</TT>) with the characters 460 * in <TT>srcChars</TT> in the range 461 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 462 * @param start the offset at which the compare operation begins 463 * @param length the number of characters in this to compare 464 * @param srcChars the characters to be compared 465 * @param srcStart the offset into <TT>srcChars</TT> to start comparison 466 * @param srcLength the number of characters in <TT>srcChars</TT> to compare 467 * @return The result of bitwise character comparison: 0 if this 468 * contains the same characters as <code>srcChars</code>, -1 if the characters in 469 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the 470 * characters in this are bitwise greater than the characters 471 * in <code>srcChars</code>. 472 * @stable ICU 2.0 473 */ 474 inline int8_t compare(int32_t start, 475 int32_t length, 476 const char16_t *srcChars, 477 int32_t srcStart, 478 int32_t srcLength) const; 479 480 /** 481 * Compare the characters bitwise in the range 482 * [<TT>start</TT>, <TT>limit</TT>) with the characters 483 * in <TT>srcText</TT> in the range 484 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). 
485 * @param start the offset at which the compare operation begins 486 * @param limit the offset immediately following the compare operation 487 * @param srcText the text to be compared 488 * @param srcStart the offset into <TT>srcText</TT> to start comparison 489 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison 490 * @return The result of bitwise character comparison: 0 if this 491 * contains the same characters as <code>srcText</code>, -1 if the characters in 492 * this are bitwise less than the characters in <code>srcText</code>, +1 if the 493 * characters in this are bitwise greater than the characters 494 * in <code>srcText</code>. 495 * @stable ICU 2.0 496 */ 497 inline int8_t compareBetween(int32_t start, 498 int32_t limit, 499 const UnicodeString& srcText, 500 int32_t srcStart, 501 int32_t srcLimit) const; 502 503 /** 504 * Compare two Unicode strings in code point order. 505 * The result may be different from the results of compare(), operator<, etc. 506 * if supplementary characters are present: 507 * 508 * In UTF-16, supplementary characters (with code points U+10000 and above) are 509 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 510 * which means that they compare as less than some other BMP characters like U+feff. 511 * This function compares Unicode strings in code point order. 512 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 513 * 514 * @param text Another string to compare this one to. 515 * @return a negative/zero/positive integer corresponding to whether 516 * this string is less than/equal to/greater than the second one 517 * in code point order 518 * @stable ICU 2.0 519 */ 520 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 521 522 /** 523 * Compare two Unicode strings in code point order. 524 * The result may be different from the results of compare(), operator<, etc. 
525 * if supplementary characters are present: 526 * 527 * In UTF-16, supplementary characters (with code points U+10000 and above) are 528 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 529 * which means that they compare as less than some other BMP characters like U+feff. 530 * This function compares Unicode strings in code point order. 531 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 532 * 533 * @param start The start offset in this string at which the compare operation begins. 534 * @param length The number of code units from this string to compare. 535 * @param srcText Another string to compare this one to. 536 * @return a negative/zero/positive integer corresponding to whether 537 * this string is less than/equal to/greater than the second one 538 * in code point order 539 * @stable ICU 2.0 540 */ 541 inline int8_t compareCodePointOrder(int32_t start, 542 int32_t length, 543 const UnicodeString& srcText) const; 544 545 /** 546 * Compare two Unicode strings in code point order. 547 * The result may be different from the results of compare(), operator<, etc. 548 * if supplementary characters are present: 549 * 550 * In UTF-16, supplementary characters (with code points U+10000 and above) are 551 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 552 * which means that they compare as less than some other BMP characters like U+feff. 553 * This function compares Unicode strings in code point order. 554 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 555 * 556 * @param start The start offset in this string at which the compare operation begins. 557 * @param length The number of code units from this string to compare. 558 * @param srcText Another string to compare this one to. 
559 * @param srcStart The start offset in that string at which the compare operation begins. 560 * @param srcLength The number of code units from that string to compare. 561 * @return a negative/zero/positive integer corresponding to whether 562 * this string is less than/equal to/greater than the second one 563 * in code point order 564 * @stable ICU 2.0 565 */ 566 inline int8_t compareCodePointOrder(int32_t start, 567 int32_t length, 568 const UnicodeString& srcText, 569 int32_t srcStart, 570 int32_t srcLength) const; 571 572 /** 573 * Compare two Unicode strings in code point order. 574 * The result may be different from the results of compare(), operator<, etc. 575 * if supplementary characters are present: 576 * 577 * In UTF-16, supplementary characters (with code points U+10000 and above) are 578 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 579 * which means that they compare as less than some other BMP characters like U+feff. 580 * This function compares Unicode strings in code point order. 581 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 582 * 583 * @param srcChars A pointer to another string to compare this one to. 584 * @param srcLength The number of code units from that string to compare. 585 * @return a negative/zero/positive integer corresponding to whether 586 * this string is less than/equal to/greater than the second one 587 * in code point order 588 * @stable ICU 2.0 589 */ 590 inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, 591 int32_t srcLength) const; 592 593 /** 594 * Compare two Unicode strings in code point order. 595 * The result may be different from the results of compare(), operator<, etc. 596 * if supplementary characters are present: 597 * 598 * In UTF-16, supplementary characters (with code points U+10000 and above) are 599 * stored with pairs of surrogate code units. 
These have values from 0xd800 to 0xdfff, 600 * which means that they compare as less than some other BMP characters like U+feff. 601 * This function compares Unicode strings in code point order. 602 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 603 * 604 * @param start The start offset in this string at which the compare operation begins. 605 * @param length The number of code units from this string to compare. 606 * @param srcChars A pointer to another string to compare this one to. 607 * @return a negative/zero/positive integer corresponding to whether 608 * this string is less than/equal to/greater than the second one 609 * in code point order 610 * @stable ICU 2.0 611 */ 612 inline int8_t compareCodePointOrder(int32_t start, 613 int32_t length, 614 const char16_t *srcChars) const; 615 616 /** 617 * Compare two Unicode strings in code point order. 618 * The result may be different from the results of compare(), operator<, etc. 619 * if supplementary characters are present: 620 * 621 * In UTF-16, supplementary characters (with code points U+10000 and above) are 622 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 623 * which means that they compare as less than some other BMP characters like U+feff. 624 * This function compares Unicode strings in code point order. 625 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 626 * 627 * @param start The start offset in this string at which the compare operation begins. 628 * @param length The number of code units from this string to compare. 629 * @param srcChars A pointer to another string to compare this one to. 630 * @param srcStart The start offset in that string at which the compare operation begins. 631 * @param srcLength The number of code units from that string to compare. 
632 * @return a negative/zero/positive integer corresponding to whether 633 * this string is less than/equal to/greater than the second one 634 * in code point order 635 * @stable ICU 2.0 636 */ 637 inline int8_t compareCodePointOrder(int32_t start, 638 int32_t length, 639 const char16_t *srcChars, 640 int32_t srcStart, 641 int32_t srcLength) const; 642 643 /** 644 * Compare two Unicode strings in code point order. 645 * The result may be different from the results of compare(), operator<, etc. 646 * if supplementary characters are present: 647 * 648 * In UTF-16, supplementary characters (with code points U+10000 and above) are 649 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, 650 * which means that they compare as less than some other BMP characters like U+feff. 651 * This function compares Unicode strings in code point order. 652 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 653 * 654 * @param start The start offset in this string at which the compare operation begins. 655 * @param limit The offset after the last code unit from this string to compare. 656 * @param srcText Another string to compare this one to. 657 * @param srcStart The start offset in that string at which the compare operation begins. 658 * @param srcLimit The offset after the last code unit from that string to compare. 659 * @return a negative/zero/positive integer corresponding to whether 660 * this string is less than/equal to/greater than the second one 661 * in code point order 662 * @stable ICU 2.0 663 */ 664 inline int8_t compareCodePointOrderBetween(int32_t start, 665 int32_t limit, 666 const UnicodeString& srcText, 667 int32_t srcStart, 668 int32_t srcLimit) const; 669 670 /** 671 * Compare two strings case-insensitively using full case folding. 672 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). 
673 * 674 * @param text Another string to compare this one to. 675 * @param options A bit set of options: 676 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 677 * Comparison in code unit order with default case folding. 678 * 679 * - U_COMPARE_CODE_POINT_ORDER 680 * Set to choose code point order instead of code unit order 681 * (see u_strCompare for details). 682 * 683 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 684 * 685 * @return A negative, zero, or positive integer indicating the comparison result. 686 * @stable ICU 2.0 687 */ 688 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 689 690 /** 691 * Compare two strings case-insensitively using full case folding. 692 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 693 * 694 * @param start The start offset in this string at which the compare operation begins. 695 * @param length The number of code units from this string to compare. 696 * @param srcText Another string to compare this one to. 697 * @param options A bit set of options: 698 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 699 * Comparison in code unit order with default case folding. 700 * 701 * - U_COMPARE_CODE_POINT_ORDER 702 * Set to choose code point order instead of code unit order 703 * (see u_strCompare for details). 704 * 705 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 706 * 707 * @return A negative, zero, or positive integer indicating the comparison result. 708 * @stable ICU 2.0 709 */ 710 inline int8_t caseCompare(int32_t start, 711 int32_t length, 712 const UnicodeString& srcText, 713 uint32_t options) const; 714 715 /** 716 * Compare two strings case-insensitively using full case folding. 717 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). 718 * 719 * @param start The start offset in this string at which the compare operation begins. 720 * @param length The number of code units from this string to compare. 
721 * @param srcText Another string to compare this one to. 722 * @param srcStart The start offset in that string at which the compare operation begins. 723 * @param srcLength The number of code units from that string to compare. 724 * @param options A bit set of options: 725 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 726 * Comparison in code unit order with default case folding. 727 * 728 * - U_COMPARE_CODE_POINT_ORDER 729 * Set to choose code point order instead of code unit order 730 * (see u_strCompare for details). 731 * 732 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 733 * 734 * @return A negative, zero, or positive integer indicating the comparison result. 735 * @stable ICU 2.0 736 */ 737 inline int8_t caseCompare(int32_t start, 738 int32_t length, 739 const UnicodeString& srcText, 740 int32_t srcStart, 741 int32_t srcLength, 742 uint32_t options) const; 743 744 /** 745 * Compare two strings case-insensitively using full case folding. 746 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 747 * 748 * @param srcChars A pointer to another string to compare this one to. 749 * @param srcLength The number of code units from that string to compare. 750 * @param options A bit set of options: 751 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 752 * Comparison in code unit order with default case folding. 753 * 754 * - U_COMPARE_CODE_POINT_ORDER 755 * Set to choose code point order instead of code unit order 756 * (see u_strCompare for details). 757 * 758 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 759 * 760 * @return A negative, zero, or positive integer indicating the comparison result. 761 * @stable ICU 2.0 762 */ 763 inline int8_t caseCompare(ConstChar16Ptr srcChars, 764 int32_t srcLength, 765 uint32_t options) const; 766 767 /** 768 * Compare two strings case-insensitively using full case folding. 769 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 
770 * 771 * @param start The start offset in this string at which the compare operation begins. 772 * @param length The number of code units from this string to compare. 773 * @param srcChars A pointer to another string to compare this one to. 774 * @param options A bit set of options: 775 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 776 * Comparison in code unit order with default case folding. 777 * 778 * - U_COMPARE_CODE_POINT_ORDER 779 * Set to choose code point order instead of code unit order 780 * (see u_strCompare for details). 781 * 782 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 783 * 784 * @return A negative, zero, or positive integer indicating the comparison result. 785 * @stable ICU 2.0 786 */ 787 inline int8_t caseCompare(int32_t start, 788 int32_t length, 789 const char16_t *srcChars, 790 uint32_t options) const; 791 792 /** 793 * Compare two strings case-insensitively using full case folding. 794 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). 795 * 796 * @param start The start offset in this string at which the compare operation begins. 797 * @param length The number of code units from this string to compare. 798 * @param srcChars A pointer to another string to compare this one to. 799 * @param srcStart The start offset in that string at which the compare operation begins. 800 * @param srcLength The number of code units from that string to compare. 801 * @param options A bit set of options: 802 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 803 * Comparison in code unit order with default case folding. 804 * 805 * - U_COMPARE_CODE_POINT_ORDER 806 * Set to choose code point order instead of code unit order 807 * (see u_strCompare for details). 808 * 809 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 810 * 811 * @return A negative, zero, or positive integer indicating the comparison result. 
812 * @stable ICU 2.0 813 */ 814 inline int8_t caseCompare(int32_t start, 815 int32_t length, 816 const char16_t *srcChars, 817 int32_t srcStart, 818 int32_t srcLength, 819 uint32_t options) const; 820 821 /** 822 * Compare two strings case-insensitively using full case folding. 823 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)). 824 * 825 * @param start The start offset in this string at which the compare operation begins. 826 * @param limit The offset after the last code unit from this string to compare. 827 * @param srcText Another string to compare this one to. 828 * @param srcStart The start offset in that string at which the compare operation begins. 829 * @param srcLimit The offset after the last code unit from that string to compare. 830 * @param options A bit set of options: 831 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 832 * Comparison in code unit order with default case folding. 833 * 834 * - U_COMPARE_CODE_POINT_ORDER 835 * Set to choose code point order instead of code unit order 836 * (see u_strCompare for details). 837 * 838 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 839 * 840 * @return A negative, zero, or positive integer indicating the comparison result. 841 * @stable ICU 2.0 842 */ 843 inline int8_t caseCompareBetween(int32_t start, 844 int32_t limit, 845 const UnicodeString& srcText, 846 int32_t srcStart, 847 int32_t srcLimit, 848 uint32_t options) const; 849 850 /** 851 * Determine if this starts with the characters in <TT>text</TT> 852 * @param text The text to match. 853 * @return TRUE if this starts with the characters in <TT>text</TT>, 854 * FALSE otherwise 855 * @stable ICU 2.0 856 */ 857 inline UBool startsWith(const UnicodeString& text) const; 858 859 /** 860 * Determine if this starts with the characters in <TT>srcText</TT> 861 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 862 * @param srcText The text to match.
863 * @param srcStart the offset into <TT>srcText</TT> to start matching 864 * @param srcLength the number of characters in <TT>srcText</TT> to match 865 * @return TRUE if this starts with the characters in <TT>text</TT>, 866 * FALSE otherwise 867 * @stable ICU 2.0 868 */ 869 inline UBool startsWith(const UnicodeString& srcText, 870 int32_t srcStart, 871 int32_t srcLength) const; 872 873 /** 874 * Determine if this starts with the characters in <TT>srcChars</TT> 875 * @param srcChars The characters to match. 876 * @param srcLength the number of characters in <TT>srcChars</TT> 877 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, 878 * FALSE otherwise 879 * @stable ICU 2.0 880 */ 881 inline UBool startsWith(ConstChar16Ptr srcChars, 882 int32_t srcLength) const; 883 884 /** 885 * Determine if this starts with the characters in <TT>srcChars</TT> 886 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 887 * @param srcChars The characters to match. 888 * @param srcStart the offset into <TT>srcChars</TT> to start matching 889 * @param srcLength the number of characters in <TT>srcChars</TT> to match 890 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise 891 * @stable ICU 2.0 892 */ 893 inline UBool startsWith(const char16_t *srcChars, 894 int32_t srcStart, 895 int32_t srcLength) const; 896 897 /** 898 * Determine if this ends with the characters in <TT>text</TT> 899 * @param text The text to match. 900 * @return TRUE if this ends with the characters in <TT>text</TT>, 901 * FALSE otherwise 902 * @stable ICU 2.0 903 */ 904 inline UBool endsWith(const UnicodeString& text) const; 905 906 /** 907 * Determine if this ends with the characters in <TT>srcText</TT> 908 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 909 * @param srcText The text to match.
910 * @param srcStart the offset into <TT>srcText</TT> to start matching 911 * @param srcLength the number of characters in <TT>srcText</TT> to match 912 * @return TRUE if this ends with the characters in <TT>text</TT>, 913 * FALSE otherwise 914 * @stable ICU 2.0 915 */ 916 inline UBool endsWith(const UnicodeString& srcText, 917 int32_t srcStart, 918 int32_t srcLength) const; 919 920 /** 921 * Determine if this ends with the characters in <TT>srcChars</TT> 922 * @param srcChars The characters to match. 923 * @param srcLength the number of characters in <TT>srcChars</TT> 924 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 925 * FALSE otherwise 926 * @stable ICU 2.0 927 */ 928 inline UBool endsWith(ConstChar16Ptr srcChars, 929 int32_t srcLength) const; 930 931 /** 932 * Determine if this ends with the characters in <TT>srcChars</TT> 933 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 934 * @param srcChars The characters to match. 935 * @param srcStart the offset into <TT>srcChars</TT> to start matching 936 * @param srcLength the number of characters in <TT>srcChars</TT> to match 937 * @return TRUE if this ends with the characters in <TT>srcChars</TT>, 938 * FALSE otherwise 939 * @stable ICU 2.0 940 */ 941 inline UBool endsWith(const char16_t *srcChars, 942 int32_t srcStart, 943 int32_t srcLength) const; 944 945 946 /* Searching - bitwise only */ 947 948 /** 949 * Locate in this the first occurrence of the characters in <TT>text</TT>, 950 * using bitwise comparison. 951 * @param text The text to search for. 952 * @return The offset into this of the start of <TT>text</TT>, 953 * or -1 if not found. 954 * @stable ICU 2.0 955 */ 956 inline int32_t indexOf(const UnicodeString& text) const; 957 958 /** 959 * Locate in this the first occurrence of the characters in <TT>text</TT> 960 * starting at offset <TT>start</TT>, using bitwise comparison. 961 * @param text The text to search for.
962 * @param start The offset at which searching will start. 963 * @return The offset into this of the start of <TT>text</TT>, 964 * or -1 if not found. 965 * @stable ICU 2.0 966 */ 967 inline int32_t indexOf(const UnicodeString& text, 968 int32_t start) const; 969 970 /** 971 * Locate in this the first occurrence in the range 972 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 973 * in <TT>text</TT>, using bitwise comparison. 974 * @param text The text to search for. 975 * @param start The offset at which searching will start. 976 * @param length The number of characters to search 977 * @return The offset into this of the start of <TT>text</TT>, 978 * or -1 if not found. 979 * @stable ICU 2.0 980 */ 981 inline int32_t indexOf(const UnicodeString& text, 982 int32_t start, 983 int32_t length) const; 984 985 /** 986 * Locate in this the first occurrence in the range 987 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 988 * in <TT>srcText</TT> in the range 989 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 990 * using bitwise comparison. 991 * @param srcText The text to search for. 992 * @param srcStart the offset into <TT>srcText</TT> at which 993 * to start matching 994 * @param srcLength the number of characters in <TT>srcText</TT> to match 995 * @param start the offset into this at which to start matching 996 * @param length the number of characters in this to search 997 * @return The offset into this of the start of <TT>text</TT>, 998 * or -1 if not found. 999 * @stable ICU 2.0 1000 */ 1001 inline int32_t indexOf(const UnicodeString& srcText, 1002 int32_t srcStart, 1003 int32_t srcLength, 1004 int32_t start, 1005 int32_t length) const; 1006 1007 /** 1008 * Locate in this the first occurrence of the characters in 1009 * <TT>srcChars</TT> 1010 * starting at offset <TT>start</TT>, using bitwise comparison. 1011 * @param srcChars The text to search for. 
1012 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1013 * @param start the offset into this at which to start matching 1014 * @return The offset into this of the start of <TT>text</TT>, 1015 * or -1 if not found. 1016 * @stable ICU 2.0 1017 */ 1018 inline int32_t indexOf(const char16_t *srcChars, 1019 int32_t srcLength, 1020 int32_t start) const; 1021 1022 /** 1023 * Locate in this the first occurrence in the range 1024 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1025 * in <TT>srcChars</TT>, using bitwise comparison. 1026 * @param srcChars The text to search for. 1027 * @param srcLength the number of characters in <TT>srcChars</TT> 1028 * @param start The offset at which searching will start. 1029 * @param length The number of characters to search 1030 * @return The offset into this of the start of <TT>srcChars</TT>, 1031 * or -1 if not found. 1032 * @stable ICU 2.0 1033 */ 1034 inline int32_t indexOf(ConstChar16Ptr srcChars, 1035 int32_t srcLength, 1036 int32_t start, 1037 int32_t length) const; 1038 1039 /** 1040 * Locate in this the first occurrence in the range 1041 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1042 * in <TT>srcChars</TT> in the range 1043 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1044 * using bitwise comparison. 1045 * @param srcChars The text to search for. 1046 * @param srcStart the offset into <TT>srcChars</TT> at which 1047 * to start matching 1048 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1049 * @param start the offset into this at which to start matching 1050 * @param length the number of characters in this to search 1051 * @return The offset into this of the start of <TT>text</TT>, 1052 * or -1 if not found. 
1053 * @stable ICU 2.0 1054 */ 1055 int32_t indexOf(const char16_t *srcChars, 1056 int32_t srcStart, 1057 int32_t srcLength, 1058 int32_t start, 1059 int32_t length) const; 1060 1061 /** 1062 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1063 * using bitwise comparison. 1064 * @param c The code unit to search for. 1065 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1066 * @stable ICU 2.0 1067 */ 1068 inline int32_t indexOf(char16_t c) const; 1069 1070 /** 1071 * Locate in this the first occurrence of the code point <TT>c</TT>, 1072 * using bitwise comparison. 1073 * 1074 * @param c The code point to search for. 1075 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1076 * @stable ICU 2.0 1077 */ 1078 inline int32_t indexOf(UChar32 c) const; 1079 1080 /** 1081 * Locate in this the first occurrence of the BMP code point <code>c</code>, 1082 * starting at offset <TT>start</TT>, using bitwise comparison. 1083 * @param c The code unit to search for. 1084 * @param start The offset at which searching will start. 1085 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1086 * @stable ICU 2.0 1087 */ 1088 inline int32_t indexOf(char16_t c, 1089 int32_t start) const; 1090 1091 /** 1092 * Locate in this the first occurrence of the code point <TT>c</TT> 1093 * starting at offset <TT>start</TT>, using bitwise comparison. 1094 * 1095 * @param c The code point to search for. 1096 * @param start The offset at which searching will start. 1097 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1098 * @stable ICU 2.0 1099 */ 1100 inline int32_t indexOf(UChar32 c, 1101 int32_t start) const; 1102 1103 /** 1104 * Locate in this the first occurrence of the BMP code point <code>c</code> 1105 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1106 * using bitwise comparison. 1107 * @param c The code unit to search for. 
1108 * @param start the offset into this at which to start matching 1109 * @param length the number of characters in this to search 1110 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1111 * @stable ICU 2.0 1112 */ 1113 inline int32_t indexOf(char16_t c, 1114 int32_t start, 1115 int32_t length) const; 1116 1117 /** 1118 * Locate in this the first occurrence of the code point <TT>c</TT> 1119 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1120 * using bitwise comparison. 1121 * 1122 * @param c The code point to search for. 1123 * @param start the offset into this at which to start matching 1124 * @param length the number of characters in this to search 1125 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1126 * @stable ICU 2.0 1127 */ 1128 inline int32_t indexOf(UChar32 c, 1129 int32_t start, 1130 int32_t length) const; 1131 1132 /** 1133 * Locate in this the last occurrence of the characters in <TT>text</TT>, 1134 * using bitwise comparison. 1135 * @param text The text to search for. 1136 * @return The offset into this of the start of <TT>text</TT>, 1137 * or -1 if not found. 1138 * @stable ICU 2.0 1139 */ 1140 inline int32_t lastIndexOf(const UnicodeString& text) const; 1141 1142 /** 1143 * Locate in this the last occurrence of the characters in <TT>text</TT> 1144 * starting at offset <TT>start</TT>, using bitwise comparison. 1145 * @param text The text to search for. 1146 * @param start The offset at which searching will start. 1147 * @return The offset into this of the start of <TT>text</TT>, 1148 * or -1 if not found. 1149 * @stable ICU 2.0 1150 */ 1151 inline int32_t lastIndexOf(const UnicodeString& text, 1152 int32_t start) const; 1153 1154 /** 1155 * Locate in this the last occurrence in the range 1156 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1157 * in <TT>text</TT>, using bitwise comparison. 1158 * @param text The text to search for. 
1159 * @param start The offset at which searching will start. 1160 * @param length The number of characters to search 1161 * @return The offset into this of the start of <TT>text</TT>, 1162 * or -1 if not found. 1163 * @stable ICU 2.0 1164 */ 1165 inline int32_t lastIndexOf(const UnicodeString& text, 1166 int32_t start, 1167 int32_t length) const; 1168 1169 /** 1170 * Locate in this the last occurrence in the range 1171 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1172 * in <TT>srcText</TT> in the range 1173 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1174 * using bitwise comparison. 1175 * @param srcText The text to search for. 1176 * @param srcStart the offset into <TT>srcText</TT> at which 1177 * to start matching 1178 * @param srcLength the number of characters in <TT>srcText</TT> to match 1179 * @param start the offset into this at which to start matching 1180 * @param length the number of characters in this to search 1181 * @return The offset into this of the start of <TT>text</TT>, 1182 * or -1 if not found. 1183 * @stable ICU 2.0 1184 */ 1185 inline int32_t lastIndexOf(const UnicodeString& srcText, 1186 int32_t srcStart, 1187 int32_t srcLength, 1188 int32_t start, 1189 int32_t length) const; 1190 1191 /** 1192 * Locate in this the last occurrence of the characters in <TT>srcChars</TT> 1193 * starting at offset <TT>start</TT>, using bitwise comparison. 1194 * @param srcChars The text to search for. 1195 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1196 * @param start the offset into this at which to start matching 1197 * @return The offset into this of the start of <TT>text</TT>, 1198 * or -1 if not found. 
1199 * @stable ICU 2.0 1200 */ 1201 inline int32_t lastIndexOf(const char16_t *srcChars, 1202 int32_t srcLength, 1203 int32_t start) const; 1204 1205 /** 1206 * Locate in this the last occurrence in the range 1207 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1208 * in <TT>srcChars</TT>, using bitwise comparison. 1209 * @param srcChars The text to search for. 1210 * @param srcLength the number of characters in <TT>srcChars</TT> 1211 * @param start The offset at which searching will start. 1212 * @param length The number of characters to search 1213 * @return The offset into this of the start of <TT>srcChars</TT>, 1214 * or -1 if not found. 1215 * @stable ICU 2.0 1216 */ 1217 inline int32_t lastIndexOf(ConstChar16Ptr srcChars, 1218 int32_t srcLength, 1219 int32_t start, 1220 int32_t length) const; 1221 1222 /** 1223 * Locate in this the last occurrence in the range 1224 * [<TT>start</TT>, <TT>start + length</TT>) of the characters 1225 * in <TT>srcChars</TT> in the range 1226 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), 1227 * using bitwise comparison. 1228 * @param srcChars The text to search for. 1229 * @param srcStart the offset into <TT>srcChars</TT> at which 1230 * to start matching 1231 * @param srcLength the number of characters in <TT>srcChars</TT> to match 1232 * @param start the offset into this at which to start matching 1233 * @param length the number of characters in this to search 1234 * @return The offset into this of the start of <TT>text</TT>, 1235 * or -1 if not found. 1236 * @stable ICU 2.0 1237 */ 1238 int32_t lastIndexOf(const char16_t *srcChars, 1239 int32_t srcStart, 1240 int32_t srcLength, 1241 int32_t start, 1242 int32_t length) const; 1243 1244 /** 1245 * Locate in this the last occurrence of the BMP code point <code>c</code>, 1246 * using bitwise comparison. 1247 * @param c The code unit to search for. 1248 * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1249 * @stable ICU 2.0 1250 */ 1251 inline int32_t lastIndexOf(char16_t c) const; 1252 1253 /** 1254 * Locate in this the last occurrence of the code point <TT>c</TT>, 1255 * using bitwise comparison. 1256 * 1257 * @param c The code point to search for. 1258 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1259 * @stable ICU 2.0 1260 */ 1261 inline int32_t lastIndexOf(UChar32 c) const; 1262 1263 /** 1264 * Locate in this the last occurrence of the BMP code point <code>c</code> 1265 * starting at offset <TT>start</TT>, using bitwise comparison. 1266 * @param c The code unit to search for. 1267 * @param start The offset at which searching will start. 1268 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1269 * @stable ICU 2.0 1270 */ 1271 inline int32_t lastIndexOf(char16_t c, 1272 int32_t start) const; 1273 1274 /** 1275 * Locate in this the last occurrence of the code point <TT>c</TT> 1276 * starting at offset <TT>start</TT>, using bitwise comparison. 1277 * 1278 * @param c The code point to search for. 1279 * @param start The offset at which searching will start. 1280 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1281 * @stable ICU 2.0 1282 */ 1283 inline int32_t lastIndexOf(UChar32 c, 1284 int32_t start) const; 1285 1286 /** 1287 * Locate in this the last occurrence of the BMP code point <code>c</code> 1288 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1289 * using bitwise comparison. 1290 * @param c The code unit to search for. 1291 * @param start the offset into this at which to start matching 1292 * @param length the number of characters in this to search 1293 * @return The offset into this of <TT>c</TT>, or -1 if not found. 
1294 * @stable ICU 2.0 1295 */ 1296 inline int32_t lastIndexOf(char16_t c, 1297 int32_t start, 1298 int32_t length) const; 1299 1300 /** 1301 * Locate in this the last occurrence of the code point <TT>c</TT> 1302 * in the range [<TT>start</TT>, <TT>start + length</TT>), 1303 * using bitwise comparison. 1304 * 1305 * @param c The code point to search for. 1306 * @param start the offset into this at which to start matching 1307 * @param length the number of characters in this to search 1308 * @return The offset into this of <TT>c</TT>, or -1 if not found. 1309 * @stable ICU 2.0 1310 */ 1311 inline int32_t lastIndexOf(UChar32 c, 1312 int32_t start, 1313 int32_t length) const; 1314 1315 1316 /* Character access */ 1317 1318 /** 1319 * Return the code unit at offset <tt>offset</tt>. 1320 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1321 * @param offset a valid offset into the text 1322 * @return the code unit at offset <tt>offset</tt> 1323 * or 0xffff if the offset is not valid for this string 1324 * @stable ICU 2.0 1325 */ 1326 inline char16_t charAt(int32_t offset) const; 1327 1328 /** 1329 * Return the code unit at offset <tt>offset</tt>. 1330 * If the offset is not valid (0..length()-1) then U+ffff is returned. 1331 * @param offset a valid offset into the text 1332 * @return the code unit at offset <tt>offset</tt> 1333 * @stable ICU 2.0 1334 */ 1335 inline char16_t operator[] (int32_t offset) const; 1336 1337 /** 1338 * Return the code point that contains the code unit 1339 * at offset <tt>offset</tt>. 1340 * If the offset is not valid (0..length()-1) then U+ffff is returned. 
1341 * @param offset a valid offset into the text 1342 * that indicates the text offset of any of the code units 1343 * that will be assembled into a code point (21-bit value) and returned 1344 * @return the code point of text at <tt>offset</tt> 1345 * or 0xffff if the offset is not valid for this string 1346 * @stable ICU 2.0 1347 */ 1348 UChar32 char32At(int32_t offset) const; 1349 1350 /** 1351 * Adjust a random-access offset so that 1352 * it points to the beginning of a Unicode character. 1353 * The offset that is passed in points to 1354 * any code unit of a code point, 1355 * while the returned offset will point to the first code unit 1356 * of the same code point. 1357 * In UTF-16, if the input offset points to a second surrogate 1358 * of a surrogate pair, then the returned offset will point 1359 * to the first surrogate. 1360 * @param offset a valid offset into one code point of the text 1361 * @return offset of the first code unit of the same code point 1362 * @see U16_SET_CP_START 1363 * @stable ICU 2.0 1364 */ 1365 int32_t getChar32Start(int32_t offset) const; 1366 1367 /** 1368 * Adjust a random-access offset so that 1369 * it points behind a Unicode character. 1370 * The offset that is passed in points behind 1371 * any code unit of a code point, 1372 * while the returned offset will point behind the last code unit 1373 * of the same code point. 1374 * In UTF-16, if the input offset points behind the first surrogate 1375 * (i.e., to the second surrogate) 1376 * of a surrogate pair, then the returned offset will point 1377 * behind the second surrogate (i.e., to the first code unit after the pair). 1378 * @param offset a valid offset after any code unit of a code point of the text 1379 * @return offset of the first code unit after the same code point 1380 * @see U16_SET_CP_LIMIT 1381 * @stable ICU 2.0 1382 */ 1383 int32_t getChar32Limit(int32_t offset) const; 1384 1385 /** 1386 * Move the code unit index along the string by delta code points.
1387 * Interpret the input index as a code unit-based offset into the string, 1388 * move the index forward or backward by delta code points, and 1389 * return the resulting index. 1390 * The input index should point to the first code unit of a code point, 1391 * if there is more than one. 1392 * 1393 * Both input and output indexes are code unit-based as for all 1394 * string indexes/offsets in ICU (and other libraries, like MBCS char*). 1395 * If delta<0 then the index is moved backward (toward the start of the string). 1396 * If delta>0 then the index is moved forward (toward the end of the string). 1397 * 1398 * This behaves like CharacterIterator::move32(delta, kCurrent). 1399 * 1400 * Behavior for out-of-bounds indexes: 1401 * <code>moveIndex32</code> pins the input index to 0..length(), i.e., 1402 * if the input index<0 then it is pinned to 0; 1403 * if it is index>length() then it is pinned to length(). 1404 * Afterwards, the index is moved by <code>delta</code> code points 1405 * forward or backward, 1406 * but no further backward than to 0 and no further forward than to length(). 1407 * The resulting index return value will be in between 0 and length(), inclusively. 
1408 * 1409 * Examples: 1410 * <pre> 1411 * // s has code points 'a' U+10000 'b' U+10ffff U+2029 1412 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); 1413 * 1414 * // initial index: position of U+10000 1415 * int32_t index=1; 1416 * 1417 * // the following examples will all result in index==4, position of U+10ffff 1418 * 1419 * // skip 2 code points from some position in the string 1420 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' 1421 * 1422 * // go to the 3rd code point from the start of s (0-based) 1423 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' 1424 * 1425 * // go to the next-to-last code point of s 1426 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff 1427 * </pre> 1428 * 1429 * @param index input code unit index 1430 * @param delta (signed) code point count to move the index forward or backward 1431 * in the string 1432 * @return the resulting code unit index 1433 * @stable ICU 2.0 1434 */ 1435 int32_t moveIndex32(int32_t index, int32_t delta) const; 1436 1437 /* Substring extraction */ 1438 1439 /** 1440 * Copy the characters in the range 1441 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, 1442 * beginning at <tt>dstStart</tt>. 1443 * If the string aliases to <code>dst</code> itself as an external buffer, 1444 * then extract() will not copy the contents. 1445 * 1446 * @param start offset of first character which will be copied into the array 1447 * @param length the number of characters to extract 1448 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1449 * must be at least (<tt>dstStart + length</tt>). 1450 * @param dstStart the offset in <TT>dst</TT> where the first character 1451 * will be extracted 1452 * @stable ICU 2.0 1453 */ 1454 inline void extract(int32_t start, 1455 int32_t length, 1456 Char16Ptr dst, 1457 int32_t dstStart = 0) const; 1458 1459 /** 1460 * Copy the contents of the string into dest. 
1461 * This is a convenience function that 1462 * checks if there is enough space in dest, 1463 * extracts the entire string if possible, 1464 * and NUL-terminates dest if possible. 1465 * 1466 * If the string fits into dest but cannot be NUL-terminated 1467 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. 1468 * If the string itself does not fit into dest 1469 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. 1470 * 1471 * If the string aliases to <code>dest</code> itself as an external buffer, 1472 * then extract() will not copy the contents. 1473 * 1474 * @param dest Destination string buffer. 1475 * @param destCapacity Number of char16_ts available at dest. 1476 * @param errorCode ICU error code. 1477 * @return length() 1478 * @stable ICU 2.0 1479 */ 1480 int32_t 1481 extract(Char16Ptr dest, int32_t destCapacity, 1482 UErrorCode &errorCode) const; 1483 1484 /** 1485 * Copy the characters in the range 1486 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString 1487 * <tt>target</tt>. 1488 * @param start offset of first character which will be copied 1489 * @param length the number of characters to extract 1490 * @param target UnicodeString into which to copy characters. 1491 * The result is delivered through <TT>target</TT>; the function itself returns no value. 1492 * @stable ICU 2.0 1493 */ 1494 inline void extract(int32_t start, 1495 int32_t length, 1496 UnicodeString& target) const; 1497 1498 /** 1499 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1500 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. 1501 * @param start offset of first character which will be copied into the array 1502 * @param limit offset immediately following the last character to be copied 1503 * @param dst array in which to copy characters. The length of <tt>dst</tt> 1504 * must be at least (<tt>dstStart + (limit - start)</tt>).
1505 * @param dstStart the offset in <TT>dst</TT> where the first character 1506 * will be extracted 1507 * @stable ICU 2.0 1508 */ 1509 inline void extractBetween(int32_t start, 1510 int32_t limit, 1511 char16_t *dst, 1512 int32_t dstStart = 0) const; 1513 1514 /** 1515 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) 1516 * into the UnicodeString <tt>target</tt>. Replaceable API. 1517 * @param start offset of first character which will be copied 1518 * @param limit offset immediately following the last character to be copied 1519 * @param target UnicodeString into which to copy characters. 1520 * The result is delivered through <TT>target</TT>; the function itself returns no value. 1521 * @stable ICU 2.0 1522 */ 1523 virtual void extractBetween(int32_t start, 1524 int32_t limit, 1525 UnicodeString& target) const; 1526 1527 /** 1528 * Copy the characters in the range 1529 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters. 1530 * All characters must be invariant (see utypes.h). 1531 * Use US_INV as the last, signature-distinguishing parameter. 1532 * 1533 * This function does not write any more than <code>targetCapacity</code> 1534 * characters but returns the length of the entire output string 1535 * so that one can allocate a larger buffer and call the function again 1536 * if necessary. 1537 * The output string is NUL-terminated if possible. 1538 * 1539 * @param start offset of first character which will be copied 1540 * @param startLength the number of characters to extract 1541 * @param target the target buffer for extraction, can be NULL 1542 * if targetLength is 0 1543 * @param targetCapacity the length of the target buffer 1544 * @param inv Signature-distinguishing paramater, use US_INV.
1545 * @return the output string length, not including the terminating NUL 1546 * @stable ICU 3.2 1547 */ 1548 int32_t extract(int32_t start, 1549 int32_t startLength, 1550 char *target, 1551 int32_t targetCapacity, 1552 enum EInvariant inv) const; 1553 1554 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 1555 1556 /** 1557 * Copy the characters in the range 1558 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters 1559 * in the platform's default codepage. 1560 * This function does not write any more than <code>targetLength</code> 1561 * characters but returns the length of the entire output string 1562 * so that one can allocate a larger buffer and call the function again 1563 * if necessary. 1564 * The output string is NUL-terminated if possible. 1565 * 1566 * @param start offset of first character which will be copied 1567 * @param startLength the number of characters to extract 1568 * @param target the target buffer for extraction 1569 * @param targetLength the length of the target buffer 1570 * If <TT>target</TT> is NULL, then the number of bytes required for 1571 * <TT>target</TT> is returned. 1572 * @return the output string length, not including the terminating NUL 1573 * @stable ICU 2.0 1574 */ 1575 int32_t extract(int32_t start, 1576 int32_t startLength, 1577 char *target, 1578 uint32_t targetLength) const; 1579 1580 #endif 1581 1582 #if !UCONFIG_NO_CONVERSION 1583 1584 /** 1585 * Copy the characters in the range 1586 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters 1587 * in a specified codepage. 1588 * The output string is NUL-terminated. 1589 * 1590 * Recommendation: For invariant-character strings use 1591 * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const 1592 * because it avoids object code dependencies of UnicodeString on 1593 * the conversion code.
1594 * 1595 * @param start offset of first character which will be copied 1596 * @param startLength the number of characters to extract 1597 * @param target the target buffer for extraction 1598 * @param codepage the desired codepage for the characters. 0 has 1599 * the special meaning of the default codepage 1600 * If <code>codepage</code> is an empty string (<code>""</code>), 1601 * then a simple conversion is performed on the codepage-invariant 1602 * subset ("invariant characters") of the platform encoding. See utypes.h. 1603 * If <TT>target</TT> is NULL, then the number of bytes required for 1604 * <TT>target</TT> is returned. It is assumed that the target is big enough 1605 * to fit all of the characters. 1606 * @return the output string length, not including the terminating NUL 1607 * @stable ICU 2.0 1608 */ 1609 inline int32_t extract(int32_t start, 1610 int32_t startLength, 1611 char *target, 1612 const char *codepage = 0) const; 1613 1614 /** 1615 * Copy the characters in the range 1616 * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters 1617 * in a specified codepage. 1618 * This function does not write any more than <code>targetLength</code> 1619 * characters but returns the length of the entire output string 1620 * so that one can allocate a larger buffer and call the function again 1621 * if necessary. 1622 * The output string is NUL-terminated if possible. 1623 * 1624 * Recommendation: For invariant-character strings use 1625 * extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const 1626 * because it avoids object code dependencies of UnicodeString on 1627 * the conversion code.
1628 * 1629 * @param start offset of first character which will be copied 1630 * @param startLength the number of characters to extract 1631 * @param target the target buffer for extraction 1632 * @param targetLength the length of the target buffer 1633 * @param codepage the desired codepage for the characters. 0 has 1634 * the special meaning of the default codepage 1635 * If <code>codepage</code> is an empty string (<code>""</code>), 1636 * then a simple conversion is performed on the codepage-invariant 1637 * subset ("invariant characters") of the platform encoding. See utypes.h. 1638 * If <TT>target</TT> is NULL, then the number of bytes required for 1639 * <TT>target</TT> is returned. 1640 * @return the output string length, not including the terminating NUL 1641 * @stable ICU 2.0 1642 */ 1643 int32_t extract(int32_t start, 1644 int32_t startLength, 1645 char *target, 1646 uint32_t targetLength, 1647 const char *codepage) const; 1648 1649 /** 1650 * Convert the UnicodeString into a codepage string using an existing UConverter. 1651 * The output string is NUL-terminated if possible. 1652 * 1653 * This function avoids the overhead of opening and closing a converter if 1654 * multiple strings are extracted. 
1655 * 1656 * @param dest destination string buffer, can be NULL if destCapacity==0 1657 * @param destCapacity the number of chars available at dest 1658 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), 1659 * or NULL for the default converter 1660 * @param errorCode normal ICU error code 1661 * @return the length of the output string, not counting the terminating NUL; 1662 * if the length is greater than destCapacity, then the string will not fit 1663 * and a buffer of the indicated length would need to be passed in 1664 * @stable ICU 2.0 1665 */ 1666 int32_t extract(char *dest, int32_t destCapacity, 1667 UConverter *cnv, 1668 UErrorCode &errorCode) const; 1669 1670 #endif 1671 1672 /** 1673 * Create a temporary substring for the specified range. 1674 * Unlike the substring constructor and setTo() functions, 1675 * the object returned here will be a read-only alias (using getBuffer()) 1676 * rather than copying the text. 1677 * As a result, this substring operation is much faster but requires 1678 * that the original string not be modified or deleted during the lifetime 1679 * of the returned substring object. 1680 * @param start offset of the first character visible in the substring 1681 * @param length length of the substring 1682 * @return a read-only alias UnicodeString object for the substring 1683 * @stable ICU 4.4 1684 */ 1685 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 1686 1687 /** 1688 * Create a temporary substring for the specified range. 1689 * Same as tempSubString(start, length) except that the substring range 1690 * is specified as a (start, limit) pair (with an exclusive limit index) 1691 * rather than a (start, length) pair. 
1692 * @param start offset of the first character visible in the substring 1693 * @param limit offset immediately following the last character visible in the substring 1694 * @return a read-only alias UnicodeString object for the substring 1695 * @stable ICU 4.4 1696 */ 1697 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 1698 1699 /** 1700 * Convert the UnicodeString to UTF-8 and write the result 1701 * to a ByteSink. This is called by toUTF8String(). 1702 * Unpaired surrogates are replaced with U+FFFD. 1703 * Calls u_strToUTF8WithSub(). 1704 * 1705 * @param sink A ByteSink to which the UTF-8 version of the string is written. 1706 * sink.Flush() is called at the end. 1707 * @stable ICU 4.2 1708 * @see toUTF8String 1709 */ 1710 void toUTF8(ByteSink &sink) const; 1711 1712 /** 1713 * Convert the UnicodeString to UTF-8 and append the result 1714 * to a standard string. 1715 * Unpaired surrogates are replaced with U+FFFD. 1716 * Calls toUTF8(). 1717 * 1718 * @param result A standard string (or a compatible object) 1719 * to which the UTF-8 version of the string is appended. 1720 * @return The string object. 1721 * @stable ICU 4.2 1722 * @see toUTF8 1723 */ 1724 template<typename StringClass> 1725 StringClass &toUTF8String(StringClass &result) const { 1726 StringByteSink<StringClass> sbs(&result, length()); 1727 toUTF8(sbs); 1728 return result; 1729 } 1730 1731 /** 1732 * Convert the UnicodeString to UTF-32. 1733 * Unpaired surrogates are replaced with U+FFFD. 1734 * Calls u_strToUTF32WithSub(). 1735 * 1736 * @param utf32 destination string buffer, can be NULL if capacity==0 1737 * @param capacity the number of UChar32s available at utf32 1738 * @param errorCode Standard ICU error code. Its input value must 1739 * pass the U_SUCCESS() test, or else the function returns 1740 * immediately. Check for U_FAILURE() on output or use with 1741 * function chaining. (See User Guide for details.) 
1742 * @return The length of the UTF-32 string. 1743 * @see fromUTF32 1744 * @stable ICU 4.2 1745 */ 1746 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 1747 1748 /* Length operations */ 1749 1750 /** 1751 * Return the length of the UnicodeString object. 1752 * The length is the number of char16_t code units in the UnicodeString. 1753 * If you want the number of code points, please use countChar32(). 1754 * @return the length of the UnicodeString object 1755 * @see countChar32 1756 * @stable ICU 2.0 1757 */ 1758 inline int32_t length(void) const; 1759 1760 /** 1761 * Count the Unicode code points in <TT>length</TT> char16_t code units of the string, beginning at <TT>start</TT>. 1762 * A code point may occupy either one or two char16_t code units. 1763 * Counting code points involves reading all code units. 1764 * 1765 * This function is basically the inverse of moveIndex32(). 1766 * 1767 * @param start the index of the first code unit to check 1768 * @param length the number of char16_t code units to check 1769 * @return the number of code points in the specified code units 1770 * @see length 1771 * @stable ICU 2.0 1772 */ 1773 int32_t 1774 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 1775 1776 /** 1777 * Check if the length char16_t code units of the string 1778 * contain more Unicode code points than a certain number. 1779 * This is more efficient than counting all code points in this part of the string 1780 * and comparing that number with a threshold. 1781 * This function may not need to scan the string at all if the length 1782 * falls within a certain range, and 1783 * never needs to count more than 'number+1' code points. 1784 * Logically equivalent to (countChar32(start, length)>number). 1785 * A Unicode code point may occupy either one or two char16_t code units.
1786 * 1787 * @param start the index of the first code unit to check (0 for the entire string) 1788 * @param length the number of char16_t code units to check 1789 * (use INT32_MAX for the entire string; remember that start/length 1790 * values are pinned) 1791 * @param number The number of code points in the (sub)string is compared against 1792 * the 'number' parameter. 1793 * @return Boolean value for whether the string contains more Unicode code points 1794 * than 'number'. Same as (u_countChar32(s, length)>number). 1795 * @see countChar32 1796 * @see u_strHasMoreChar32Than 1797 * @stable ICU 2.4 1798 */ 1799 UBool 1800 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 1801 1802 /** 1803 * Determine if this string is empty. 1804 * @return TRUE if this string contains 0 characters, FALSE otherwise. 1805 * @stable ICU 2.0 1806 */ 1807 inline UBool isEmpty(void) const; 1808 1809 /** 1810 * Return the capacity of the internal buffer of the UnicodeString object. 1811 * This is useful together with the getBuffer functions. 1812 * See there for details. 1813 * 1814 * @return the number of char16_ts available in the internal buffer 1815 * @see getBuffer 1816 * @stable ICU 2.0 1817 */ 1818 inline int32_t getCapacity(void) const; 1819 1820 /* Other operations */ 1821 1822 /** 1823 * Generate a hash code for this object. 1824 * @return The hash code of this UnicodeString. 1825 * @stable ICU 2.0 1826 */ 1827 inline int32_t hashCode(void) const; 1828 1829 /** 1830 * Determine if this object contains a valid string. 1831 * A bogus string has no value. It is different from an empty string, 1832 * although in both cases isEmpty() returns TRUE and length() returns 0. 1833 * setToBogus() and isBogus() can be used to indicate that no string value is available. 1834 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and 1835 * length() returns 0. 
1836 * 1837 * @return TRUE if the string is bogus/invalid, FALSE otherwise 1838 * @see setToBogus() 1839 * @stable ICU 2.0 1840 */ 1841 inline UBool isBogus(void) const; 1842 1843 1844 //======================================== 1845 // Write operations 1846 //======================================== 1847 1848 /* Assignment operations */ 1849 1850 /** 1851 * Assignment operator. Replace the characters in this UnicodeString 1852 * with the characters from <TT>srcText</TT>. 1853 * 1854 * Starting with ICU 2.4, the assignment operator and the copy constructor 1855 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1856 * By contrast, the fastCopyFrom() function implements the old, 1857 * more efficient but less safe behavior 1858 * of making this string also a readonly alias to the same buffer. 1859 * 1860 * If the source object has an "open" buffer from getBuffer(minCapacity), 1861 * then the copy is an empty string. 1862 * 1863 * @param srcText The text containing the characters to replace 1864 * @return a reference to this 1865 * @stable ICU 2.0 1866 * @see fastCopyFrom 1867 */ 1868 UnicodeString &operator=(const UnicodeString &srcText); 1869 1870 /** 1871 * Almost the same as the assignment operator. 1872 * Replace the characters in this UnicodeString 1873 * with the characters from <code>srcText</code>. 1874 * 1875 * This function works the same as the assignment operator 1876 * for all strings except for ones that are readonly aliases. 1877 * 1878 * Starting with ICU 2.4, the assignment operator and the copy constructor 1879 * allocate a new buffer and copy the buffer contents even for readonly aliases. 1880 * This function implements the old, more efficient but less safe behavior 1881 * of making this string also a readonly alias to the same buffer. 
1882 * 1883 * The fastCopyFrom function must be used only if it is known that the lifetime of 1884 * this UnicodeString does not exceed the lifetime of the aliased buffer 1885 * including its contents, for example for strings from resource bundles 1886 * or aliases to string constants. 1887 * 1888 * If the source object has an "open" buffer from getBuffer(minCapacity), 1889 * then the copy is an empty string. 1890 * 1891 * @param src The text containing the characters to replace. 1892 * @return a reference to this 1893 * @stable ICU 2.4 1894 */ 1895 UnicodeString &fastCopyFrom(const UnicodeString &src); 1896 1897 /** 1898 * Move assignment operator; might leave src in bogus state. 1899 * This string will have the same contents and state that the source string had. 1900 * The behavior is undefined if *this and src are the same object. 1901 * @param src source string 1902 * @return *this 1903 * @stable ICU 56 1904 */ 1905 UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT { 1906 return moveFrom(src); 1907 } 1908 1909 // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API 1910 /** 1911 * Move assignment; might leave src in bogus state. 1912 * This string will have the same contents and state that the source string had. 1913 * The behavior is undefined if *this and src are the same object. 1914 * 1915 * Can be called explicitly, does not need C++11 support. 1916 * @param src source string 1917 * @return *this 1918 * @draft ICU 56 1919 */ 1920 UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT; 1921 1922 /** 1923 * Swap strings. 1924 * @param other other string 1925 * @stable ICU 56 1926 */ 1927 void swap(UnicodeString &other) U_NOEXCEPT; 1928 1929 /** 1930 * Non-member UnicodeString swap function. 
1931 * @param s1 will get s2's contents and state 1932 * @param s2 will get s1's contents and state 1933 * @stable ICU 56 1934 */ 1935 friend U_COMMON_API inline void U_EXPORT2 1936 swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT { 1937 s1.swap(s2); 1938 } 1939 1940 /** 1941 * Assignment operator. Replace the characters in this UnicodeString 1942 * with the code unit <TT>ch</TT>. 1943 * @param ch the code unit to replace 1944 * @return a reference to this 1945 * @stable ICU 2.0 1946 */ 1947 inline UnicodeString& operator= (char16_t ch); 1948 1949 /** 1950 * Assignment operator. Replace the characters in this UnicodeString 1951 * with the code point <TT>ch</TT>. 1952 * @param ch the code point to replace 1953 * @return a reference to this 1954 * @stable ICU 2.0 1955 */ 1956 inline UnicodeString& operator= (UChar32 ch); 1957 1958 /** 1959 * Set the text in the UnicodeString object to the characters 1960 * in <TT>srcText</TT> in the range 1961 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). 1962 * <TT>srcText</TT> is not modified. 1963 * @param srcText the source for the new characters 1964 * @param srcStart the offset into <TT>srcText</TT> where new characters 1965 * will be obtained 1966 * @return a reference to this 1967 * @stable ICU 2.2 1968 */ 1969 inline UnicodeString& setTo(const UnicodeString& srcText, 1970 int32_t srcStart); 1971 1972 /** 1973 * Set the text in the UnicodeString object to the characters 1974 * in <TT>srcText</TT> in the range 1975 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 1976 * <TT>srcText</TT> is not modified. 1977 * @param srcText the source for the new characters 1978 * @param srcStart the offset into <TT>srcText</TT> where new characters 1979 * will be obtained 1980 * @param srcLength the number of characters in <TT>srcText</TT> in the 1981 * replace string. 
1982 * @return a reference to this 1983 * @stable ICU 2.0 1984 */ 1985 inline UnicodeString& setTo(const UnicodeString& srcText, 1986 int32_t srcStart, 1987 int32_t srcLength); 1988 1989 /** 1990 * Set the text in the UnicodeString object to the characters in 1991 * <TT>srcText</TT>. 1992 * <TT>srcText</TT> is not modified. 1993 * @param srcText the source for the new characters 1994 * @return a reference to this 1995 * @stable ICU 2.0 1996 */ 1997 inline UnicodeString& setTo(const UnicodeString& srcText); 1998 1999 /** 2000 * Set the characters in the UnicodeString object to the characters 2001 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 2002 * @param srcChars the source for the new characters 2003 * @param srcLength the number of Unicode characters in srcChars. 2004 * @return a reference to this 2005 * @stable ICU 2.0 2006 */ 2007 inline UnicodeString& setTo(const char16_t *srcChars, 2008 int32_t srcLength); 2009 2010 /** 2011 * Set the characters in the UnicodeString object to the code unit 2012 * <TT>srcChar</TT>. 2013 * @param srcChar the code unit which becomes the UnicodeString's character 2014 * content 2015 * @return a reference to this 2016 * @stable ICU 2.0 2017 */ 2018 UnicodeString& setTo(char16_t srcChar); 2019 2020 /** 2021 * Set the characters in the UnicodeString object to the code point 2022 * <TT>srcChar</TT>. 2023 * @param srcChar the code point which becomes the UnicodeString's character 2024 * content 2025 * @return a reference to this 2026 * @stable ICU 2.0 2027 */ 2028 UnicodeString& setTo(UChar32 srcChar); 2029 2030 /** 2031 * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. 2032 * The text will be used for the UnicodeString object, but 2033 * it will not be released when the UnicodeString is destroyed. 2034 * This has copy-on-write semantics: 2035 * When the string is modified, then the buffer is first copied into 2036 * newly allocated memory. 2037 * The aliased buffer is never modified.
2038 * 2039 * In an assignment to another UnicodeString, when using the copy constructor 2040 * or the assignment operator, the text will be copied. 2041 * When using fastCopyFrom(), the text will be aliased again, 2042 * so that both strings then alias the same readonly-text. 2043 * 2044 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 2045 * This must be true if <code>textLength==-1</code>. 2046 * @param text The characters to alias for the UnicodeString. 2047 * @param textLength The number of Unicode characters in <code>text</code> to alias. 2048 * If -1, then this function will determine the length 2049 * by calling <code>u_strlen()</code>. 2050 * @return a reference to this 2051 * @stable ICU 2.0 2052 */ 2053 UnicodeString &setTo(UBool isTerminated, 2054 ConstChar16Ptr text, 2055 int32_t textLength); 2056 2057 /** 2058 * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. 2059 * The text will be used for the UnicodeString object, but 2060 * it will not be released when the UnicodeString is destroyed. 2061 * This has write-through semantics: 2062 * For as long as the capacity of the buffer is sufficient, write operations 2063 * will directly affect the buffer. When more capacity is necessary, then 2064 * a new buffer will be allocated and the contents copied as with regularly 2065 * constructed strings. 2066 * In an assignment to another UnicodeString, the buffer will be copied. 2067 * The extract(Char16Ptr dst) function detects whether the dst pointer is the same 2068 * as the string buffer itself and will in this case not copy the contents. 2069 * 2070 * @param buffer The characters to alias for the UnicodeString. 2071 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 2072 * @param buffCapacity The size of <code>buffer</code> in char16_ts.
2073 * @return a reference to this 2074 * @stable ICU 2.0 2075 */ 2076 UnicodeString &setTo(char16_t *buffer, 2077 int32_t buffLength, 2078 int32_t buffCapacity); 2079 2080 /** 2081 * Make this UnicodeString object invalid. 2082 * The string will test TRUE with isBogus(). 2083 * 2084 * A bogus string has no value. It is different from an empty string. 2085 * It can be used to indicate that no string value is available. 2086 * getBuffer() and getTerminatedBuffer() return NULL, and 2087 * length() returns 0. 2088 * 2089 * This utility function is used throughout the UnicodeString 2090 * implementation to indicate that a UnicodeString operation failed, 2091 * and may be used in other functions, 2092 * especially but not exclusively when such functions do not 2093 * take a UErrorCode for simplicity. 2094 * 2095 * The following methods, and no others, will clear a string object's bogus flag: 2096 * - remove() 2097 * - remove(0, INT32_MAX) 2098 * - truncate(0) 2099 * - operator=() (assignment operator) 2100 * - setTo(...) 2101 * 2102 * The simplest way to turn a bogus string into an empty one 2103 * is to use the remove() function. 2104 * Examples of other calls that are equivalent to "set to empty string": 2105 * \code 2106 * if(s.isBogus()) { 2107 * s.remove(); // set to an empty string (remove all), or 2108 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or 2109 * s.truncate(0); // set to an empty string (complete truncation), or 2110 * s=UnicodeString(); // assign an empty string, or 2111 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or 2112 * static const char16_t nul=0; 2113 * s.setTo(&nul, 0); // set to an empty C Unicode string 2114 * } 2115 * \endcode 2116 * 2117 * @see isBogus() 2118 * @stable ICU 2.0 2119 */ 2120 void setToBogus(); 2121 2122 /** 2123 * Set the character at the specified offset to the specified character.
2124 * @param offset A valid offset into the text of the character to set 2125 * @param ch The new character 2126 * @return A reference to this 2127 * @stable ICU 2.0 2128 */ 2129 UnicodeString& setCharAt(int32_t offset, 2130 char16_t ch); 2131 2132 2133 /* Append operations */ 2134 2135 /** 2136 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString 2137 * object. 2138 * @param ch the code unit to be appended 2139 * @return a reference to this 2140 * @stable ICU 2.0 2141 */ 2142 inline UnicodeString& operator+= (char16_t ch); 2143 2144 /** 2145 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString 2146 * object. 2147 * @param ch the code point to be appended 2148 * @return a reference to this 2149 * @stable ICU 2.0 2150 */ 2151 inline UnicodeString& operator+= (UChar32 ch); 2152 2153 /** 2154 * Append operator. Append the characters in <TT>srcText</TT> to the 2155 * UnicodeString object. <TT>srcText</TT> is not modified. 2156 * @param srcText the source for the new characters 2157 * @return a reference to this 2158 * @stable ICU 2.0 2159 */ 2160 inline UnicodeString& operator+= (const UnicodeString& srcText); 2161 2162 /** 2163 * Append the characters 2164 * in <TT>srcText</TT> in the range 2165 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the 2166 * UnicodeString object. <TT>srcText</TT> 2167 * is not modified. 2168 * @param srcText the source for the new characters 2169 * @param srcStart the offset into <TT>srcText</TT> where new characters 2170 * will be obtained 2171 * @param srcLength the number of characters in <TT>srcText</TT> in 2172 * the append string 2173 * @return a reference to this 2174 * @stable ICU 2.0 2175 */ 2176 inline UnicodeString& append(const UnicodeString& srcText, 2177 int32_t srcStart, 2178 int32_t srcLength); 2179 2180 /** 2181 * Append the characters in <TT>srcText</TT> to the UnicodeString object. 2182 * <TT>srcText</TT> is not modified.
2183 * @param srcText the source for the new characters 2184 * @return a reference to this 2185 * @stable ICU 2.0 2186 */ 2187 inline UnicodeString& append(const UnicodeString& srcText); 2188 2189 /** 2190 * Append the characters in <TT>srcChars</TT> in the range 2191 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString 2192 * object. 2193 * <TT>srcChars</TT> is not modified. 2194 * @param srcChars the source for the new characters 2195 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2196 * will be obtained 2197 * @param srcLength the number of characters in <TT>srcChars</TT> in 2198 * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated 2199 * @return a reference to this 2200 * @stable ICU 2.0 2201 */ 2202 inline UnicodeString& append(const char16_t *srcChars, 2203 int32_t srcStart, 2204 int32_t srcLength); 2205 2206 /** 2207 * Append the characters in <TT>srcChars</TT> to the UnicodeString object. 2208 * <TT>srcChars</TT> is not modified. 2209 * @param srcChars the source for the new characters 2210 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; 2211 * can be -1 if <TT>srcChars</TT> is NUL-terminated 2212 * @return a reference to this 2213 * @stable ICU 2.0 2214 */ 2215 inline UnicodeString& append(ConstChar16Ptr srcChars, 2216 int32_t srcLength); 2217 2218 /** 2219 * Append the code unit <TT>srcChar</TT> to the UnicodeString object. 2220 * @param srcChar the code unit to append 2221 * @return a reference to this 2222 * @stable ICU 2.0 2223 */ 2224 inline UnicodeString& append(char16_t srcChar); 2225 2226 /** 2227 * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2228 * @param srcChar the code point to append 2229 * @return a reference to this 2230 * @stable ICU 2.0 2231 */ 2232 UnicodeString& append(UChar32 srcChar); 2233 2234 2235 /* Insert operations */ 2236 2237 /** 2238 * Insert the characters in <TT>srcText</TT> in the range 2239 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2240 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 2241 * @param start the offset where the insertion begins 2242 * @param srcText the source for the new characters 2243 * @param srcStart the offset into <TT>srcText</TT> where new characters 2244 * will be obtained 2245 * @param srcLength the number of characters in <TT>srcText</TT> in 2246 * the insert string 2247 * @return a reference to this 2248 * @stable ICU 2.0 2249 */ 2250 inline UnicodeString& insert(int32_t start, 2251 const UnicodeString& srcText, 2252 int32_t srcStart, 2253 int32_t srcLength); 2254 2255 /** 2256 * Insert the characters in <TT>srcText</TT> into the UnicodeString object 2257 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 2258 * @param start the offset where the insertion begins 2259 * @param srcText the source for the new characters 2260 * @return a reference to this 2261 * @stable ICU 2.0 2262 */ 2263 inline UnicodeString& insert(int32_t start, 2264 const UnicodeString& srcText); 2265 2266 /** 2267 * Insert the characters in <TT>srcChars</TT> in the range 2268 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString 2269 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 
2270 * @param start the offset at which the insertion begins 2271 * @param srcChars the source for the new characters 2272 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2273 * will be obtained 2274 * @param srcLength the number of characters in <TT>srcChars</TT> 2275 * in the insert string 2276 * @return a reference to this 2277 * @stable ICU 2.0 2278 */ 2279 inline UnicodeString& insert(int32_t start, 2280 const char16_t *srcChars, 2281 int32_t srcStart, 2282 int32_t srcLength); 2283 2284 /** 2285 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object 2286 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 2287 * @param start the offset where the insertion begins 2288 * @param srcChars the source for the new characters 2289 * @param srcLength the number of Unicode characters in srcChars. 2290 * @return a reference to this 2291 * @stable ICU 2.0 2292 */ 2293 inline UnicodeString& insert(int32_t start, 2294 ConstChar16Ptr srcChars, 2295 int32_t srcLength); 2296 2297 /** 2298 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at 2299 * offset <TT>start</TT>. 2300 * @param start the offset at which the insertion occurs 2301 * @param srcChar the code unit to insert 2302 * @return a reference to this 2303 * @stable ICU 2.0 2304 */ 2305 inline UnicodeString& insert(int32_t start, 2306 char16_t srcChar); 2307 2308 /** 2309 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at 2310 * offset <TT>start</TT>. 
2311 * @param start the offset at which the insertion occurs 2312 * @param srcChar the code point to insert 2313 * @return a reference to this 2314 * @stable ICU 2.0 2315 */ 2316 inline UnicodeString& insert(int32_t start, 2317 UChar32 srcChar); 2318 2319 2320 /* Replace operations */ 2321 2322 /** 2323 * Replace the characters in the range 2324 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2325 * <TT>srcText</TT> in the range 2326 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 2327 * <TT>srcText</TT> is not modified. 2328 * @param start the offset at which the replace operation begins 2329 * @param length the number of characters to replace. The character at 2330 * <TT>start + length</TT> is not modified. 2331 * @param srcText the source for the new characters 2332 * @param srcStart the offset into <TT>srcText</TT> where new characters 2333 * will be obtained 2334 * @param srcLength the number of characters in <TT>srcText</TT> in 2335 * the replace string 2336 * @return a reference to this 2337 * @stable ICU 2.0 2338 */ 2339 UnicodeString& replace(int32_t start, 2340 int32_t length, 2341 const UnicodeString& srcText, 2342 int32_t srcStart, 2343 int32_t srcLength); 2344 2345 /** 2346 * Replace the characters in the range 2347 * [<TT>start</TT>, <TT>start + length</TT>) 2348 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is 2349 * not modified. 2350 * @param start the offset at which the replace operation begins 2351 * @param length the number of characters to replace. The character at 2352 * <TT>start + length</TT> is not modified. 
2353 * @param srcText the source for the new characters 2354 * @return a reference to this 2355 * @stable ICU 2.0 2356 */ 2357 UnicodeString& replace(int32_t start, 2358 int32_t length, 2359 const UnicodeString& srcText); 2360 2361 /** 2362 * Replace the characters in the range 2363 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2364 * <TT>srcChars</TT> in the range 2365 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> 2366 * is not modified. 2367 * @param start the offset at which the replace operation begins 2368 * @param length the number of characters to replace. The character at 2369 * <TT>start + length</TT> is not modified. 2370 * @param srcChars the source for the new characters 2371 * @param srcStart the offset into <TT>srcChars</TT> where new characters 2372 * will be obtained 2373 * @param srcLength the number of characters in <TT>srcChars</TT> 2374 * in the replace string 2375 * @return a reference to this 2376 * @stable ICU 2.0 2377 */ 2378 UnicodeString& replace(int32_t start, 2379 int32_t length, 2380 const char16_t *srcChars, 2381 int32_t srcStart, 2382 int32_t srcLength); 2383 2384 /** 2385 * Replace the characters in the range 2386 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in 2387 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. 2388 * @param start the offset at which the replace operation begins 2389 * @param length number of characters to replace. The character at 2390 * <TT>start + length</TT> is not modified. 2391 * @param srcChars the source for the new characters 2392 * @param srcLength the number of Unicode characters in srcChars 2393 * @return a reference to this 2394 * @stable ICU 2.0 2395 */ 2396 inline UnicodeString& replace(int32_t start, 2397 int32_t length, 2398 ConstChar16Ptr srcChars, 2399 int32_t srcLength); 2400 2401 /** 2402 * Replace the characters in the range 2403 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit 2404 * <TT>srcChar</TT>. 
2405 * @param start the offset at which the replace operation begins 2406 * @param length the number of characters to replace. The character at 2407 * <TT>start + length</TT> is not modified. 2408 * @param srcChar the new code unit 2409 * @return a reference to this 2410 * @stable ICU 2.0 2411 */ 2412 inline UnicodeString& replace(int32_t start, 2413 int32_t length, 2414 char16_t srcChar); 2415 2416 /** 2417 * Replace the characters in the range 2418 * [<TT>start</TT>, <TT>start + length</TT>) with the code point 2419 * <TT>srcChar</TT>. 2420 * @param start the offset at which the replace operation begins 2421 * @param length the number of characters to replace. The character at 2422 * <TT>start + length</TT> is not modified. 2423 * @param srcChar the new code point 2424 * @return a reference to this 2425 * @stable ICU 2.0 2426 */ 2427 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); 2428 2429 /** 2430 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2431 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. 2432 * @param start the offset at which the replace operation begins 2433 * @param limit the offset immediately following the replace range 2434 * @param srcText the source for the new characters 2435 * @return a reference to this 2436 * @stable ICU 2.0 2437 */ 2438 inline UnicodeString& replaceBetween(int32_t start, 2439 int32_t limit, 2440 const UnicodeString& srcText); 2441 2442 /** 2443 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) 2444 * with the characters in <TT>srcText</TT> in the range 2445 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2446 * @param start the offset at which the replace operation begins 2447 * @param limit the offset immediately following the replace range 2448 * @param srcText the source for the new characters 2449 * @param srcStart the offset into <TT>srcText</TT> where new characters 2450 * will be obtained 2451 * @param srcLimit the offset immediately following the range to copy 2452 * in <TT>srcText</TT> 2453 * @return a reference to this 2454 * @stable ICU 2.0 2455 */ 2456 inline UnicodeString& replaceBetween(int32_t start, 2457 int32_t limit, 2458 const UnicodeString& srcText, 2459 int32_t srcStart, 2460 int32_t srcLimit); 2461 2462 /** 2463 * Replace a substring of this object with the given text. 2464 * @param start the beginning index, inclusive; <code>0 <= start 2465 * <= limit</code>. 2466 * @param limit the ending index, exclusive; <code>start <= limit 2467 * <= length()</code>. 2468 * @param text the text to replace characters <code>start</code> 2469 * to <code>limit - 1</code> 2470 * @stable ICU 2.0 2471 */ 2472 virtual void handleReplaceBetween(int32_t start, 2473 int32_t limit, 2474 const UnicodeString& text); 2475 2476 /** 2477 * Replaceable API 2478 * @return TRUE if it has MetaData 2479 * @stable ICU 2.4 2480 */ 2481 virtual UBool hasMetaData() const; 2482 2483 /** 2484 * Copy a substring of this object, retaining attribute (out-of-band) 2485 * information. This method is used to duplicate or reorder substrings. 2486 * The destination index must not overlap the source range. 2487 * 2488 * @param start the beginning index, inclusive; <code>0 <= start <= 2489 * limit</code>. 2490 * @param limit the ending index, exclusive; <code>start <= limit <= 2491 * length()</code>. 2492 * @param dest the destination index. The characters from 2493 * <code>start..limit-1</code> will be copied to <code>dest</code>. 2494 * Implementations of this method may assume that <code>dest <= start || 2495 * dest >= limit</code>.
2496 * @stable ICU 2.0 2497 */ 2498 virtual void copy(int32_t start, int32_t limit, int32_t dest); 2499 2500 /* Search and replace operations */ 2501 2502 /** 2503 * Replace all occurrences of characters in oldText with the characters 2504 * in newText 2505 * @param oldText the text containing the search text 2506 * @param newText the text containing the replacement text 2507 * @return a reference to this 2508 * @stable ICU 2.0 2509 */ 2510 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 2511 const UnicodeString& newText); 2512 2513 /** 2514 * Replace all occurrences of characters in oldText with characters 2515 * in newText 2516 * in the range [<TT>start</TT>, <TT>start + length</TT>). 2517 * @param start the start of the range in which replace will be performed 2518 * @param length the length of the range in which replace will be performed 2519 * @param oldText the text containing the search text 2520 * @param newText the text containing the replacement text 2521 * @return a reference to this 2522 * @stable ICU 2.0 2523 */ 2524 inline UnicodeString& findAndReplace(int32_t start, 2525 int32_t length, 2526 const UnicodeString& oldText, 2527 const UnicodeString& newText); 2528 2529 /** 2530 * Replace all occurrences of characters in oldText in the range 2531 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters 2532 * in newText in the range 2533 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) 2534 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2535 * @param start the start of the range in which replace will be performed 2536 * @param length the length of the range in which replace will be performed 2537 * @param oldText the text containing the search text 2538 * @param oldStart the start of the search range in <TT>oldText</TT> 2539 * @param oldLength the length of the search range in <TT>oldText</TT> 2540 * @param newText the text containing the replacement text 2541 * @param newStart the start of the replacement range in <TT>newText</TT> 2542 * @param newLength the length of the replacement range in <TT>newText</TT> 2543 * @return a reference to this 2544 * @stable ICU 2.0 2545 */ 2546 UnicodeString& findAndReplace(int32_t start, 2547 int32_t length, 2548 const UnicodeString& oldText, 2549 int32_t oldStart, 2550 int32_t oldLength, 2551 const UnicodeString& newText, 2552 int32_t newStart, 2553 int32_t newLength); 2554 2555 2556 /* Remove operations */ 2557 2558 /** 2559 * Remove all characters from the UnicodeString object. 2560 * @return a reference to this 2561 * @stable ICU 2.0 2562 */ 2563 inline UnicodeString& remove(void); 2564 2565 /** 2566 * Remove the characters in the range 2567 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. 2568 * @param start the offset of the first character to remove 2569 * @param length the number of characters to remove 2570 * @return a reference to this 2571 * @stable ICU 2.0 2572 */ 2573 inline UnicodeString& remove(int32_t start, 2574 int32_t length = (int32_t)INT32_MAX); 2575 2576 /** 2577 * Remove the characters in the range 2578 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2579 * @param start the offset of the first character to remove 2580 * @param limit the offset immediately following the range to remove 2581 * @return a reference to this 2582 * @stable ICU 2.0 2583 */ 2584 inline UnicodeString& removeBetween(int32_t start, 2585 int32_t limit = (int32_t)INT32_MAX); 2586 2587 /** 2588 * Retain only the characters in the range 2589 * [<code>start</code>, <code>limit</code>) from the UnicodeString object. 2590 * Removes characters before <code>start</code> and at and after <code>limit</code>. 2591 * @param start the offset of the first character to retain 2592 * @param limit the offset immediately following the range to retain 2593 * @return a reference to this 2594 * @stable ICU 4.4 2595 */ 2596 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 2597 2598 /* Length operations */ 2599 2600 /** 2601 * Pad the start of this UnicodeString with the character <TT>padChar</TT>. 2602 * If the length of this UnicodeString is less than targetLength, 2603 * targetLength - length() copies of padChar will be added to the 2604 * beginning of this UnicodeString. 2605 * @param targetLength the desired length of the string 2606 * @param padChar the character to use for padding. Defaults to 2607 * space (U+0020) 2608 * @return TRUE if the text was padded, FALSE otherwise. 2609 * @stable ICU 2.0 2610 */ 2611 UBool padLeading(int32_t targetLength, 2612 char16_t padChar = 0x0020); 2613 2614 /** 2615 * Pad the end of this UnicodeString with the character <TT>padChar</TT>. 2616 * If the length of this UnicodeString is less than targetLength, 2617 * targetLength - length() copies of padChar will be added to the 2618 * end of this UnicodeString. 2619 * @param targetLength the desired length of the string 2620 * @param padChar the character to use for padding. Defaults to 2621 * space (U+0020) 2622 * @return TRUE if the text was padded, FALSE otherwise.
2623 * @stable ICU 2.0 2624 */ 2625 UBool padTrailing(int32_t targetLength, 2626 char16_t padChar = 0x0020); 2627 2628 /** 2629 * Truncate this UnicodeString to the <TT>targetLength</TT>. 2630 * @param targetLength the desired length of this UnicodeString. 2631 * @return TRUE if the text was truncated, FALSE otherwise 2632 * @stable ICU 2.0 2633 */ 2634 inline UBool truncate(int32_t targetLength); 2635 2636 /** 2637 * Trims leading and trailing whitespace from this UnicodeString. 2638 * @return a reference to this 2639 * @stable ICU 2.0 2640 */ 2641 UnicodeString& trim(void); 2642 2643 2644 /* Miscellaneous operations */ 2645 2646 /** 2647 * Reverse this UnicodeString in place. 2648 * @return a reference to this 2649 * @stable ICU 2.0 2650 */ 2651 inline UnicodeString& reverse(void); 2652 2653 /** 2654 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in 2655 * this UnicodeString. 2656 * @param start the start of the range to reverse 2657 * @param length the number of characters to reverse 2658 * @return a reference to this 2659 * @stable ICU 2.0 2660 */ 2661 inline UnicodeString& reverse(int32_t start, 2662 int32_t length); 2663 2664 /** 2665 * Convert the characters in this to UPPER CASE following the conventions of 2666 * the default locale. 2667 * @return A reference to this. 2668 * @stable ICU 2.0 2669 */ 2670 UnicodeString& toUpper(void); 2671 2672 /** 2673 * Convert the characters in this to UPPER CASE following the conventions of 2674 * a specific locale. 2675 * @param locale The locale containing the conventions to use. 2676 * @return A reference to this. 2677 * @stable ICU 2.0 2678 */ 2679 UnicodeString& toUpper(const Locale& locale); 2680 2681 /** 2682 * Convert the characters in this to lower case following the conventions of 2683 * the default locale. 2684 * @return A reference to this.
2685 * @stable ICU 2.0 2686 */ 2687 UnicodeString& toLower(void); 2688 2689 /** 2690 * Convert the characters in this to lower case following the conventions of 2691 * a specific locale. 2692 * @param locale The locale containing the conventions to use. 2693 * @return A reference to this. 2694 * @stable ICU 2.0 2695 */ 2696 UnicodeString& toLower(const Locale& locale); 2697 2698 #if !UCONFIG_NO_BREAK_ITERATION 2699 2700 /** 2701 * Titlecase this string, convenience function using the default locale. 2702 * 2703 * Casing is locale-dependent and context-sensitive. 2704 * Titlecasing uses a break iterator to find the first characters of words 2705 * that are to be titlecased. It titlecases those characters and lowercases 2706 * all others. 2707 * 2708 * The titlecase break iterator can be provided to customize for arbitrary 2709 * styles, using rules and dictionaries beyond the standard iterators. 2710 * It may be more efficient to always provide an iterator to avoid 2711 * opening and closing one for each string. 2712 * The standard titlecase iterator for the root locale implements the 2713 * algorithm of Unicode TR 21. 2714 * 2715 * This function uses only the setText(), first() and next() methods of the 2716 * provided break iterator. 2717 * 2718 * @param titleIter A break iterator to find the first characters of words 2719 * that are to be titlecased. 2720 * If none is provided (0), then a standard titlecase 2721 * break iterator is opened. 2722 * Otherwise the provided iterator is set to the string's text. 2723 * @return A reference to this. 2724 * @stable ICU 2.1 2725 */ 2726 UnicodeString &toTitle(BreakIterator *titleIter); 2727 2728 /** 2729 * Titlecase this string. 2730 * 2731 * Casing is locale-dependent and context-sensitive. 2732 * Titlecasing uses a break iterator to find the first characters of words 2733 * that are to be titlecased. It titlecases those characters and lowercases 2734 * all others. 
2735 * 2736 * The titlecase break iterator can be provided to customize for arbitrary 2737 * styles, using rules and dictionaries beyond the standard iterators. 2738 * It may be more efficient to always provide an iterator to avoid 2739 * opening and closing one for each string. 2740 * The standard titlecase iterator for the root locale implements the 2741 * algorithm of Unicode TR 21. 2742 * 2743 * This function uses only the setText(), first() and next() methods of the 2744 * provided break iterator. 2745 * 2746 * @param titleIter A break iterator to find the first characters of words 2747 * that are to be titlecased. 2748 * If none is provided (0), then a standard titlecase 2749 * break iterator is opened. 2750 * Otherwise the provided iterator is set to the string's text. 2751 * @param locale The locale to consider. 2752 * @return A reference to this. 2753 * @stable ICU 2.1 2754 */ 2755 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 2756 2757 /** 2758 * Titlecase this string, with options. 2759 * 2760 * Casing is locale-dependent and context-sensitive. 2761 * Titlecasing uses a break iterator to find the first characters of words 2762 * that are to be titlecased. It titlecases those characters and lowercases 2763 * all others. (This can be modified with options.) 2764 * 2765 * The titlecase break iterator can be provided to customize for arbitrary 2766 * styles, using rules and dictionaries beyond the standard iterators. 2767 * It may be more efficient to always provide an iterator to avoid 2768 * opening and closing one for each string. 2769 * The standard titlecase iterator for the root locale implements the 2770 * algorithm of Unicode TR 21. 2771 * 2772 * This function uses only the setText(), first() and next() methods of the 2773 * provided break iterator. 2774 * 2775 * @param titleIter A break iterator to find the first characters of words 2776 * that are to be titlecased.
2777 * If none is provided (0), then a standard titlecase 2778 * break iterator is opened. 2779 * Otherwise the provided iterator is set to the string's text. 2780 * @param locale The locale to consider. 2781 * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE, 2782 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, 2783 * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. 2784 * See also ucasemap_open(). 2785 * @return A reference to this. 2786 * @stable ICU 3.8 2787 */ 2788 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 2789 2790 #endif 2791 2792 /** 2793 * Case-folds the characters in this string. 2794 * 2795 * Case-folding is locale-independent and not context-sensitive, 2796 * but there is an option for whether to include or exclude mappings for dotted I 2797 * and dotless i that are marked with 'T' in CaseFolding.txt. 2798 * 2799 * The result may be longer or shorter than the original. 2800 * 2801 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I 2802 * @return A reference to this. 2803 * @stable ICU 2.0 2804 */ 2805 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 2806 2807 //======================================== 2808 // Access to the internal buffer 2809 //======================================== 2810 2811 /** 2812 * Get a read/write pointer to the internal buffer. 2813 * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, 2814 * writable, and is still owned by the UnicodeString object. 2815 * Calls to getBuffer(minCapacity) must not be nested, and 2816 * must be matched with calls to releaseBuffer(newLength). 2817 * If the string buffer was read-only or shared, 2818 * then it will be reallocated and copied. 2819 * 2820 * An attempted nested call will return 0, and will not further modify the 2821 * state of the UnicodeString object.
2822 * It also returns 0 if the string is bogus. 2823 * 2824 * The actual capacity of the string buffer may be larger than minCapacity. 2825 * getCapacity() returns the actual capacity. 2826 * For many operations, the full capacity should be used to avoid reallocations. 2827 * 2828 * While the buffer is "open" between getBuffer(minCapacity) 2829 * and releaseBuffer(newLength), the following applies: 2830 * - The string length is set to 0. 2831 * - Any read API call on the UnicodeString object will behave like on a 0-length string. 2832 * - Any write API call on the UnicodeString object is disallowed and will have no effect. 2833 * - You can read from and write to the returned buffer. 2834 * - The previous string contents will still be in the buffer; 2835 * if you want to use it, then you need to call length() before getBuffer(minCapacity). 2836 * If the length() was greater than minCapacity, then any contents after minCapacity 2837 * may be lost. 2838 * The buffer contents is not NUL-terminated by getBuffer(). 2839 * If length()<getCapacity() then you can terminate it by writing a NUL 2840 * at index length(). 2841 * - You must call releaseBuffer(newLength) before and in order to 2842 * return to normal UnicodeString operation. 2843 * 2844 * @param minCapacity the minimum number of char16_ts that are to be available 2845 * in the buffer, starting at the returned pointer; 2846 * default to the current string capacity if minCapacity==-1 2847 * @return a writable pointer to the internal string buffer, 2848 * or nullptr if an error occurs (nested calls, out of memory) 2849 * 2850 * @see releaseBuffer 2851 * @see getTerminatedBuffer() 2852 * @stable ICU 2.0 2853 */ 2854 char16_t *getBuffer(int32_t minCapacity); 2855 2856 /** 2857 * Release a read/write buffer on a UnicodeString object with an 2858 * "open" getBuffer(minCapacity). 2859 * This function must be called in a matched pair with getBuffer(minCapacity). 
2860 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". 2861 * 2862 * It will set the string length to newLength, at most to the current capacity. 2863 * If newLength==-1 then it will set the length according to the 2864 * first NUL in the buffer, or to the capacity if there is no NUL. 2865 * 2866 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. 2867 * 2868 * @param newLength the new length of the UnicodeString object; 2869 * defaults to the current capacity if newLength is greater than that; 2870 * if newLength==-1, it defaults to u_strlen(buffer) but not more than 2871 * the current capacity of the string 2872 * 2873 * @see getBuffer(int32_t minCapacity) 2874 * @stable ICU 2.0 2875 */ 2876 void releaseBuffer(int32_t newLength=-1); 2877 2878 /** 2879 * Get a read-only pointer to the internal buffer. 2880 * This can be called at any time on a valid UnicodeString. 2881 * 2882 * It returns 0 if the string is bogus, or 2883 * during an "open" getBuffer(minCapacity). 2884 * 2885 * It can be called as many times as desired. 2886 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2887 * at which time the pointer is semantically invalidated and must not be used any more. 2888 * 2889 * The capacity of the buffer can be determined with getCapacity(). 2890 * The part after length() may or may not be initialized and valid, 2891 * depending on the history of the UnicodeString object. 2892 * 2893 * The buffer contents is (probably) not NUL-terminated. 2894 * You can check if it is with 2895 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. 2896 * (See getTerminatedBuffer().) 2897 * 2898 * The buffer may reside in read-only memory. Its contents must not 2899 * be modified. 
2900 * 2901 * @return a read-only pointer to the internal string buffer, 2902 * or nullptr if the string is empty or bogus 2903 * 2904 * @see getBuffer(int32_t minCapacity) 2905 * @see getTerminatedBuffer() 2906 * @stable ICU 2.0 2907 */ 2908 inline const char16_t *getBuffer() const; 2909 2910 /** 2911 * Get a read-only pointer to the internal buffer, 2912 * making sure that it is NUL-terminated. 2913 * This can be called at any time on a valid UnicodeString. 2914 * 2915 * It returns 0 if the string is bogus, or 2916 * during an "open" getBuffer(minCapacity), or if the buffer cannot 2917 * be NUL-terminated (because memory allocation failed). 2918 * 2919 * It can be called as many times as desired. 2920 * The pointer that it returns will remain valid until the UnicodeString object is modified, 2921 * at which time the pointer is semantically invalidated and must not be used any more. 2922 * 2923 * The capacity of the buffer can be determined with getCapacity(). 2924 * The part after length()+1 may or may not be initialized and valid, 2925 * depending on the history of the UnicodeString object. 2926 * 2927 * The buffer contents is guaranteed to be NUL-terminated. 2928 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL 2929 * is written. 2930 * For this reason, this function is not const, unlike getBuffer(). 2931 * Note that a UnicodeString may also contain NUL characters as part of its contents. 2932 * 2933 * The buffer may reside in read-only memory. Its contents must not 2934 * be modified. 2935 * 2936 * @return a read-only pointer to the internal string buffer, 2937 * or 0 if the string is empty or bogus 2938 * 2939 * @see getBuffer(int32_t minCapacity) 2940 * @see getBuffer() 2941 * @stable ICU 2.2 2942 */ 2943 const char16_t *getTerminatedBuffer(); 2944 2945 //======================================== 2946 // Constructors 2947 //======================================== 2948 2949 /** Construct an empty UnicodeString. 
2950 * @stable ICU 2.0 2951 */ 2952 inline UnicodeString(); 2953 2954 /** 2955 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> char16_ts 2956 * @param capacity the number of char16_ts this UnicodeString should hold 2957 * before a resize is necessary; if count is greater than 0 and count 2958 * code points c take up more space than capacity, then capacity is adjusted 2959 * accordingly. 2960 * @param c is used to initially fill the string 2961 * @param count specifies how many code points c are to be written in the 2962 * string 2963 * @stable ICU 2.0 2964 */ 2965 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 2966 2967 /** 2968 * Single char16_t (code unit) constructor. 2969 * 2970 * It is recommended to mark this constructor "explicit" by 2971 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2972 * on the compiler command line or similar. 2973 * @param ch the character to place in the UnicodeString 2974 * @stable ICU 2.0 2975 */ 2976 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); 2977 2978 /** 2979 * Single UChar32 (code point) constructor. 2980 * 2981 * It is recommended to mark this constructor "explicit" by 2982 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> 2983 * on the compiler command line or similar. 2984 * @param ch the character to place in the UnicodeString 2985 * @stable ICU 2.0 2986 */ 2987 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); 2988 2989 /** 2990 * char16_t* constructor. 2991 * 2992 * It is recommended to mark this constructor "explicit" by 2993 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 2994 * on the compiler command line or similar. 2995 * @param text The characters to place in the UnicodeString. <TT>text</TT> 2996 * must be NUL (U+0000) terminated. 2997 * @stable ICU 2.0 2998 */ 2999 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); 3000 3001 #if !U_CHAR16_IS_TYPEDEF 3002 /** 3003 * uint16_t * constructor. 3004 * Delegates to UnicodeString(const char16_t *).
3005 * 3006 * It is recommended to mark this constructor "explicit" by 3007 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3008 * on the compiler command line or similar. 3009 * @param text NUL-terminated UTF-16 string 3010 * @stable ICU 59 3011 */ 3012 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : 3013 UnicodeString(ConstChar16Ptr(text)) {} 3014 #endif 3015 3016 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3017 /** 3018 * wchar_t * constructor. 3019 * (Only defined if U_SIZEOF_WCHAR_T==2.) 3020 * Delegates to UnicodeString(const char16_t *). 3021 * 3022 * It is recommended to mark this constructor "explicit" by 3023 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3024 * on the compiler command line or similar. 3025 * @param text NUL-terminated UTF-16 string 3026 * @stable ICU 59 3027 */ 3028 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : 3029 UnicodeString(ConstChar16Ptr(text)) {} 3030 #endif 3031 3032 /** 3033 * nullptr_t constructor. 3034 * Effectively the same as the default constructor, makes an empty string object. 3035 * 3036 * It is recommended to mark this constructor "explicit" by 3037 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3038 * on the compiler command line or similar. 3039 * @param text nullptr 3040 * @stable ICU 59 3041 */ 3042 UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); 3043 3044 /** 3045 * char16_t* constructor. 3046 * @param text The characters to place in the UnicodeString. 3047 * @param textLength The number of Unicode characters in <TT>text</TT> 3048 * to copy. 3049 * @stable ICU 2.0 3050 */ 3051 UnicodeString(const char16_t *text, 3052 int32_t textLength); 3053 3054 #if !U_CHAR16_IS_TYPEDEF 3055 /** 3056 * uint16_t * constructor. 3057 * Delegates to UnicodeString(const char16_t *, int32_t). 
3058 * @param text UTF-16 string 3059 * @param length string length 3060 * @stable ICU 59 3061 */ 3062 UnicodeString(const uint16_t *text, int32_t length) : 3063 UnicodeString(ConstChar16Ptr(text), length) {} 3064 #endif 3065 3066 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3067 /** 3068 * wchar_t * constructor. 3069 * (Only defined if U_SIZEOF_WCHAR_T==2.) 3070 * Delegates to UnicodeString(const char16_t *, int32_t). 3071 * @param text UTF-16 string 3072 * @param length string length 3073 * @stable ICU 59 3074 */ 3075 UnicodeString(const wchar_t *text, int32_t length) : 3076 UnicodeString(ConstChar16Ptr(text), length) {} 3077 #endif 3078 3079 /** 3080 * nullptr_t constructor. 3081 * Effectively the same as the default constructor, makes an empty string object. 3082 * @param text nullptr 3083 * @param length ignored 3084 * @stable ICU 59 3085 */ 3086 inline UnicodeString(const std::nullptr_t text, int32_t length); 3087 3088 /** 3089 * Readonly-aliasing char16_t* constructor. 3090 * The text will be used for the UnicodeString object, but 3091 * it will not be released when the UnicodeString is destroyed. 3092 * This has copy-on-write semantics: 3093 * When the string is modified, then the buffer is first copied into 3094 * newly allocated memory. 3095 * The aliased buffer is never modified. 3096 * 3097 * In an assignment to another UnicodeString, when using the copy constructor 3098 * or the assignment operator, the text will be copied. 3099 * When using fastCopyFrom(), the text will be aliased again, 3100 * so that both strings then alias the same readonly-text. 3101 * 3102 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. 3103 * This must be true if <code>textLength==-1</code>. 3104 * @param text The characters to alias for the UnicodeString. 3105 * @param textLength The number of Unicode characters in <code>text</code> to alias.
3106 * If -1, then this constructor will determine the length 3107 * by calling <code>u_strlen()</code>. 3108 * @stable ICU 2.0 3109 */ 3110 UnicodeString(UBool isTerminated, 3111 ConstChar16Ptr text, 3112 int32_t textLength); 3113 3114 /** 3115 * Writable-aliasing char16_t* constructor. 3116 * The text will be used for the UnicodeString object, but 3117 * it will not be released when the UnicodeString is destroyed. 3118 * This has write-through semantics: 3119 * For as long as the capacity of the buffer is sufficient, write operations 3120 * will directly affect the buffer. When more capacity is necessary, then 3121 * a new buffer will be allocated and the contents copied as with regularly 3122 * constructed strings. 3123 * In an assignment to another UnicodeString, the buffer will be copied. 3124 * The extract(Char16Ptr dst) function detects whether the dst pointer is the same 3125 * as the string buffer itself and will in this case not copy the contents. 3126 * 3127 * @param buffer The characters to alias for the UnicodeString. 3128 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. 3129 * @param buffCapacity The size of <code>buffer</code> in char16_ts. 3130 * @stable ICU 2.0 3131 */ 3132 UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); 3133 3134 #if !U_CHAR16_IS_TYPEDEF 3135 /** 3136 * Writable-aliasing uint16_t * constructor. 3137 * Delegates to UnicodeString(char16_t *, int32_t, int32_t). 3138 * @param buffer writable buffer of/for UTF-16 text 3139 * @param buffLength length of the current buffer contents 3140 * @param buffCapacity buffer capacity 3141 * @stable ICU 59 3142 */ 3143 UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : 3144 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} 3145 #endif 3146 3147 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) 3148 /** 3149 * Writable-aliasing wchar_t * constructor. 3150 * (Only defined if U_SIZEOF_WCHAR_T==2.)
3151 * Delegates to UnicodeString(char16_t *, int32_t, int32_t). 3152 * @param buffer writable buffer of/for UTF-16 text 3153 * @param buffLength length of the current buffer contents 3154 * @param buffCapacity buffer capacity 3155 * @stable ICU 59 3156 */ 3157 UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : 3158 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} 3159 #endif 3160 3161 /** 3162 * Writable-aliasing nullptr_t constructor. 3163 * Effectively the same as the default constructor, makes an empty string object. 3164 * @param buffer nullptr 3165 * @param buffLength ignored 3166 * @param buffCapacity ignored 3167 * @stable ICU 59 3168 */ 3169 inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); 3170 3171 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 3172 3173 /** 3174 * char* constructor. 3175 * Uses the default converter (and thus depends on the ICU conversion code) 3176 * unless U_CHARSET_IS_UTF8 is set to 1. 3177 * 3178 * For ASCII (really "invariant character") strings it is more efficient to use 3179 * the constructor that takes a US_INV (for its enum EInvariant). 3180 * For ASCII (invariant-character) string literals, see UNICODE_STRING and 3181 * UNICODE_STRING_SIMPLE. 3182 * 3183 * It is recommended to mark this constructor "explicit" by 3184 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> 3185 * on the compiler command line or similar. 3186 * @param codepageData an array of bytes, null-terminated, 3187 * in the platform's default codepage. 3188 * @stable ICU 2.0 3189 * @see UNICODE_STRING 3190 * @see UNICODE_STRING_SIMPLE 3191 */ 3192 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); 3193 3194 /** 3195 * char* constructor. 3196 * Uses the default converter (and thus depends on the ICU conversion code) 3197 * unless U_CHARSET_IS_UTF8 is set to 1. 3198 * @param codepageData an array of bytes in the platform's default codepage.
3199 * @param dataLength The number of bytes in <TT>codepageData</TT>. 3200 * @stable ICU 2.0 3201 */ 3202 UnicodeString(const char *codepageData, int32_t dataLength); 3203 3204 #endif 3205 3206 #if !UCONFIG_NO_CONVERSION 3207 3208 /** 3209 * char* constructor. 3210 * @param codepageData an array of bytes, null-terminated 3211 * @param codepage the encoding of <TT>codepageData</TT>. The special 3212 * value 0 for <TT>codepage</TT> indicates that the text is in the 3213 * platform's default codepage. 3214 * 3215 * If <code>codepage</code> is an empty string (<code>""</code>), 3216 * then a simple conversion is performed on the codepage-invariant 3217 * subset ("invariant characters") of the platform encoding. See utypes.h. 3218 * Recommendation: For invariant-character strings use the constructor 3219 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3220 * because it avoids object code dependencies of UnicodeString on 3221 * the conversion code. 3222 * 3223 * @stable ICU 2.0 3224 */ 3225 UnicodeString(const char *codepageData, const char *codepage); 3226 3227 /** 3228 * char* constructor. 3229 * @param codepageData an array of bytes. 3230 * @param dataLength The number of bytes in <TT>codepageData</TT>. 3231 * @param codepage the encoding of <TT>codepageData</TT>. The special 3232 * value 0 for <TT>codepage</TT> indicates that the text is in the 3233 * platform's default codepage. 3234 * If <code>codepage</code> is an empty string (<code>""</code>), 3235 * then a simple conversion is performed on the codepage-invariant 3236 * subset ("invariant characters") of the platform encoding. See utypes.h. 3237 * Recommendation: For invariant-character strings use the constructor 3238 * UnicodeString(const char *src, int32_t length, enum EInvariant inv) 3239 * because it avoids object code dependencies of UnicodeString on 3240 * the conversion code. 
3241 * 3242 * @stable ICU 2.0 3243 */ 3244 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); 3245 3246 /** 3247 * char * / UConverter constructor. 3248 * This constructor uses an existing UConverter object to 3249 * convert the codepage string to Unicode and construct a UnicodeString 3250 * from that. 3251 * 3252 * The converter is reset at first. 3253 * If the error code indicates a failure before this constructor is called, 3254 * or if an error occurs during conversion or construction, 3255 * then the string will be bogus. 3256 * 3257 * This function avoids the overhead of opening and closing a converter if 3258 * multiple strings are constructed. 3259 * 3260 * @param src input codepage string 3261 * @param srcLength length of the input string, can be -1 for NUL-terminated strings 3262 * @param cnv converter object (ucnv_resetToUnicode() will be called), 3263 * can be NULL for the default converter 3264 * @param errorCode normal ICU error code 3265 * @stable ICU 2.0 3266 */ 3267 UnicodeString( 3268 const char *src, int32_t srcLength, 3269 UConverter *cnv, 3270 UErrorCode &errorCode); 3271 3272 #endif 3273 3274 /** 3275 * Constructs a Unicode string from an invariant-character char * string. 3276 * About invariant characters see utypes.h. 3277 * This constructor has no runtime dependency on conversion code and is 3278 * therefore recommended over ones taking a charset name string 3279 * (where the empty string "" indicates invariant-character conversion). 3280 * 3281 * Use the macro US_INV as the third, signature-distinguishing parameter. 3282 * 3283 * For example: 3284 * \code 3285 * void fn(const char *s) { 3286 * UnicodeString ustr(s, -1, US_INV); 3287 * // use ustr ... 3288 * } 3289 * \endcode 3290 * 3291 * @param src String using only invariant characters. 3292 * @param length Length of src, or -1 if NUL-terminated. 3293 * @param inv Signature-distinguishing paramater, use US_INV. 
3294 * 3295 * @see US_INV 3296 * @stable ICU 3.2 3297 */ 3298 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 3299 3300 3301 /** 3302 * Copy constructor. 3303 * 3304 * Starting with ICU 2.4, the assignment operator and the copy constructor 3305 * allocate a new buffer and copy the buffer contents even for readonly aliases. 3306 * By contrast, the fastCopyFrom() function implements the old, 3307 * more efficient but less safe behavior 3308 * of making this string also a readonly alias to the same buffer. 3309 * 3310 * If the source object has an "open" buffer from getBuffer(minCapacity), 3311 * then the copy is an empty string. 3312 * 3313 * @param that The UnicodeString object to copy. 3314 * @stable ICU 2.0 3315 * @see fastCopyFrom 3316 */ 3317 UnicodeString(const UnicodeString& that); 3318 3319 /** 3320 * Move constructor; might leave src in bogus state. 3321 * This string will have the same contents and state that the source string had. 3322 * @param src source string 3323 * @stable ICU 56 3324 */ 3325 UnicodeString(UnicodeString &&src) U_NOEXCEPT; 3326 3327 /** 3328 * 'Substring' constructor from tail of source string. 3329 * @param src The UnicodeString object to copy. 3330 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3331 * @stable ICU 2.2 3332 */ 3333 UnicodeString(const UnicodeString& src, int32_t srcStart); 3334 3335 /** 3336 * 'Substring' constructor from subrange of source string. 3337 * @param src The UnicodeString object to copy. 3338 * @param srcStart The offset into <tt>src</tt> at which to start copying. 3339 * @param srcLength The number of characters from <tt>src</tt> to copy. 3340 * @stable ICU 2.2 3341 */ 3342 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3343 3344 /** 3345 * Clone this object, an instance of a subclass of Replaceable. 3346 * Clones can be used concurrently in multiple threads. 
3347 * If a subclass does not implement clone(), or if an error occurs, 3348 * then NULL is returned. 3349 * The clone functions in all subclasses return a pointer to a Replaceable 3350 * because some compilers do not support covariant (same-as-this) 3351 * return types; cast to the appropriate subclass if necessary. 3352 * The caller must delete the clone. 3353 * 3354 * @return a clone of this object 3355 * 3356 * @see Replaceable::clone 3357 * @see getDynamicClassID 3358 * @stable ICU 2.6 3359 */ 3360 virtual Replaceable *clone() const; 3361 3362 /** Destructor. 3363 * @stable ICU 2.0 3364 */ 3365 virtual ~UnicodeString(); 3366 3367 /** 3368 * Create a UnicodeString from a UTF-8 string. 3369 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3370 * Calls u_strFromUTF8WithSub(). 3371 * 3372 * @param utf8 UTF-8 input string. 3373 * Note that a StringPiece can be implicitly constructed 3374 * from a std::string or a NUL-terminated const char * string. 3375 * @return A UnicodeString with equivalent UTF-16 contents. 3376 * @see toUTF8 3377 * @see toUTF8String 3378 * @stable ICU 4.2 3379 */ 3380 static UnicodeString fromUTF8(StringPiece utf8); 3381 3382 /** 3383 * Create a UnicodeString from a UTF-32 string. 3384 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. 3385 * Calls u_strFromUTF32WithSub(). 3386 * 3387 * @param utf32 UTF-32 input string. Must not be NULL. 3388 * @param length Length of the input string, or -1 if NUL-terminated. 3389 * @return A UnicodeString with equivalent UTF-16 contents. 3390 * @see toUTF32 3391 * @stable ICU 4.2 3392 */ 3393 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 3394 3395 /* Miscellaneous operations */ 3396 3397 /** 3398 * Unescape a string of characters and return a string containing 3399 * the result. 
The following escape sequences are recognized: 3400 * 3401 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] 3402 * \\Uhhhhhhhh 8 hex digits 3403 * \\xhh 1-2 hex digits 3404 * \\ooo 1-3 octal digits; o in [0-7] 3405 * \\cX control-X; X is masked with 0x1F 3406 * 3407 * as well as the standard ANSI C escapes: 3408 * 3409 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, 3410 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, 3411 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C 3412 * 3413 * Anything else following a backslash is generically escaped. For 3414 * example, "[a\\-z]" returns "[a-z]". 3415 * 3416 * If an escape sequence is ill-formed, this method returns an empty 3417 * string. An example of an ill-formed sequence is "\\u" followed by 3418 * fewer than 4 hex digits. 3419 * 3420 * This function is similar to u_unescape() but not identical to it. 3421 * The latter takes a source char*, so it does escape recognition 3422 * and also invariant conversion. 3423 * 3424 * @return a string with backslash escapes interpreted, or an 3425 * empty string on error. 3426 * @see UnicodeString#unescapeAt() 3427 * @see u_unescape() 3428 * @see u_unescapeAt() 3429 * @stable ICU 2.0 3430 */ 3431 UnicodeString unescape() const; 3432 3433 /** 3434 * Unescape a single escape sequence and return the represented 3435 * character. See unescape() for a listing of the recognized escape 3436 * sequences. The character at offset-1 is assumed (without 3437 * checking) to be a backslash. If the escape sequence is 3438 * ill-formed, or the offset is out of range, U_SENTINEL=-1 is 3439 * returned. 3440 * 3441 * @param offset an input output parameter. On input, it is the 3442 * offset into this string where the escape sequence is located, 3443 * after the initial backslash. On output, it is advanced after the 3444 * last character parsed. On error, it is not advanced at all. 
3445 * @return the character represented by the escape sequence at 3446 * offset, or U_SENTINEL=-1 on error. 3447 * @see UnicodeString#unescape() 3448 * @see u_unescape() 3449 * @see u_unescapeAt() 3450 * @stable ICU 2.0 3451 */ 3452 UChar32 unescapeAt(int32_t &offset) const; 3453 3454 /** 3455 * ICU "poor man's RTTI", returns a UClassID for this class. 3456 * 3457 * @stable ICU 2.2 3458 */ 3459 static UClassID U_EXPORT2 getStaticClassID(); 3460 3461 /** 3462 * ICU "poor man's RTTI", returns a UClassID for the actual class. 3463 * 3464 * @stable ICU 2.2 3465 */ 3466 virtual UClassID getDynamicClassID() const; 3467 3468 //======================================== 3469 // Implementation methods 3470 //======================================== 3471 3472 protected: 3473 /** 3474 * Implement Replaceable::getLength() (see jitterbug 1027). 3475 * @stable ICU 2.4 3476 */ 3477 virtual int32_t getLength() const; 3478 3479 /** 3480 * The change in Replaceable to use virtual getCharAt() allows 3481 * UnicodeString::charAt() to be inline again (see jitterbug 709). 3482 * @stable ICU 2.4 3483 */ 3484 virtual char16_t getCharAt(int32_t offset) const; 3485 3486 /** 3487 * The change in Replaceable to use virtual getChar32At() allows 3488 * UnicodeString::char32At() to be inline again (see jitterbug 709). 3489 * @stable ICU 2.4 3490 */ 3491 virtual UChar32 getChar32At(int32_t offset) const; 3492 3493 private: 3494 // For char* constructors. Could be made public. 3495 UnicodeString &setToUTF8(StringPiece utf8); 3496 // For extract(char*). 3497 // We could make a toUTF8(target, capacity, errorCode) public but not 3498 // this version: New API will be cleaner if we make callers create substrings 3499 // rather than having start+length on every method, 3500 // and it should take a UErrorCode&. 3501 int32_t 3502 toUTF8(int32_t start, int32_t len, 3503 char *target, int32_t capacity) const; 3504 3505 /** 3506 * Internal string contents comparison, called by operator==. 
3507 * Requires: this & text not bogus and have same lengths. 3508 */ 3509 UBool doEquals(const UnicodeString &text, int32_t len) const; 3510 3511 inline int8_t 3512 doCompare(int32_t start, 3513 int32_t length, 3514 const UnicodeString& srcText, 3515 int32_t srcStart, 3516 int32_t srcLength) const; 3517 3518 int8_t doCompare(int32_t start, 3519 int32_t length, 3520 const char16_t *srcChars, 3521 int32_t srcStart, 3522 int32_t srcLength) const; 3523 3524 inline int8_t 3525 doCompareCodePointOrder(int32_t start, 3526 int32_t length, 3527 const UnicodeString& srcText, 3528 int32_t srcStart, 3529 int32_t srcLength) const; 3530 3531 int8_t doCompareCodePointOrder(int32_t start, 3532 int32_t length, 3533 const char16_t *srcChars, 3534 int32_t srcStart, 3535 int32_t srcLength) const; 3536 3537 inline int8_t 3538 doCaseCompare(int32_t start, 3539 int32_t length, 3540 const UnicodeString &srcText, 3541 int32_t srcStart, 3542 int32_t srcLength, 3543 uint32_t options) const; 3544 3545 int8_t 3546 doCaseCompare(int32_t start, 3547 int32_t length, 3548 const char16_t *srcChars, 3549 int32_t srcStart, 3550 int32_t srcLength, 3551 uint32_t options) const; 3552 3553 int32_t doIndexOf(char16_t c, 3554 int32_t start, 3555 int32_t length) const; 3556 3557 int32_t doIndexOf(UChar32 c, 3558 int32_t start, 3559 int32_t length) const; 3560 3561 int32_t doLastIndexOf(char16_t c, 3562 int32_t start, 3563 int32_t length) const; 3564 3565 int32_t doLastIndexOf(UChar32 c, 3566 int32_t start, 3567 int32_t length) const; 3568 3569 void doExtract(int32_t start, 3570 int32_t length, 3571 char16_t *dst, 3572 int32_t dstStart) const; 3573 3574 inline void doExtract(int32_t start, 3575 int32_t length, 3576 UnicodeString& target) const; 3577 3578 inline char16_t doCharAt(int32_t offset) const; 3579 3580 UnicodeString& doReplace(int32_t start, 3581 int32_t length, 3582 const UnicodeString& srcText, 3583 int32_t srcStart, 3584 int32_t srcLength); 3585 3586 UnicodeString& doReplace(int32_t start, 3587 
int32_t length, 3588 const char16_t *srcChars, 3589 int32_t srcStart, 3590 int32_t srcLength); 3591 3592 UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 3593 UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); 3594 3595 UnicodeString& doReverse(int32_t start, 3596 int32_t length); 3597 3598 // calculate hash code 3599 int32_t doHashCode(void) const; 3600 3601 // get pointer to start of array 3602 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 3603 inline char16_t* getArrayStart(void); 3604 inline const char16_t* getArrayStart(void) const; 3605 3606 inline UBool hasShortLength() const; 3607 inline int32_t getShortLength() const; 3608 3609 // A UnicodeString object (not necessarily its current buffer) 3610 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 3611 inline UBool isWritable() const; 3612 3613 // Is the current buffer writable? 3614 inline UBool isBufferWritable() const; 3615 3616 // None of the following does releaseArray(). 3617 inline void setZeroLength(); 3618 inline void setShortLength(int32_t len); 3619 inline void setLength(int32_t len); 3620 inline void setToEmpty(); 3621 inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags 3622 3623 // allocate the array; result may be the stack buffer 3624 // sets refCount to 1 if appropriate 3625 // sets fArray, fCapacity, and flags 3626 // sets length to 0 3627 // returns boolean for success or failure 3628 UBool allocate(int32_t capacity); 3629 3630 // release the array if owned 3631 void releaseArray(void); 3632 3633 // turn a bogus string into an empty one 3634 void unBogus(); 3635 3636 // implements assigment operator, copy constructor, and fastCopyFrom() 3637 UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE); 3638 3639 // Copies just the fields without memory management. 
3640 void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT; 3641 3642 // Pin start and limit to acceptable values. 3643 inline void pinIndex(int32_t& start) const; 3644 inline void pinIndices(int32_t& start, 3645 int32_t& length) const; 3646 3647 #if !UCONFIG_NO_CONVERSION 3648 3649 /* Internal extract() using UConverter. */ 3650 int32_t doExtract(int32_t start, int32_t length, 3651 char *dest, int32_t destCapacity, 3652 UConverter *cnv, 3653 UErrorCode &errorCode) const; 3654 3655 /* 3656 * Real constructor for converting from codepage data. 3657 * It assumes that it is called with !fRefCounted. 3658 * 3659 * If <code>codepage==0</code>, then the default converter 3660 * is used for the platform encoding. 3661 * If <code>codepage</code> is an empty string (<code>""</code>), 3662 * then a simple conversion is performed on the codepage-invariant 3663 * subset ("invariant characters") of the platform encoding. See utypes.h. 3664 */ 3665 void doCodepageCreate(const char *codepageData, 3666 int32_t dataLength, 3667 const char *codepage); 3668 3669 /* 3670 * Worker function for creating a UnicodeString from 3671 * a codepage string using a UConverter. 3672 */ 3673 void 3674 doCodepageCreate(const char *codepageData, 3675 int32_t dataLength, 3676 UConverter *converter, 3677 UErrorCode &status); 3678 3679 #endif 3680 3681 /* 3682 * This function is called when write access to the array 3683 * is necessary. 3684 * 3685 * We need to make a copy of the array if 3686 * the buffer is read-only, or 3687 * the buffer is refCounted (shared), and refCount>1, or 3688 * the buffer is too small. 3689 * 3690 * Return FALSE if memory could not be allocated. 3691 */ 3692 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 3693 int32_t growCapacity = -1, 3694 UBool doCopyArray = TRUE, 3695 int32_t **pBufferToDelete = 0, 3696 UBool forceClone = FALSE); 3697 3698 /** 3699 * Common function for UnicodeString case mappings. 
3700 * The stringCaseMapper has the same type UStringCaseMapper 3701 * as in ustr_imp.h for ustrcase_map(). 3702 */ 3703 UnicodeString & 3704 caseMap(int32_t caseLocale, uint32_t options, 3705 #if !UCONFIG_NO_BREAK_ITERATION 3706 BreakIterator *iter, 3707 #endif 3708 UStringCaseMapper *stringCaseMapper); 3709 3710 // ref counting 3711 void addRef(void); 3712 int32_t removeRef(void); 3713 int32_t refCount(void) const; 3714 3715 // constants 3716 enum { 3717 /** 3718 * Size of stack buffer for short strings. 3719 * Must be at least U16_MAX_LENGTH for the single-code point constructor to work. 3720 * @see UNISTR_OBJECT_SIZE 3721 */ 3722 US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR, 3723 kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index) 3724 kInvalidHashCode=0, // invalid hash code 3725 kEmptyHashCode=1, // hash code for empty string 3726 3727 // bit flag values for fLengthAndFlags 3728 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 3729 kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields 3730 kRefCounted=4, // there is a refCount field before the characters in fArray 3731 kBufferIsReadonly=8,// do not write to this buffer 3732 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 3733 // and releaseBuffer(newLength) must be called 3734 kAllStorageFlags=0x1f, 3735 3736 kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long 3737 kLength1=1<<kLengthShift, 3738 kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0) 3739 kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength 3740 3741 // combined values for convenience 3742 kShortString=kUsingStackBuffer, 3743 kLongString=kRefCounted, 3744 kReadonlyAlias=kBufferIsReadonly, 3745 kWritableAlias=0 3746 }; 3747 3748 friend class UnicodeStringAppendable; 3749 3750 union StackBufferOrFields; // forward declaration necessary before friend 
declaration 3751 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 3752 3753 /* 3754 * The following are all the class fields that are stored 3755 * in each UnicodeString object. 3756 * Note that UnicodeString has virtual functions, 3757 * therefore there is an implicit vtable pointer 3758 * as the first real field. 3759 * The fields should be aligned such that no padding is necessary. 3760 * On 32-bit machines, the size should be 32 bytes, 3761 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 3762 * 3763 * We use a hack to achieve this. 3764 * 3765 * With at least some compilers, each of the following is forced to 3766 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 3767 * rounded up with additional padding if the fields do not already fit that requirement: 3768 * - sizeof(class UnicodeString) 3769 * - offsetof(UnicodeString, fUnion) 3770 * - sizeof(fUnion) 3771 * - sizeof(fStackFields) 3772 * 3773 * We optimize for the longest possible internal buffer for short strings. 3774 * fUnion.fStackFields begins with 2 bytes for storage flags 3775 * and the length of relatively short strings, 3776 * followed by the buffer for short string contents. 3777 * There is no padding inside fStackFields. 3778 * 3779 * Heap-allocated and aliased strings use fUnion.fFields. 3780 * Both fStackFields and fFields must begin with the same fields for flags and short length, 3781 * that is, those must have the same memory offsets inside the object, 3782 * because the flags must be inspected in order to decide which half of fUnion is being used. 3783 * We assume that the compiler does not reorder the fields. 3784 * 3785 * (Padding at the end of fFields is ok: 3786 * As long as it is no larger than fStackFields, it is not wasted space.) 
3787 * 3788 * For some of the history of the UnicodeString class fields layout, see 3789 * - ICU ticket #11551 "longer UnicodeString contents in stack buffer" 3790 * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays" 3791 * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?" 3792 */ 3793 // (implicit) *vtable; 3794 union StackBufferOrFields { 3795 // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used. 3796 // Each struct of the union must begin with fLengthAndFlags. 3797 struct { 3798 int16_t fLengthAndFlags; // bit fields: see constants above 3799 char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings 3800 } fStackFields; 3801 struct { 3802 int16_t fLengthAndFlags; // bit fields: see constants above 3803 int32_t fLength; // number of characters in fArray if >127; else undefined 3804 int32_t fCapacity; // capacity of fArray (in char16_ts) 3805 // array pointer last to minimize padding for machines with P128 data model 3806 // or pointer sizes that are not a power of 2 3807 char16_t *fArray; // the Unicode data 3808 } fFields; 3809 } fUnion; 3810 }; 3811 3812 /** 3813 * Create a new UnicodeString with the concatenation of two others. 3814 * 3815 * @param s1 The first string to be copied to the new one. 3816 * @param s2 The second string to be copied to the new one, after s1. 
3817 * @return UnicodeString(s1).append(s2) 3818 * @stable ICU 2.8 3819 */ 3820 U_COMMON_API UnicodeString U_EXPORT2 3821 operator+ (const UnicodeString &s1, const UnicodeString &s2); 3822 3823 //======================================== 3824 // Inline members 3825 //======================================== 3826 3827 //======================================== 3828 // Privates 3829 //======================================== 3830 3831 inline void 3832 UnicodeString::pinIndex(int32_t& start) const 3833 { 3834 // pin index 3835 if(start < 0) { 3836 start = 0; 3837 } else if(start > length()) { 3838 start = length(); 3839 } 3840 } 3841 3842 inline void 3843 UnicodeString::pinIndices(int32_t& start, 3844 int32_t& _length) const 3845 { 3846 // pin indices 3847 int32_t len = length(); 3848 if(start < 0) { 3849 start = 0; 3850 } else if(start > len) { 3851 start = len; 3852 } 3853 if(_length < 0) { 3854 _length = 0; 3855 } else if(_length > (len - start)) { 3856 _length = (len - start); 3857 } 3858 } 3859 3860 inline char16_t* 3861 UnicodeString::getArrayStart() { 3862 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 3863 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 3864 } 3865 3866 inline const char16_t* 3867 UnicodeString::getArrayStart() const { 3868 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
3869 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; 3870 } 3871 3872 //======================================== 3873 // Default constructor 3874 //======================================== 3875 3876 inline 3877 UnicodeString::UnicodeString() { 3878 fUnion.fStackFields.fLengthAndFlags=kShortString; 3879 } 3880 3881 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { 3882 fUnion.fStackFields.fLengthAndFlags=kShortString; 3883 } 3884 3885 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { 3886 fUnion.fStackFields.fLengthAndFlags=kShortString; 3887 } 3888 3889 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { 3890 fUnion.fStackFields.fLengthAndFlags=kShortString; 3891 } 3892 3893 //======================================== 3894 // Read-only implementation methods 3895 //======================================== 3896 inline UBool 3897 UnicodeString::hasShortLength() const { 3898 return fUnion.fFields.fLengthAndFlags>=0; 3899 } 3900 3901 inline int32_t 3902 UnicodeString::getShortLength() const { 3903 // fLengthAndFlags must be non-negative -> short length >= 0 3904 // and arithmetic or logical shift does not matter. 3905 return fUnion.fFields.fLengthAndFlags>>kLengthShift; 3906 } 3907 3908 inline int32_t 3909 UnicodeString::length() const { 3910 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength; 3911 } 3912 3913 inline int32_t 3914 UnicodeString::getCapacity() const { 3915 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
3916 US_STACKBUF_SIZE : fUnion.fFields.fCapacity; 3917 } 3918 3919 inline int32_t 3920 UnicodeString::hashCode() const 3921 { return doHashCode(); } 3922 3923 inline UBool 3924 UnicodeString::isBogus() const 3925 { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); } 3926 3927 inline UBool 3928 UnicodeString::isWritable() const 3929 { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); } 3930 3931 inline UBool 3932 UnicodeString::isBufferWritable() const 3933 { 3934 return (UBool)( 3935 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 3936 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); 3937 } 3938 3939 inline const char16_t * 3940 UnicodeString::getBuffer() const { 3941 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { 3942 return nullptr; 3943 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { 3944 return fUnion.fStackFields.fBuffer; 3945 } else { 3946 return fUnion.fFields.fArray; 3947 } 3948 } 3949 3950 //======================================== 3951 // Read-only alias methods 3952 //======================================== 3953 inline int8_t 3954 UnicodeString::doCompare(int32_t start, 3955 int32_t thisLength, 3956 const UnicodeString& srcText, 3957 int32_t srcStart, 3958 int32_t srcLength) const 3959 { 3960 if(srcText.isBogus()) { 3961 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 3962 } else { 3963 srcText.pinIndices(srcStart, srcLength); 3964 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 3965 } 3966 } 3967 3968 inline UBool 3969 UnicodeString::operator== (const UnicodeString& text) const 3970 { 3971 if(isBogus()) { 3972 return text.isBogus(); 3973 } else { 3974 int32_t len = length(), textLength = text.length(); 3975 return !text.isBogus() && len == textLength && doEquals(text, len); 3976 } 3977 } 3978 3979 inline UBool 3980 UnicodeString::operator!= (const UnicodeString& text) const 3981 { return (! 
operator==(text)); } 3982 3983 inline UBool 3984 UnicodeString::operator> (const UnicodeString& text) const 3985 { return doCompare(0, length(), text, 0, text.length()) == 1; } 3986 3987 inline UBool 3988 UnicodeString::operator< (const UnicodeString& text) const 3989 { return doCompare(0, length(), text, 0, text.length()) == -1; } 3990 3991 inline UBool 3992 UnicodeString::operator>= (const UnicodeString& text) const 3993 { return doCompare(0, length(), text, 0, text.length()) != -1; } 3994 3995 inline UBool 3996 UnicodeString::operator<= (const UnicodeString& text) const 3997 { return doCompare(0, length(), text, 0, text.length()) != 1; } 3998 3999 inline int8_t 4000 UnicodeString::compare(const UnicodeString& text) const 4001 { return doCompare(0, length(), text, 0, text.length()); } 4002 4003 inline int8_t 4004 UnicodeString::compare(int32_t start, 4005 int32_t _length, 4006 const UnicodeString& srcText) const 4007 { return doCompare(start, _length, srcText, 0, srcText.length()); } 4008 4009 inline int8_t 4010 UnicodeString::compare(ConstChar16Ptr srcChars, 4011 int32_t srcLength) const 4012 { return doCompare(0, length(), srcChars, 0, srcLength); } 4013 4014 inline int8_t 4015 UnicodeString::compare(int32_t start, 4016 int32_t _length, 4017 const UnicodeString& srcText, 4018 int32_t srcStart, 4019 int32_t srcLength) const 4020 { return doCompare(start, _length, srcText, srcStart, srcLength); } 4021 4022 inline int8_t 4023 UnicodeString::compare(int32_t start, 4024 int32_t _length, 4025 const char16_t *srcChars) const 4026 { return doCompare(start, _length, srcChars, 0, _length); } 4027 4028 inline int8_t 4029 UnicodeString::compare(int32_t start, 4030 int32_t _length, 4031 const char16_t *srcChars, 4032 int32_t srcStart, 4033 int32_t srcLength) const 4034 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 4035 4036 inline int8_t 4037 UnicodeString::compareBetween(int32_t start, 4038 int32_t limit, 4039 const UnicodeString& srcText, 4040 
int32_t srcStart, 4041 int32_t srcLimit) const 4042 { return doCompare(start, limit - start, 4043 srcText, srcStart, srcLimit - srcStart); } 4044 4045 inline int8_t 4046 UnicodeString::doCompareCodePointOrder(int32_t start, 4047 int32_t thisLength, 4048 const UnicodeString& srcText, 4049 int32_t srcStart, 4050 int32_t srcLength) const 4051 { 4052 if(srcText.isBogus()) { 4053 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 4054 } else { 4055 srcText.pinIndices(srcStart, srcLength); 4056 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 4057 } 4058 } 4059 4060 inline int8_t 4061 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 4062 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 4063 4064 inline int8_t 4065 UnicodeString::compareCodePointOrder(int32_t start, 4066 int32_t _length, 4067 const UnicodeString& srcText) const 4068 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 4069 4070 inline int8_t 4071 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, 4072 int32_t srcLength) const 4073 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 4074 4075 inline int8_t 4076 UnicodeString::compareCodePointOrder(int32_t start, 4077 int32_t _length, 4078 const UnicodeString& srcText, 4079 int32_t srcStart, 4080 int32_t srcLength) const 4081 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 4082 4083 inline int8_t 4084 UnicodeString::compareCodePointOrder(int32_t start, 4085 int32_t _length, 4086 const char16_t *srcChars) const 4087 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 4088 4089 inline int8_t 4090 UnicodeString::compareCodePointOrder(int32_t start, 4091 int32_t _length, 4092 const char16_t *srcChars, 4093 int32_t srcStart, 4094 int32_t srcLength) const 4095 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 
4096 4097 inline int8_t 4098 UnicodeString::compareCodePointOrderBetween(int32_t start, 4099 int32_t limit, 4100 const UnicodeString& srcText, 4101 int32_t srcStart, 4102 int32_t srcLimit) const 4103 { return doCompareCodePointOrder(start, limit - start, 4104 srcText, srcStart, srcLimit - srcStart); } 4105 4106 inline int8_t 4107 UnicodeString::doCaseCompare(int32_t start, 4108 int32_t thisLength, 4109 const UnicodeString &srcText, 4110 int32_t srcStart, 4111 int32_t srcLength, 4112 uint32_t options) const 4113 { 4114 if(srcText.isBogus()) { 4115 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 4116 } else { 4117 srcText.pinIndices(srcStart, srcLength); 4118 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 4119 } 4120 } 4121 4122 inline int8_t 4123 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 4124 return doCaseCompare(0, length(), text, 0, text.length(), options); 4125 } 4126 4127 inline int8_t 4128 UnicodeString::caseCompare(int32_t start, 4129 int32_t _length, 4130 const UnicodeString &srcText, 4131 uint32_t options) const { 4132 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 4133 } 4134 4135 inline int8_t 4136 UnicodeString::caseCompare(ConstChar16Ptr srcChars, 4137 int32_t srcLength, 4138 uint32_t options) const { 4139 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 4140 } 4141 4142 inline int8_t 4143 UnicodeString::caseCompare(int32_t start, 4144 int32_t _length, 4145 const UnicodeString &srcText, 4146 int32_t srcStart, 4147 int32_t srcLength, 4148 uint32_t options) const { 4149 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 4150 } 4151 4152 inline int8_t 4153 UnicodeString::caseCompare(int32_t start, 4154 int32_t _length, 4155 const char16_t *srcChars, 4156 uint32_t options) const { 4157 return doCaseCompare(start, _length, srcChars, 0, _length, options); 4158 } 4159 4160 inline int8_t 
4161 UnicodeString::caseCompare(int32_t start, 4162 int32_t _length, 4163 const char16_t *srcChars, 4164 int32_t srcStart, 4165 int32_t srcLength, 4166 uint32_t options) const { 4167 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 4168 } 4169 4170 inline int8_t 4171 UnicodeString::caseCompareBetween(int32_t start, 4172 int32_t limit, 4173 const UnicodeString &srcText, 4174 int32_t srcStart, 4175 int32_t srcLimit, 4176 uint32_t options) const { 4177 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 4178 } 4179 4180 inline int32_t 4181 UnicodeString::indexOf(const UnicodeString& srcText, 4182 int32_t srcStart, 4183 int32_t srcLength, 4184 int32_t start, 4185 int32_t _length) const 4186 { 4187 if(!srcText.isBogus()) { 4188 srcText.pinIndices(srcStart, srcLength); 4189 if(srcLength > 0) { 4190 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 4191 } 4192 } 4193 return -1; 4194 } 4195 4196 inline int32_t 4197 UnicodeString::indexOf(const UnicodeString& text) const 4198 { return indexOf(text, 0, text.length(), 0, length()); } 4199 4200 inline int32_t 4201 UnicodeString::indexOf(const UnicodeString& text, 4202 int32_t start) const { 4203 pinIndex(start); 4204 return indexOf(text, 0, text.length(), start, length() - start); 4205 } 4206 4207 inline int32_t 4208 UnicodeString::indexOf(const UnicodeString& text, 4209 int32_t start, 4210 int32_t _length) const 4211 { return indexOf(text, 0, text.length(), start, _length); } 4212 4213 inline int32_t 4214 UnicodeString::indexOf(const char16_t *srcChars, 4215 int32_t srcLength, 4216 int32_t start) const { 4217 pinIndex(start); 4218 return indexOf(srcChars, 0, srcLength, start, length() - start); 4219 } 4220 4221 inline int32_t 4222 UnicodeString::indexOf(ConstChar16Ptr srcChars, 4223 int32_t srcLength, 4224 int32_t start, 4225 int32_t _length) const 4226 { return indexOf(srcChars, 0, srcLength, start, _length); } 4227 4228 inline 
int32_t 4229 UnicodeString::indexOf(char16_t c, 4230 int32_t start, 4231 int32_t _length) const 4232 { return doIndexOf(c, start, _length); } 4233 4234 inline int32_t 4235 UnicodeString::indexOf(UChar32 c, 4236 int32_t start, 4237 int32_t _length) const 4238 { return doIndexOf(c, start, _length); } 4239 4240 inline int32_t 4241 UnicodeString::indexOf(char16_t c) const 4242 { return doIndexOf(c, 0, length()); } 4243 4244 inline int32_t 4245 UnicodeString::indexOf(UChar32 c) const 4246 { return indexOf(c, 0, length()); } 4247 4248 inline int32_t 4249 UnicodeString::indexOf(char16_t c, 4250 int32_t start) const { 4251 pinIndex(start); 4252 return doIndexOf(c, start, length() - start); 4253 } 4254 4255 inline int32_t 4256 UnicodeString::indexOf(UChar32 c, 4257 int32_t start) const { 4258 pinIndex(start); 4259 return indexOf(c, start, length() - start); 4260 } 4261 4262 inline int32_t 4263 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, 4264 int32_t srcLength, 4265 int32_t start, 4266 int32_t _length) const 4267 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 4268 4269 inline int32_t 4270 UnicodeString::lastIndexOf(const char16_t *srcChars, 4271 int32_t srcLength, 4272 int32_t start) const { 4273 pinIndex(start); 4274 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 4275 } 4276 4277 inline int32_t 4278 UnicodeString::lastIndexOf(const UnicodeString& srcText, 4279 int32_t srcStart, 4280 int32_t srcLength, 4281 int32_t start, 4282 int32_t _length) const 4283 { 4284 if(!srcText.isBogus()) { 4285 srcText.pinIndices(srcStart, srcLength); 4286 if(srcLength > 0) { 4287 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 4288 } 4289 } 4290 return -1; 4291 } 4292 4293 inline int32_t 4294 UnicodeString::lastIndexOf(const UnicodeString& text, 4295 int32_t start, 4296 int32_t _length) const 4297 { return lastIndexOf(text, 0, text.length(), start, _length); } 4298 4299 inline int32_t 4300 
UnicodeString::lastIndexOf(const UnicodeString& text, 4301 int32_t start) const { 4302 pinIndex(start); 4303 return lastIndexOf(text, 0, text.length(), start, length() - start); 4304 } 4305 4306 inline int32_t 4307 UnicodeString::lastIndexOf(const UnicodeString& text) const 4308 { return lastIndexOf(text, 0, text.length(), 0, length()); } 4309 4310 inline int32_t 4311 UnicodeString::lastIndexOf(char16_t c, 4312 int32_t start, 4313 int32_t _length) const 4314 { return doLastIndexOf(c, start, _length); } 4315 4316 inline int32_t 4317 UnicodeString::lastIndexOf(UChar32 c, 4318 int32_t start, 4319 int32_t _length) const { 4320 return doLastIndexOf(c, start, _length); 4321 } 4322 4323 inline int32_t 4324 UnicodeString::lastIndexOf(char16_t c) const 4325 { return doLastIndexOf(c, 0, length()); } 4326 4327 inline int32_t 4328 UnicodeString::lastIndexOf(UChar32 c) const { 4329 return lastIndexOf(c, 0, length()); 4330 } 4331 4332 inline int32_t 4333 UnicodeString::lastIndexOf(char16_t c, 4334 int32_t start) const { 4335 pinIndex(start); 4336 return doLastIndexOf(c, start, length() - start); 4337 } 4338 4339 inline int32_t 4340 UnicodeString::lastIndexOf(UChar32 c, 4341 int32_t start) const { 4342 pinIndex(start); 4343 return lastIndexOf(c, start, length() - start); 4344 } 4345 4346 inline UBool 4347 UnicodeString::startsWith(const UnicodeString& text) const 4348 { return compare(0, text.length(), text, 0, text.length()) == 0; } 4349 4350 inline UBool 4351 UnicodeString::startsWith(const UnicodeString& srcText, 4352 int32_t srcStart, 4353 int32_t srcLength) const 4354 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 4355 4356 inline UBool 4357 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { 4358 if(srcLength < 0) { 4359 srcLength = u_strlen(toUCharPtr(srcChars)); 4360 } 4361 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; 4362 } 4363 4364 inline UBool 4365 UnicodeString::startsWith(const char16_t *srcChars, 
int32_t srcStart, int32_t srcLength) const { 4366 if(srcLength < 0) { 4367 srcLength = u_strlen(toUCharPtr(srcChars)); 4368 } 4369 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; 4370 } 4371 4372 inline UBool 4373 UnicodeString::endsWith(const UnicodeString& text) const 4374 { return doCompare(length() - text.length(), text.length(), 4375 text, 0, text.length()) == 0; } 4376 4377 inline UBool 4378 UnicodeString::endsWith(const UnicodeString& srcText, 4379 int32_t srcStart, 4380 int32_t srcLength) const { 4381 srcText.pinIndices(srcStart, srcLength); 4382 return doCompare(length() - srcLength, srcLength, 4383 srcText, srcStart, srcLength) == 0; 4384 } 4385 4386 inline UBool 4387 UnicodeString::endsWith(ConstChar16Ptr srcChars, 4388 int32_t srcLength) const { 4389 if(srcLength < 0) { 4390 srcLength = u_strlen(toUCharPtr(srcChars)); 4391 } 4392 return doCompare(length() - srcLength, srcLength, 4393 srcChars, 0, srcLength) == 0; 4394 } 4395 4396 inline UBool 4397 UnicodeString::endsWith(const char16_t *srcChars, 4398 int32_t srcStart, 4399 int32_t srcLength) const { 4400 if(srcLength < 0) { 4401 srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); 4402 } 4403 return doCompare(length() - srcLength, srcLength, 4404 srcChars, srcStart, srcLength) == 0; 4405 } 4406 4407 //======================================== 4408 // replace 4409 //======================================== 4410 inline UnicodeString& 4411 UnicodeString::replace(int32_t start, 4412 int32_t _length, 4413 const UnicodeString& srcText) 4414 { return doReplace(start, _length, srcText, 0, srcText.length()); } 4415 4416 inline UnicodeString& 4417 UnicodeString::replace(int32_t start, 4418 int32_t _length, 4419 const UnicodeString& srcText, 4420 int32_t srcStart, 4421 int32_t srcLength) 4422 { return doReplace(start, _length, srcText, srcStart, srcLength); } 4423 4424 inline UnicodeString& 4425 UnicodeString::replace(int32_t start, 4426 int32_t _length, 4427 ConstChar16Ptr srcChars, 4428 
int32_t srcLength) 4429 { return doReplace(start, _length, srcChars, 0, srcLength); } 4430 4431 inline UnicodeString& 4432 UnicodeString::replace(int32_t start, 4433 int32_t _length, 4434 const char16_t *srcChars, 4435 int32_t srcStart, 4436 int32_t srcLength) 4437 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 4438 4439 inline UnicodeString& 4440 UnicodeString::replace(int32_t start, 4441 int32_t _length, 4442 char16_t srcChar) 4443 { return doReplace(start, _length, &srcChar, 0, 1); } 4444 4445 inline UnicodeString& 4446 UnicodeString::replaceBetween(int32_t start, 4447 int32_t limit, 4448 const UnicodeString& srcText) 4449 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 4450 4451 inline UnicodeString& 4452 UnicodeString::replaceBetween(int32_t start, 4453 int32_t limit, 4454 const UnicodeString& srcText, 4455 int32_t srcStart, 4456 int32_t srcLimit) 4457 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 4458 4459 inline UnicodeString& 4460 UnicodeString::findAndReplace(const UnicodeString& oldText, 4461 const UnicodeString& newText) 4462 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 4463 newText, 0, newText.length()); } 4464 4465 inline UnicodeString& 4466 UnicodeString::findAndReplace(int32_t start, 4467 int32_t _length, 4468 const UnicodeString& oldText, 4469 const UnicodeString& newText) 4470 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 4471 newText, 0, newText.length()); } 4472 4473 // ============================ 4474 // extract 4475 // ============================ 4476 inline void 4477 UnicodeString::doExtract(int32_t start, 4478 int32_t _length, 4479 UnicodeString& target) const 4480 { target.replace(0, target.length(), *this, start, _length); } 4481 4482 inline void 4483 UnicodeString::extract(int32_t start, 4484 int32_t _length, 4485 Char16Ptr target, 4486 int32_t targetStart) const 4487 { doExtract(start, _length, target, 
targetStart); } 4488 4489 inline void 4490 UnicodeString::extract(int32_t start, 4491 int32_t _length, 4492 UnicodeString& target) const 4493 { doExtract(start, _length, target); } 4494 4495 #if !UCONFIG_NO_CONVERSION 4496 4497 inline int32_t 4498 UnicodeString::extract(int32_t start, 4499 int32_t _length, 4500 char *dst, 4501 const char *codepage) const 4502 4503 { 4504 // This dstSize value will be checked explicitly 4505 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); 4506 } 4507 4508 #endif 4509 4510 inline void 4511 UnicodeString::extractBetween(int32_t start, 4512 int32_t limit, 4513 char16_t *dst, 4514 int32_t dstStart) const { 4515 pinIndex(start); 4516 pinIndex(limit); 4517 doExtract(start, limit - start, dst, dstStart); 4518 } 4519 4520 inline UnicodeString 4521 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 4522 return tempSubString(start, limit - start); 4523 } 4524 4525 inline char16_t 4526 UnicodeString::doCharAt(int32_t offset) const 4527 { 4528 if((uint32_t)offset < (uint32_t)length()) { 4529 return getArrayStart()[offset]; 4530 } else { 4531 return kInvalidUChar; 4532 } 4533 } 4534 4535 inline char16_t 4536 UnicodeString::charAt(int32_t offset) const 4537 { return doCharAt(offset); } 4538 4539 inline char16_t 4540 UnicodeString::operator[] (int32_t offset) const 4541 { return doCharAt(offset); } 4542 4543 inline UBool 4544 UnicodeString::isEmpty() const { 4545 // Arithmetic or logical right shift does not matter: only testing for 0. 
4546 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0; 4547 } 4548 4549 //======================================== 4550 // Write implementation methods 4551 //======================================== 4552 inline void 4553 UnicodeString::setZeroLength() { 4554 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags; 4555 } 4556 4557 inline void 4558 UnicodeString::setShortLength(int32_t len) { 4559 // requires 0 <= len <= kMaxShortLength 4560 fUnion.fFields.fLengthAndFlags = 4561 (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); 4562 } 4563 4564 inline void 4565 UnicodeString::setLength(int32_t len) { 4566 if(len <= kMaxShortLength) { 4567 setShortLength(len); 4568 } else { 4569 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge; 4570 fUnion.fFields.fLength = len; 4571 } 4572 } 4573 4574 inline void 4575 UnicodeString::setToEmpty() { 4576 fUnion.fFields.fLengthAndFlags = kShortString; 4577 } 4578 4579 inline void 4580 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { 4581 setLength(len); 4582 fUnion.fFields.fArray = array; 4583 fUnion.fFields.fCapacity = capacity; 4584 } 4585 4586 inline UnicodeString& 4587 UnicodeString::operator= (char16_t ch) 4588 { return doReplace(0, length(), &ch, 0, 1); } 4589 4590 inline UnicodeString& 4591 UnicodeString::operator= (UChar32 ch) 4592 { return replace(0, length(), ch); } 4593 4594 inline UnicodeString& 4595 UnicodeString::setTo(const UnicodeString& srcText, 4596 int32_t srcStart, 4597 int32_t srcLength) 4598 { 4599 unBogus(); 4600 return doReplace(0, length(), srcText, srcStart, srcLength); 4601 } 4602 4603 inline UnicodeString& 4604 UnicodeString::setTo(const UnicodeString& srcText, 4605 int32_t srcStart) 4606 { 4607 unBogus(); 4608 srcText.pinIndex(srcStart); 4609 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 4610 } 4611 4612 inline UnicodeString& 4613 UnicodeString::setTo(const UnicodeString& srcText) 4614 { 4615 return 
copyFrom(srcText); 4616 } 4617 4618 inline UnicodeString& 4619 UnicodeString::setTo(const char16_t *srcChars, 4620 int32_t srcLength) 4621 { 4622 unBogus(); 4623 return doReplace(0, length(), srcChars, 0, srcLength); 4624 } 4625 4626 inline UnicodeString& 4627 UnicodeString::setTo(char16_t srcChar) 4628 { 4629 unBogus(); 4630 return doReplace(0, length(), &srcChar, 0, 1); 4631 } 4632 4633 inline UnicodeString& 4634 UnicodeString::setTo(UChar32 srcChar) 4635 { 4636 unBogus(); 4637 return replace(0, length(), srcChar); 4638 } 4639 4640 inline UnicodeString& 4641 UnicodeString::append(const UnicodeString& srcText, 4642 int32_t srcStart, 4643 int32_t srcLength) 4644 { return doAppend(srcText, srcStart, srcLength); } 4645 4646 inline UnicodeString& 4647 UnicodeString::append(const UnicodeString& srcText) 4648 { return doAppend(srcText, 0, srcText.length()); } 4649 4650 inline UnicodeString& 4651 UnicodeString::append(const char16_t *srcChars, 4652 int32_t srcStart, 4653 int32_t srcLength) 4654 { return doAppend(srcChars, srcStart, srcLength); } 4655 4656 inline UnicodeString& 4657 UnicodeString::append(ConstChar16Ptr srcChars, 4658 int32_t srcLength) 4659 { return doAppend(srcChars, 0, srcLength); } 4660 4661 inline UnicodeString& 4662 UnicodeString::append(char16_t srcChar) 4663 { return doAppend(&srcChar, 0, 1); } 4664 4665 inline UnicodeString& 4666 UnicodeString::operator+= (char16_t ch) 4667 { return doAppend(&ch, 0, 1); } 4668 4669 inline UnicodeString& 4670 UnicodeString::operator+= (UChar32 ch) { 4671 return append(ch); 4672 } 4673 4674 inline UnicodeString& 4675 UnicodeString::operator+= (const UnicodeString& srcText) 4676 { return doAppend(srcText, 0, srcText.length()); } 4677 4678 inline UnicodeString& 4679 UnicodeString::insert(int32_t start, 4680 const UnicodeString& srcText, 4681 int32_t srcStart, 4682 int32_t srcLength) 4683 { return doReplace(start, 0, srcText, srcStart, srcLength); } 4684 4685 inline UnicodeString& 4686 UnicodeString::insert(int32_t 
start, 4687 const UnicodeString& srcText) 4688 { return doReplace(start, 0, srcText, 0, srcText.length()); } 4689 4690 inline UnicodeString& 4691 UnicodeString::insert(int32_t start, 4692 const char16_t *srcChars, 4693 int32_t srcStart, 4694 int32_t srcLength) 4695 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 4696 4697 inline UnicodeString& 4698 UnicodeString::insert(int32_t start, 4699 ConstChar16Ptr srcChars, 4700 int32_t srcLength) 4701 { return doReplace(start, 0, srcChars, 0, srcLength); } 4702 4703 inline UnicodeString& 4704 UnicodeString::insert(int32_t start, 4705 char16_t srcChar) 4706 { return doReplace(start, 0, &srcChar, 0, 1); } 4707 4708 inline UnicodeString& 4709 UnicodeString::insert(int32_t start, 4710 UChar32 srcChar) 4711 { return replace(start, 0, srcChar); } 4712 4713 4714 inline UnicodeString& 4715 UnicodeString::remove() 4716 { 4717 // remove() of a bogus string makes the string empty and non-bogus 4718 if(isBogus()) { 4719 setToEmpty(); 4720 } else { 4721 setZeroLength(); 4722 } 4723 return *this; 4724 } 4725 4726 inline UnicodeString& 4727 UnicodeString::remove(int32_t start, 4728 int32_t _length) 4729 { 4730 if(start <= 0 && _length == INT32_MAX) { 4731 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 4732 return remove(); 4733 } 4734 return doReplace(start, _length, NULL, 0, 0); 4735 } 4736 4737 inline UnicodeString& 4738 UnicodeString::removeBetween(int32_t start, 4739 int32_t limit) 4740 { return doReplace(start, limit - start, NULL, 0, 0); } 4741 4742 inline UnicodeString & 4743 UnicodeString::retainBetween(int32_t start, int32_t limit) { 4744 truncate(limit); 4745 return doReplace(0, start, NULL, 0, 0); 4746 } 4747 4748 inline UBool 4749 UnicodeString::truncate(int32_t targetLength) 4750 { 4751 if(isBogus() && targetLength == 0) { 4752 // truncate(0) of a bogus string makes the string empty and non-bogus 4753 unBogus(); 4754 return FALSE; 4755 } else if((uint32_t)targetLength < 
(uint32_t)length()) { 4756 setLength(targetLength); 4757 return TRUE; 4758 } else { 4759 return FALSE; 4760 } 4761 } 4762 4763 inline UnicodeString& 4764 UnicodeString::reverse() 4765 { return doReverse(0, length()); } 4766 4767 inline UnicodeString& 4768 UnicodeString::reverse(int32_t start, 4769 int32_t _length) 4770 { return doReverse(start, _length); } 4771 4772 U_NAMESPACE_END 4773 4774 #endif 4775