Home | History | Annotate | Download | only in unicode
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1998-2015, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * File unistr.h
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   09/25/98    stephen     Creation.
     13 *   11/11/98    stephen     Changed per 11/9 code review.
     14 *   04/20/99    stephen     Overhauled per 4/16 code review.
     15 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
     16 *                           handleReplaceBetween(); other methods unchanged.
     17 *   06/25/01    grhoten     Remove dependency on iostream.
     18 ******************************************************************************
     19 */
     20 
     21 #ifndef UNISTR_H
     22 #define UNISTR_H
     23 
     24 /**
     25  * \file
     26  * \brief C++ API: Unicode String
     27  */
     28 
     29 #include "unicode/utypes.h"
     30 #include "unicode/rep.h"
     31 #include "unicode/std_string.h"
     32 #include "unicode/stringpiece.h"
     33 #include "unicode/bytestream.h"
     34 #include "unicode/ucasemap.h"
     35 
     36 struct UConverter;          // unicode/ucnv.h
     37 
     38 #ifndef U_COMPARE_CODE_POINT_ORDER
     39 /* see also ustring.h and unorm.h */
     40 /**
     41  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
     42  * Compare strings in code point order instead of code unit order.
     43  * @stable ICU 2.2
     44  */
     45 #define U_COMPARE_CODE_POINT_ORDER  0x8000
     46 #endif
     47 
     48 #ifndef USTRING_H
     49 /**
     50  * \ingroup ustring_ustrlen
     51  */
     52 U_STABLE int32_t U_EXPORT2
     53 u_strlen(const UChar *s);
     54 #endif
     55 
     56 /**
     57  * \def U_STRING_CASE_MAPPER_DEFINED
     58  * @internal
     59  */
     60 #ifndef U_STRING_CASE_MAPPER_DEFINED
     61 #define U_STRING_CASE_MAPPER_DEFINED
     62 
     63 /**
     64  * Internal string case mapping function type.
     65  * @internal
     66  */
     67 typedef int32_t U_CALLCONV
     68 UStringCaseMapper(const UCaseMap *csm,
     69                   UChar *dest, int32_t destCapacity,
     70                   const UChar *src, int32_t srcLength,
     71                   UErrorCode *pErrorCode);
     72 
     73 #endif
     74 
     75 U_NAMESPACE_BEGIN
     76 
     77 class BreakIterator;        // unicode/brkiter.h
     78 class Locale;               // unicode/locid.h
     79 class StringCharacterIterator;
     80 class UnicodeStringAppendable;  // unicode/appendable.h
     81 
     82 /* The <iostream> include has been moved to unicode/ustream.h */
     83 
     84 /**
     85  * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
     86  * which constructs a Unicode string from an invariant-character char * string.
     87  * About invariant characters see utypes.h.
     88  * This constructor has no runtime dependency on conversion code and is
     89  * therefore recommended over ones taking a charset name string
     90  * (where the empty string "" indicates invariant-character conversion).
     91  *
     92  * @stable ICU 3.2
     93  */
     94 #define US_INV icu::UnicodeString::kInvariant
     95 
     96 /**
     97  * Unicode String literals in C++.
     98  * Dependent on the platform properties, different UnicodeString
     99  * constructors should be used to create a UnicodeString object from
    100  * a string literal.
    101  * The macros are defined for maximum performance.
    102  * They work only for strings that contain "invariant characters", i.e.,
    103  * only latin letters, digits, and some punctuation.
    104  * See utypes.h for details.
    105  *
    106  * The string parameter must be a C string literal.
    107  * The length of the string, not including the terminating
    108  * <code>NUL</code>, must be specified as a constant.
    109  * The U_STRING_DECL macro should be invoked exactly once for one
    110  * such string variable before it is used.
    111  * @stable ICU 2.0
    112  */
    113 #if defined(U_DECLARE_UTF16)
    114 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
    115 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
    116 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
    117 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
    118 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
    119 #else
    120 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
    121 #endif
    122 
    123 /**
    124  * Unicode String literals in C++, without an explicit length argument.
    125  * Dependent on the platform properties, different UnicodeString
    126  * constructors should be used to create a UnicodeString object from
    127  * a string literal.
    128  * The macros are defined for improved performance.
    129  * They work only for strings that contain "invariant characters", i.e.,
    130  * only Latin letters, digits, and some punctuation.
    131  * See utypes.h for details.
    132  *
    133  * The string parameter must be a C string literal; this macro expands to UNICODE_STRING(cs, -1), i.e., it passes -1 as the length.
    134  * @stable ICU 2.0
    135  */
    136 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
    137 
    138 /**
    139  * \def UNISTR_FROM_CHAR_EXPLICIT
    140  * This can be defined to be empty or "explicit".
    141  * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
    142  * constructors are marked as explicit, preventing their inadvertent use.
    143  * @stable ICU 49
    144  */
    145 #ifndef UNISTR_FROM_CHAR_EXPLICIT
    146 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    147     // Auto-"explicit" in ICU library code.
    148 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
    149 # else
    150     // Empty by default for source code compatibility.
    151 #   define UNISTR_FROM_CHAR_EXPLICIT
    152 # endif
    153 #endif
    154 
    155 /**
    156  * \def UNISTR_FROM_STRING_EXPLICIT
    157  * This can be defined to be empty or "explicit".
    158  * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
    159  * constructors are marked as explicit, preventing their inadvertent use.
    160  *
    161  * In particular, this helps prevent accidentally depending on ICU conversion code
    162  * by passing a string literal into an API with a const UnicodeString & parameter.
    163  * @stable ICU 49
    164  */
    165 #ifndef UNISTR_FROM_STRING_EXPLICIT
    166 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    167     // Auto-"explicit" in ICU library code.
    168 #   define UNISTR_FROM_STRING_EXPLICIT explicit
    169 # else
    170     // Empty by default for source code compatibility.
    171 #   define UNISTR_FROM_STRING_EXPLICIT
    172 # endif
    173 #endif
    174 
    175 /* Cannot wrap the following in #ifndef U_HIDE_INTERNAL_API
    176    because it is used to construct other non-internal constants. */
    177 /**
    178  * \def UNISTR_OBJECT_SIZE
    179  * Desired sizeof(UnicodeString) in bytes.
    180  * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
    181  * The object size may want to be a multiple of 16 bytes,
    182  * which is a common granularity for heap allocation.
    183  *
    184  * Any space inside the object beyond sizeof(vtable pointer) + 2
    185  * is available for storing short strings inside the object.
    186  * The bigger the object, the longer a string that can be stored inside the object,
    187  * without additional heap allocation.
    188  *
    189  * Depending on a platform's pointer size, pointer alignment requirements,
    190  * and struct padding, the compiler will usually round up sizeof(UnicodeString)
    191  * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
    192  * to hold the fields for heap-allocated strings.
    193  * Such a minimum size also ensures that the object is easily large enough
    194  * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH).
    195  *
    196  * sizeof(UnicodeString) >= 48 should work for all known platforms.
    197  *
    198  * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
    199  * sizeof(UnicodeString) = 64 would leave space for
    200  * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
    201  * UChars stored inside the object.
    202  *
    203  * The minimum object size on a 64-bit machine would be
    204  * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
    205  * and the internal buffer would hold up to 11 UChars in that case.
    206  *
    207  * @see U16_MAX_LENGTH
    208  * @draft ICU 56
    209  */
    210 #ifndef UNISTR_OBJECT_SIZE
    211 # define UNISTR_OBJECT_SIZE 64
    212 #endif
    213 
    214 /**
    215  * UnicodeString is a string class that stores Unicode characters directly and provides
    216  * functionality similar to that of the Java String and StringBuffer/StringBuilder classes.
    217  * It is a concrete implementation of the abstract class Replaceable (for transliteration).
    218  *
    219  * A UnicodeString may also "alias" an external array of characters
    220  * (that is, point to it, rather than own the array)
    221  * whose lifetime must then at least match the lifetime of the aliasing object.
    222  * This aliasing may be preserved when returning a UnicodeString by value,
    223  * depending on the compiler and the function implementation,
    224  * via Return Value Optimization (RVO) or the move assignment operator.
    225  * (However, the copy assignment operator does not preserve aliasing.)
    226  * For details see the description of storage models at the end of the class API docs
    227  * and in the User Guide chapter linked from there.
    228  *
    229  * The UnicodeString class is not suitable for subclassing.
    230  *
    231  * <p>For an overview of Unicode strings in C and C++ see the
    232  * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
    233  *
    234  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
    235  * A Unicode character may be stored with either one code unit
    236  * (the most common case) or with a matched pair of special code units
    237  * ("surrogates"). The data type for code units is UChar.
    238  * For single-character handling, a Unicode character code <em>point</em> is a value
    239  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
    240  *
    241  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
    242  * This is the same as with multi-byte char* strings in traditional string handling.
    243  * Operations on partial strings typically do not test for code point boundaries.
    244  * If necessary, the user needs to take care of such boundaries by testing for the code unit
    245  * values or by using functions like
    246  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
    247  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
    248  *
    249  * UnicodeString methods are more lenient with regard to input parameter values
    250  * than other ICU APIs. In particular:
    251  * - If indexes are out of bounds for a UnicodeString object
    252  *   (<0 or >length()) then they are "pinned" to the nearest boundary.
    253  * - If primitive string pointer values (e.g., const UChar * or char *)
    254  *   for input strings are NULL, then those input string parameters are treated
    255  *   as if they pointed to an empty string.
    256  *   However, this is <em>not</em> the case for char * parameters for charset names
    257  *   or other IDs.
    258  * - Most UnicodeString methods do not take a UErrorCode parameter because
    259  *   there are usually very few opportunities for failure other than a shortage
    260  *   of memory, error codes in low-level C++ string methods would be inconvenient,
    261  *   and the error code as the last parameter (ICU convention) would prevent
    262  *   the use of default parameter values.
    263  *   Instead, such methods set the UnicodeString into a "bogus" state
    264  *   (see isBogus()) if an error occurs.
    265  *
    266  * In string comparisons, two UnicodeString objects that are both "bogus"
    267  * compare equal (to be transitive and prevent endless loops in sorting),
    268  * and a "bogus" string compares less than any non-"bogus" one.
    269  *
    270  * Const UnicodeString methods are thread-safe. Multiple threads can use
    271  * const methods on the same UnicodeString object simultaneously,
    272  * but non-const methods must not be called concurrently (in multiple threads)
    273  * with any other (const or non-const) methods.
    274  *
    275  * Similarly, const UnicodeString & parameters are thread-safe.
    276  * One object may be passed in as such a parameter concurrently in multiple threads.
    277  * This includes the const UnicodeString & parameters for
    278  * copy construction, assignment, and cloning.
    279  *
    280  * <p>UnicodeString uses several storage methods.
    281  * String contents can be stored inside the UnicodeString object itself,
    282  * in an allocated and shared buffer, or in an outside buffer that is "aliased".
    283  * Most of this is done transparently, but careful aliasing in particular provides
    284  * significant performance improvements.
    285  * Also, the internal buffer is accessible via special functions.
    286  * For details see the
    287  * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p>
    288  *
    289  * @see utf.h
    290  * @see CharacterIterator
    291  * @stable ICU 2.0
    292  */
    293 class U_COMMON_API UnicodeString : public Replaceable
    294 {
    295 public:
    296 
    297   /**
    298    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
    299    * which constructs a Unicode string from an invariant-character char * string.
    300    * Use the macro US_INV instead of the full qualification for this value.
    301    *
    302    * @see US_INV
    303    * @stable ICU 3.2
    304    */
    305   enum EInvariant {
    306     /**
    307      * @see EInvariant
    308      * @stable ICU 3.2
    309      */
    310     kInvariant
    311   };
    312 
    313   //========================================
    314   // Read-only operations
    315   //========================================
    316 
    317   /* Comparison - bitwise only - for international comparison use collation */
    318 
    319   /**
    320    * Equality operator. Performs only bitwise comparison.
    321    * @param text The UnicodeString to compare to this one.
    322    * @return TRUE if <TT>text</TT> contains the same characters as this one,
    323    * FALSE otherwise.
    324    * @stable ICU 2.0
    325    */
    326   inline UBool operator== (const UnicodeString& text) const;
    327 
    328   /**
    329    * Inequality operator. Performs only bitwise comparison.
    330    * @param text The UnicodeString to compare to this one.
    331    * @return FALSE if <TT>text</TT> contains the same characters as this one,
    332    * TRUE otherwise.
    333    * @stable ICU 2.0
    334    */
    335   inline UBool operator!= (const UnicodeString& text) const;
    336 
    337   /**
    338    * Greater than operator. Performs only bitwise comparison.
    339    * @param text The UnicodeString to compare to this one.
    340    * @return TRUE if the characters in this are bitwise
    341    * greater than the characters in <code>text</code>, FALSE otherwise
    342    * @stable ICU 2.0
    343    */
    344   inline UBool operator> (const UnicodeString& text) const;
    345 
    346   /**
    347    * Less than operator. Performs only bitwise comparison.
    348    * @param text The UnicodeString to compare to this one.
    349    * @return TRUE if the characters in this are bitwise
    350    * less than the characters in <code>text</code>, FALSE otherwise
    351    * @stable ICU 2.0
    352    */
    353   inline UBool operator< (const UnicodeString& text) const;
    354 
    355   /**
    356    * Greater than or equal operator. Performs only bitwise comparison.
    357    * @param text The UnicodeString to compare to this one.
    358    * @return TRUE if the characters in this are bitwise
    359    * greater than or equal to the characters in <code>text</code>, FALSE otherwise
    360    * @stable ICU 2.0
    361    */
    362   inline UBool operator>= (const UnicodeString& text) const;
    363 
    364   /**
    365    * Less than or equal operator. Performs only bitwise comparison.
    366    * @param text The UnicodeString to compare to this one.
    367    * @return TRUE if the characters in this are bitwise
    368    * less than or equal to the characters in <code>text</code>, FALSE otherwise
    369    * @stable ICU 2.0
    370    */
    371   inline UBool operator<= (const UnicodeString& text) const;
    372 
    373   /**
    374    * Compare the characters bitwise in this UnicodeString to
    375    * the characters in <code>text</code>.
    376    * @param text The UnicodeString to compare to this one.
    377    * @return The result of bitwise character comparison: 0 if this
    378    * contains the same characters as <code>text</code>, -1 if the characters in
    379    * this are bitwise less than the characters in <code>text</code>, +1 if the
    380    * characters in this are bitwise greater than the characters
    381    * in <code>text</code>.
    382    * @stable ICU 2.0
    383    */
    384   inline int8_t compare(const UnicodeString& text) const;
    385 
    386   /**
    387    * Compare the characters bitwise in the range
    388    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
    389    * in the <b>entire string</b> <TT>text</TT>.
    390    * (The parameters "start" and "length" are not applied to the other text "text".)
    391    * @param start the offset in this string at which the compare operation begins
    392    * @param length the number of characters in this string to compare.
    393    * @param text the other text to be compared against this string.
    394    * @return The result of bitwise character comparison: 0 if this
    395    * contains the same characters as <code>text</code>, -1 if the characters in
    396    * this are bitwise less than the characters in <code>text</code>, +1 if the
    397    * characters in this are bitwise greater than the characters
    398    * in <code>text</code>.
    399    * @stable ICU 2.0
    400    */
    401   inline int8_t compare(int32_t start,
    402          int32_t length,
    403          const UnicodeString& text) const;
    404 
    405   /**
    406    * Compare the characters bitwise in the range
    407    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
    408    * in <TT>srcText</TT> in the range
    409    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    410    * @param start the offset at which the compare operation begins
    411    * @param length the number of characters in this string to compare.
    412    * @param srcText the text to be compared
    413    * @param srcStart the offset into <TT>srcText</TT> to start comparison
    414    * @param srcLength the number of characters in <TT>srcText</TT> to compare
    415    * @return The result of bitwise character comparison: 0 if this
    416    * contains the same characters as <code>srcText</code>, -1 if the characters in
    417    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
    418    * characters in this are bitwise greater than the characters
    419    * in <code>srcText</code>.
    420    * @stable ICU 2.0
    421    */
    422    inline int8_t compare(int32_t start,
    423          int32_t length,
    424          const UnicodeString& srcText,
    425          int32_t srcStart,
    426          int32_t srcLength) const;
    427 
    428   /**
    429    * Compare the characters bitwise in this UnicodeString with the first
    430    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
    431    * @param srcChars The characters to compare to this UnicodeString.
    432    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
    433    * @return The result of bitwise character comparison: 0 if this
    434    * contains the same characters as <code>srcChars</code>, -1 if the characters in
    435    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
    436    * characters in this are bitwise greater than the characters
    437    * in <code>srcChars</code>.
    438    * @stable ICU 2.0
    439    */
    440   inline int8_t compare(const UChar *srcChars,
    441          int32_t srcLength) const;
    442 
    443   /**
    444    * Compare the characters bitwise in the range
    445    * [<TT>start</TT>, <TT>start + length</TT>) with the first
    446    * <TT>length</TT> characters in <TT>srcChars</TT>.
    447    * @param start the offset at which the compare operation begins
    448    * @param length the number of characters to compare.
    449    * @param srcChars the characters to be compared
    450    * @return The result of bitwise character comparison: 0 if this
    451    * contains the same characters as <code>srcChars</code>, -1 if the characters in
    452    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
    453    * characters in this are bitwise greater than the characters
    454    * in <code>srcChars</code>.
    455    * @stable ICU 2.0
    456    */
    457   inline int8_t compare(int32_t start,
    458          int32_t length,
    459          const UChar *srcChars) const;
    460 
    461   /**
    462    * Compare the characters bitwise in the range
    463    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
    464    * in <TT>srcChars</TT> in the range
    465    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    466    * @param start the offset at which the compare operation begins
    467    * @param length the number of characters in this to compare
    468    * @param srcChars the characters to be compared
    469    * @param srcStart the offset into <TT>srcChars</TT> to start comparison
    470    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
    471    * @return The result of bitwise character comparison: 0 if this
    472    * contains the same characters as <code>srcChars</code>, -1 if the characters in
    473    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
    474    * characters in this are bitwise greater than the characters
    475    * in <code>srcChars</code>.
    476    * @stable ICU 2.0
    477    */
    478   inline int8_t compare(int32_t start,
    479          int32_t length,
    480          const UChar *srcChars,
    481          int32_t srcStart,
    482          int32_t srcLength) const;
    483 
    484   /**
    485    * Compare the characters bitwise in the range
    486    * [<TT>start</TT>, <TT>limit</TT>) with the characters
    487    * in <TT>srcText</TT> in the range
    488    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
    489    * @param start the offset at which the compare operation begins
    490    * @param limit the offset immediately following the compare operation
    491    * @param srcText the text to be compared
    492    * @param srcStart the offset into <TT>srcText</TT> to start comparison
    493    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
    494    * @return The result of bitwise character comparison: 0 if this
    495    * contains the same characters as <code>srcText</code>, -1 if the characters in
    496    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
    497    * characters in this are bitwise greater than the characters
    498    * in <code>srcText</code>.
    499    * @stable ICU 2.0
    500    */
    501   inline int8_t compareBetween(int32_t start,
    502             int32_t limit,
    503             const UnicodeString& srcText,
    504             int32_t srcStart,
    505             int32_t srcLimit) const;
    506 
    507   /**
    508    * Compare two Unicode strings in code point order.
    509    * The result may be different from the results of compare(), operator<, etc.
    510    * if supplementary characters are present:
    511    *
    512    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    513    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    514    * which means that they compare as less than some other BMP characters like U+feff.
    515    * This function compares Unicode strings in code point order.
    516    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    517    *
    518    * @param text Another string to compare this one to.
    519    * @return a negative/zero/positive integer corresponding to whether
    520    * this string is less than/equal to/greater than the second one
    521    * in code point order
    522    * @stable ICU 2.0
    523    */
    524   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
    525 
    526   /**
    527    * Compare two Unicode strings in code point order.
    528    * The result may be different from the results of compare(), operator<, etc.
    529    * if supplementary characters are present:
    530    *
    531    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    532    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    533    * which means that they compare as less than some other BMP characters like U+feff.
    534    * This function compares Unicode strings in code point order.
    535    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    536    *
    537    * @param start The start offset in this string at which the compare operation begins.
    538    * @param length The number of code units from this string to compare.
    539    * @param srcText Another string to compare this one to.
    540    * @return a negative/zero/positive integer corresponding to whether
    541    * this string is less than/equal to/greater than the second one
    542    * in code point order
    543    * @stable ICU 2.0
    544    */
    545   inline int8_t compareCodePointOrder(int32_t start,
    546                                       int32_t length,
    547                                       const UnicodeString& srcText) const;
    548 
    549   /**
    550    * Compare two Unicode strings in code point order.
    551    * The result may be different from the results of compare(), operator<, etc.
    552    * if supplementary characters are present:
    553    *
    554    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    555    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    556    * which means that they compare as less than some other BMP characters like U+feff.
    557    * This function compares Unicode strings in code point order.
    558    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    559    *
    560    * @param start The start offset in this string at which the compare operation begins.
    561    * @param length The number of code units from this string to compare.
    562    * @param srcText Another string to compare this one to.
    563    * @param srcStart The start offset in that string at which the compare operation begins.
    564    * @param srcLength The number of code units from that string to compare.
    565    * @return a negative/zero/positive integer corresponding to whether
    566    * this string is less than/equal to/greater than the second one
    567    * in code point order
    568    * @stable ICU 2.0
    569    */
    570    inline int8_t compareCodePointOrder(int32_t start,
    571                                        int32_t length,
    572                                        const UnicodeString& srcText,
    573                                        int32_t srcStart,
    574                                        int32_t srcLength) const;
    575 
    576   /**
    577    * Compare two Unicode strings in code point order.
    578    * The result may be different from the results of compare(), operator<, etc.
    579    * if supplementary characters are present:
    580    *
    581    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    582    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    583    * which means that they compare as less than some other BMP characters like U+feff.
    584    * This function compares Unicode strings in code point order.
    585    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    586    *
    587    * @param srcChars A pointer to another string to compare this one to.
    588    * @param srcLength The number of code units from that string to compare.
    589    * @return a negative/zero/positive integer corresponding to whether
    590    * this string is less than/equal to/greater than the second one
    591    * in code point order
    592    * @stable ICU 2.0
    593    */
    594   inline int8_t compareCodePointOrder(const UChar *srcChars,
    595                                       int32_t srcLength) const;
    596 
    597   /**
    598    * Compare a range of this string with the string at <TT>srcChars</TT> in code point order.
    599    * The result may be different from the results of compare(), operator<, etc.
    600    * if supplementary characters are present:
    601    *
    602    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    603    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    604    * which means that they compare as less than some other BMP characters like U+feff.
    605    * This function compares Unicode strings in code point order.
    606    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    607    *
    608    * @param start The start offset in this string at which the compare operation begins.
    609    * @param length The number of code units from this string to compare.
    610    * @param srcChars A pointer to another string to compare this one to.
    611    * @return a negative/zero/positive integer corresponding to whether
    612    * this string is less than/equal to/greater than the second one
    613    * in code point order
    614    * @stable ICU 2.0
    615    */
    616   inline int8_t compareCodePointOrder(int32_t start,
    617                                       int32_t length,
    618                                       const UChar *srcChars) const;
    619 
    620   /**
    621    * Compare a range of this string with a range of the string at <TT>srcChars</TT> in code point order.
    622    * The result may be different from the results of compare(), operator<, etc.
    623    * if supplementary characters are present:
    624    *
    625    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    626    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    627    * which means that they compare as less than some other BMP characters like U+feff.
    628    * This function compares Unicode strings in code point order.
    629    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    630    *
    631    * @param start The start offset in this string at which the compare operation begins.
    632    * @param length The number of code units from this string to compare.
    633    * @param srcChars A pointer to another string to compare this one to.
    634    * @param srcStart The start offset in that string at which the compare operation begins.
    635    * @param srcLength The number of code units from that string to compare.
    636    * @return a negative/zero/positive integer corresponding to whether
    637    * this string is less than/equal to/greater than the second one
    638    * in code point order
    639    * @stable ICU 2.0
    640    */
    641   inline int8_t compareCodePointOrder(int32_t start,
    642                                       int32_t length,
    643                                       const UChar *srcChars,
    644                                       int32_t srcStart,
    645                                       int32_t srcLength) const;
    646 
    647   /**
    648    * Compare the range [<TT>start</TT>, <TT>limit</TT>) of this string with the range [<TT>srcStart</TT>, <TT>srcLimit</TT>) of <TT>srcText</TT> in code point order.
    649    * The result may be different from the results of compare(), operator<, etc.
    650    * if supplementary characters are present:
    651    *
    652    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    653    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    654    * which means that they compare as less than some other BMP characters like U+feff.
    655    * This function compares Unicode strings in code point order.
    656    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    657    *
    658    * @param start The start offset in this string at which the compare operation begins.
    659    * @param limit The offset after the last code unit from this string to compare.
    660    * @param srcText Another string to compare this one to.
    661    * @param srcStart The start offset in that string at which the compare operation begins.
    662    * @param srcLimit The offset after the last code unit from that string to compare.
    663    * @return a negative/zero/positive integer corresponding to whether
    664    * this string is less than/equal to/greater than the second one
    665    * in code point order
    666    * @stable ICU 2.0
    667    */
    668   inline int8_t compareCodePointOrderBetween(int32_t start,
    669                                              int32_t limit,
    670                                              const UnicodeString& srcText,
    671                                              int32_t srcStart,
    672                                              int32_t srcLimit) const;
    673 
    674   /**
    675    * Compare this string with <TT>text</TT> case-insensitively using full case folding.
    676    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
    677    *
    678    * @param text Another string to compare this one to.
    679    * @param options A bit set of options:
    680    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    681    *     Comparison in code unit order with default case folding.
    682    *
    683    *   - U_COMPARE_CODE_POINT_ORDER
    684    *     Set to choose code point order instead of code unit order
    685    *     (see u_strCompare for details).
    686    *
    687    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    688    *
    689    * @return A negative, zero, or positive integer indicating the comparison result.
    690    * @stable ICU 2.0
    691    */
    692   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
    693 
    694   /**
    695    * Compare a range of this string with <TT>srcText</TT> case-insensitively using full case folding.
    696    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
    697    *
    698    * @param start The start offset in this string at which the compare operation begins.
    699    * @param length The number of code units from this string to compare.
    700    * @param srcText Another string to compare this one to.
    701    * @param options A bit set of options:
    702    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    703    *     Comparison in code unit order with default case folding.
    704    *
    705    *   - U_COMPARE_CODE_POINT_ORDER
    706    *     Set to choose code point order instead of code unit order
    707    *     (see u_strCompare for details).
    708    *
    709    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    710    *
    711    * @return A negative, zero, or positive integer indicating the comparison result.
    712    * @stable ICU 2.0
    713    */
    714   inline int8_t caseCompare(int32_t start,
    715          int32_t length,
    716          const UnicodeString& srcText,
    717          uint32_t options) const;
    718 
    719   /**
    720    * Compare a range of this string with a range of <TT>srcText</TT> case-insensitively using full case folding.
    721    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
    722    *
    723    * @param start The start offset in this string at which the compare operation begins.
    724    * @param length The number of code units from this string to compare.
    725    * @param srcText Another string to compare this one to.
    726    * @param srcStart The start offset in that string at which the compare operation begins.
    727    * @param srcLength The number of code units from that string to compare.
    728    * @param options A bit set of options:
    729    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    730    *     Comparison in code unit order with default case folding.
    731    *
    732    *   - U_COMPARE_CODE_POINT_ORDER
    733    *     Set to choose code point order instead of code unit order
    734    *     (see u_strCompare for details).
    735    *
    736    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    737    *
    738    * @return A negative, zero, or positive integer indicating the comparison result.
    739    * @stable ICU 2.0
    740    */
    741   inline int8_t caseCompare(int32_t start,
    742          int32_t length,
    743          const UnicodeString& srcText,
    744          int32_t srcStart,
    745          int32_t srcLength,
    746          uint32_t options) const;
    747 
    748   /**
    749    * Compare this string with the string at <TT>srcChars</TT> case-insensitively using full case folding.
    750    * This is equivalent to comparing this->foldCase(options) with the case-folded contents of srcChars using compare().
    751    *
    752    * @param srcChars A pointer to another string to compare this one to.
    753    * @param srcLength The number of code units from that string to compare.
    754    * @param options A bit set of options:
    755    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    756    *     Comparison in code unit order with default case folding.
    757    *
    758    *   - U_COMPARE_CODE_POINT_ORDER
    759    *     Set to choose code point order instead of code unit order
    760    *     (see u_strCompare for details).
    761    *
    762    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    763    *
    764    * @return A negative, zero, or positive integer indicating the comparison result.
    765    * @stable ICU 2.0
    766    */
    767   inline int8_t caseCompare(const UChar *srcChars,
    768          int32_t srcLength,
    769          uint32_t options) const;
    770 
    771   /**
    772    * Compare a range of this string with the string at <TT>srcChars</TT> case-insensitively using full case folding.
    773    * This is equivalent to comparing this->foldCase(options) with the case-folded contents of srcChars using compare().
    774    *
    775    * @param start The start offset in this string at which the compare operation begins.
    776    * @param length The number of code units from this string to compare.
    777    * @param srcChars A pointer to another string to compare this one to.
    778    * @param options A bit set of options:
    779    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    780    *     Comparison in code unit order with default case folding.
    781    *
    782    *   - U_COMPARE_CODE_POINT_ORDER
    783    *     Set to choose code point order instead of code unit order
    784    *     (see u_strCompare for details).
    785    *
    786    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    787    *
    788    * @return A negative, zero, or positive integer indicating the comparison result.
    789    * @stable ICU 2.0
    790    */
    791   inline int8_t caseCompare(int32_t start,
    792          int32_t length,
    793          const UChar *srcChars,
    794          uint32_t options) const;
    795 
    796   /**
    797    * Compare a range of this string with a range of the string at <TT>srcChars</TT> case-insensitively using full case folding.
    798    * This is equivalent to comparing this->foldCase(options) with the case-folded contents of srcChars using compare().
    799    *
    800    * @param start The start offset in this string at which the compare operation begins.
    801    * @param length The number of code units from this string to compare.
    802    * @param srcChars A pointer to another string to compare this one to.
    803    * @param srcStart The start offset in that string at which the compare operation begins.
    804    * @param srcLength The number of code units from that string to compare.
    805    * @param options A bit set of options:
    806    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    807    *     Comparison in code unit order with default case folding.
    808    *
    809    *   - U_COMPARE_CODE_POINT_ORDER
    810    *     Set to choose code point order instead of code unit order
    811    *     (see u_strCompare for details).
    812    *
    813    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    814    *
    815    * @return A negative, zero, or positive integer indicating the comparison result.
    816    * @stable ICU 2.0
    817    */
    818   inline int8_t caseCompare(int32_t start,
    819          int32_t length,
    820          const UChar *srcChars,
    821          int32_t srcStart,
    822          int32_t srcLength,
    823          uint32_t options) const;
    824 
    825   /**
    826    * Compare the range [<TT>start</TT>, <TT>limit</TT>) of this string with the range [<TT>srcStart</TT>, <TT>srcLimit</TT>) of <TT>srcText</TT> case-insensitively using full case folding.
    827    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)), with the range arguments omitted for brevity.
    828    *
    829    * @param start The start offset in this string at which the compare operation begins.
    830    * @param limit The offset after the last code unit from this string to compare.
    831    * @param srcText Another string to compare this one to.
    832    * @param srcStart The start offset in that string at which the compare operation begins.
    833    * @param srcLimit The offset after the last code unit from that string to compare.
    834    * @param options A bit set of options:
    835    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    836    *     Comparison in code unit order with default case folding.
    837    *
    838    *   - U_COMPARE_CODE_POINT_ORDER
    839    *     Set to choose code point order instead of code unit order
    840    *     (see u_strCompare for details).
    841    *
    842    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    843    *
    844    * @return A negative, zero, or positive integer indicating the comparison result.
    845    * @stable ICU 2.0
    846    */
    847   inline int8_t caseCompareBetween(int32_t start,
    848             int32_t limit,
    849             const UnicodeString& srcText,
    850             int32_t srcStart,
    851             int32_t srcLimit,
    852             uint32_t options) const;
    853 
    854   /**
    855    * Determine if this string starts with the characters in <TT>text</TT>.
    856    * @param text The text to match.
    857    * @return TRUE if this string starts with the characters in <TT>text</TT>,
    858    * FALSE otherwise
    859    * @stable ICU 2.0
    860    */
    861   inline UBool startsWith(const UnicodeString& text) const;
    862 
    863   /**
    864    * Determine if this string starts with the characters in <TT>srcText</TT>
    865    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    866    * @param srcText The text to match.
    867    * @param srcStart the offset into <TT>srcText</TT> to start matching
    868    * @param srcLength the number of characters in <TT>srcText</TT> to match
    869    * @return TRUE if this string starts with the specified characters of <TT>srcText</TT>,
    870    * FALSE otherwise
    871    * @stable ICU 2.0
    872    */
    873   inline UBool startsWith(const UnicodeString& srcText,
    874             int32_t srcStart,
    875             int32_t srcLength) const;
    876 
    877   /**
    878    * Determine if this string starts with the characters in <TT>srcChars</TT>.
    879    * @param srcChars The characters to match.
    880    * @param srcLength the number of characters in <TT>srcChars</TT>
    881    * @return TRUE if this string starts with the characters in <TT>srcChars</TT>,
    882    * FALSE otherwise
    883    * @stable ICU 2.0
    884    */
    885   inline UBool startsWith(const UChar *srcChars,
    886             int32_t srcLength) const;
    887 
    888   /**
    889    * Determine if this string starts with the characters in <TT>srcChars</TT>
    890    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    891    * @param srcChars The characters to match.
    892    * @param srcStart the offset into <TT>srcChars</TT> to start matching
    893    * @param srcLength the number of characters in <TT>srcChars</TT> to match
    894    * @return TRUE if this string starts with the specified characters of <TT>srcChars</TT>, FALSE otherwise
    895    * @stable ICU 2.0
    896    */
    897   inline UBool startsWith(const UChar *srcChars,
    898             int32_t srcStart,
    899             int32_t srcLength) const;
    900 
    901   /**
    902    * Determine if this string ends with the characters in <TT>text</TT>.
    903    * @param text The text to match.
    904    * @return TRUE if this string ends with the characters in <TT>text</TT>,
    905    * FALSE otherwise
    906    * @stable ICU 2.0
    907    */
    908   inline UBool endsWith(const UnicodeString& text) const;
    909 
    910   /**
    911    * Determine if this string ends with the characters in <TT>srcText</TT>
    912    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    913    * @param srcText The text to match.
    914    * @param srcStart the offset into <TT>srcText</TT> to start matching
    915    * @param srcLength the number of characters in <TT>srcText</TT> to match
    916    * @return TRUE if this string ends with the specified characters of <TT>srcText</TT>,
    917    * FALSE otherwise
    918    * @stable ICU 2.0
    919    */
    920   inline UBool endsWith(const UnicodeString& srcText,
    921           int32_t srcStart,
    922           int32_t srcLength) const;
    923 
    924   /**
    925    * Determine if this string ends with the characters in <TT>srcChars</TT>.
    926    * @param srcChars The characters to match.
    927    * @param srcLength the number of characters in <TT>srcChars</TT>
    928    * @return TRUE if this string ends with the characters in <TT>srcChars</TT>,
    929    * FALSE otherwise
    930    * @stable ICU 2.0
    931    */
    932   inline UBool endsWith(const UChar *srcChars,
    933           int32_t srcLength) const;
    934 
    935   /**
    936    * Determine if this string ends with the characters in <TT>srcChars</TT>
    937    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    938    * @param srcChars The characters to match.
    939    * @param srcStart the offset into <TT>srcChars</TT> to start matching
    940    * @param srcLength the number of characters in <TT>srcChars</TT> to match
    941    * @return TRUE if this string ends with the specified characters of <TT>srcChars</TT>,
    942    * FALSE otherwise
    943    * @stable ICU 2.0
    944    */
    945   inline UBool endsWith(const UChar *srcChars,
    946           int32_t srcStart,
    947           int32_t srcLength) const;
    948 
    949 
    950   /* Searching - bitwise only */
    951 
    952   /**
    953    * Locate in this string the first occurrence of the characters in <TT>text</TT>,
    954    * using bitwise comparison.
    955    * @param text The text to search for.
    956    * @return The offset into this string of the start of <TT>text</TT>,
    957    * or -1 if not found.
    958    * @stable ICU 2.0
    959    */
    960   inline int32_t indexOf(const UnicodeString& text) const;
    961 
    962   /**
    963    * Locate in this string the first occurrence of the characters in <TT>text</TT>
    964    * starting at offset <TT>start</TT>, using bitwise comparison.
    965    * @param text The text to search for.
    966    * @param start The offset at which searching will start.
    967    * @return The offset into this string of the start of <TT>text</TT>,
    968    * or -1 if not found.
    969    * @stable ICU 2.0
    970    */
    971   inline int32_t indexOf(const UnicodeString& text,
    972               int32_t start) const;
    973 
    974   /**
    975    * Locate in this string the first occurrence in the range
    976    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
    977    * in <TT>text</TT>, using bitwise comparison.
    978    * @param text The text to search for.
    979    * @param start The offset at which searching will start.
    980    * @param length The number of characters to search
    981    * @return The offset into this string of the start of <TT>text</TT>,
    982    * or -1 if not found.
    983    * @stable ICU 2.0
    984    */
    985   inline int32_t indexOf(const UnicodeString& text,
    986               int32_t start,
    987               int32_t length) const;
    988 
    989   /**
    990    * Locate in this string the first occurrence in the range
    991    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
    992    *  in <TT>srcText</TT> in the range
    993    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
    994    * using bitwise comparison.
    995    * @param srcText The text to search for.
    996    * @param srcStart the offset into <TT>srcText</TT> at which
    997    * to start matching
    998    * @param srcLength the number of characters in <TT>srcText</TT> to match
    999    * @param start the offset into this string at which to start matching
   1000    * @param length the number of characters in this string to search
   1001    * @return The offset into this string of the start of the matched range of <TT>srcText</TT>,
   1002    * or -1 if not found.
   1003    * @stable ICU 2.0
   1004    */
   1005   inline int32_t indexOf(const UnicodeString& srcText,
   1006               int32_t srcStart,
   1007               int32_t srcLength,
   1008               int32_t start,
   1009               int32_t length) const;
   1010 
   1011   /**
   1012    * Locate in this string the first occurrence of the characters in
   1013    * <TT>srcChars</TT>
   1014    * starting at offset <TT>start</TT>, using bitwise comparison.
   1015    * @param srcChars The text to search for.
   1016    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   1017    * @param start the offset into this string at which to start matching
   1018    * @return The offset into this string of the start of <TT>srcChars</TT>,
   1019    * or -1 if not found.
   1020    * @stable ICU 2.0
   1021    */
   1022   inline int32_t indexOf(const UChar *srcChars,
   1023               int32_t srcLength,
   1024               int32_t start) const;
   1025 
   1026   /**
   1027    * Locate in this string the first occurrence in the range
   1028    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1029    * in <TT>srcChars</TT>, using bitwise comparison.
   1030    * @param srcChars The text to search for.
   1031    * @param srcLength the number of characters in <TT>srcChars</TT>
   1032    * @param start The offset at which searching will start.
   1033    * @param length The number of characters to search
   1034    * @return The offset into this string of the start of <TT>srcChars</TT>,
   1035    * or -1 if not found.
   1036    * @stable ICU 2.0
   1037    */
   1038   inline int32_t indexOf(const UChar *srcChars,
   1039               int32_t srcLength,
   1040               int32_t start,
   1041               int32_t length) const;
   1042 
   1043   /**
   1044    * Locate in this string the first occurrence in the range
   1045    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1046    * in <TT>srcChars</TT> in the range
   1047    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
   1048    * using bitwise comparison.
   1049    * @param srcChars The text to search for.
   1050    * @param srcStart the offset into <TT>srcChars</TT> at which
   1051    * to start matching
   1052    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   1053    * @param start the offset into this string at which to start matching
   1054    * @param length the number of characters in this string to search
   1055    * @return The offset into this string of the start of the matched range of <TT>srcChars</TT>,
   1056    * or -1 if not found.
   1057    * @stable ICU 2.0
   1058    */
   1059   int32_t indexOf(const UChar *srcChars,
   1060               int32_t srcStart,
   1061               int32_t srcLength,
   1062               int32_t start,
   1063               int32_t length) const;
   1064 
   1065   /**
   1066    * Locate in this string the first occurrence of the BMP code point <code>c</code>,
   1067    * using bitwise comparison.
   1068    * @param c The code unit to search for.
   1069    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1070    * @stable ICU 2.0
   1071    */
   1072   inline int32_t indexOf(UChar c) const;
   1073 
   1074   /**
   1075    * Locate in this string the first occurrence of the code point <TT>c</TT>,
   1076    * using bitwise comparison.
   1077    *
   1078    * @param c The code point to search for.
   1079    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1080    * @stable ICU 2.0
   1081    */
   1082   inline int32_t indexOf(UChar32 c) const;
   1083 
   1084   /**
   1085    * Locate in this string the first occurrence of the BMP code point <code>c</code>,
   1086    * starting at offset <TT>start</TT>, using bitwise comparison.
   1087    * @param c The code unit to search for.
   1088    * @param start The offset at which searching will start.
   1089    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1090    * @stable ICU 2.0
   1091    */
   1092   inline int32_t indexOf(UChar c,
   1093               int32_t start) const;
   1094 
   1095   /**
   1096    * Locate in this string the first occurrence of the code point <TT>c</TT>
   1097    * starting at offset <TT>start</TT>, using bitwise comparison.
   1098    *
   1099    * @param c The code point to search for.
   1100    * @param start The offset at which searching will start.
   1101    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1102    * @stable ICU 2.0
   1103    */
   1104   inline int32_t indexOf(UChar32 c,
   1105               int32_t start) const;
   1106 
   1107   /**
   1108    * Locate in this string the first occurrence of the BMP code point <code>c</code>
   1109    * in the range [<TT>start</TT>, <TT>start + length</TT>),
   1110    * using bitwise comparison.
   1111    * @param c The code unit to search for.
   1112    * @param start the offset into this string at which to start matching
   1113    * @param length the number of characters in this string to search
   1114    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1115    * @stable ICU 2.0
   1116    */
   1117   inline int32_t indexOf(UChar c,
   1118               int32_t start,
   1119               int32_t length) const;
   1120 
   1121   /**
   1122    * Locate in this string the first occurrence of the code point <TT>c</TT>
   1123    * in the range [<TT>start</TT>, <TT>start + length</TT>),
   1124    * using bitwise comparison.
   1125    *
   1126    * @param c The code point to search for.
   1127    * @param start the offset into this string at which to start matching
   1128    * @param length the number of characters in this string to search
   1129    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1130    * @stable ICU 2.0
   1131    */
   1132   inline int32_t indexOf(UChar32 c,
   1133               int32_t start,
   1134               int32_t length) const;
   1135 
   1136   /**
   1137    * Locate in this string the last occurrence of the characters in <TT>text</TT>,
   1138    * using bitwise comparison.
   1139    * @param text The text to search for.
   1140    * @return The offset into this string of the start of <TT>text</TT>,
   1141    * or -1 if not found.
   1142    * @stable ICU 2.0
   1143    */
   1144   inline int32_t lastIndexOf(const UnicodeString& text) const;
   1145 
   1146   /**
   1147    * Locate in this string the last occurrence of the characters in <TT>text</TT>
   1148    * starting at offset <TT>start</TT>, using bitwise comparison.
   1149    * @param text The text to search for.
   1150    * @param start The offset at which searching will start.
   1151    * @return The offset into this string of the start of <TT>text</TT>,
   1152    * or -1 if not found.
   1153    * @stable ICU 2.0
   1154    */
   1155   inline int32_t lastIndexOf(const UnicodeString& text,
   1156               int32_t start) const;
   1157 
   1158   /**
   1159    * Locate in this string the last occurrence in the range
   1160    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1161    * in <TT>text</TT>, using bitwise comparison.
   1162    * @param text The text to search for.
   1163    * @param start The offset at which searching will start.
   1164    * @param length The number of characters to search
   1165    * @return The offset into this string of the start of <TT>text</TT>,
   1166    * or -1 if not found.
   1167    * @stable ICU 2.0
   1168    */
   1169   inline int32_t lastIndexOf(const UnicodeString& text,
   1170               int32_t start,
   1171               int32_t length) const;
   1172 
   1173   /**
   1174    * Locate in this string the last occurrence in the range
   1175    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1176    * in <TT>srcText</TT> in the range
   1177    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
   1178    * using bitwise comparison.
   1179    * @param srcText The text to search for.
   1180    * @param srcStart the offset into <TT>srcText</TT> at which
   1181    * to start matching
   1182    * @param srcLength the number of characters in <TT>srcText</TT> to match
   1183    * @param start the offset into this string at which to start matching
   1184    * @param length the number of characters in this string to search
   1185    * @return The offset into this string of the start of the matched range of <TT>srcText</TT>,
   1186    * or -1 if not found.
   1187    * @stable ICU 2.0
   1188    */
   1189   inline int32_t lastIndexOf(const UnicodeString& srcText,
   1190               int32_t srcStart,
   1191               int32_t srcLength,
   1192               int32_t start,
   1193               int32_t length) const;
   1194 
   1195   /**
   1196    * Locate in this string the last occurrence of the characters in <TT>srcChars</TT>
   1197    * starting at offset <TT>start</TT>, using bitwise comparison.
   1198    * @param srcChars The text to search for.
   1199    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   1200    * @param start the offset into this string at which to start matching
   1201    * @return The offset into this string of the start of <TT>srcChars</TT>,
   1202    * or -1 if not found.
   1203    * @stable ICU 2.0
   1204    */
   1205   inline int32_t lastIndexOf(const UChar *srcChars,
   1206               int32_t srcLength,
   1207               int32_t start) const;
   1208 
   1209   /**
   1210    * Locate in this string the last occurrence in the range
   1211    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1212    * in <TT>srcChars</TT>, using bitwise comparison.
   1213    * @param srcChars The text to search for.
   1214    * @param srcLength the number of characters in <TT>srcChars</TT>
   1215    * @param start The offset at which searching will start.
   1216    * @param length The number of characters to search
   1217    * @return The offset into this string of the start of <TT>srcChars</TT>,
   1218    * or -1 if not found.
   1219    * @stable ICU 2.0
   1220    */
   1221   inline int32_t lastIndexOf(const UChar *srcChars,
   1222               int32_t srcLength,
   1223               int32_t start,
   1224               int32_t length) const;
   1225 
   1226   /**
   1227    * Locate in this string the last occurrence in the range
   1228    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1229    * in <TT>srcChars</TT> in the range
   1230    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
   1231    * using bitwise comparison.
   1232    * @param srcChars The text to search for.
   1233    * @param srcStart the offset into <TT>srcChars</TT> at which
   1234    * to start matching
   1235    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   1236    * @param start the offset into this string at which to start matching
   1237    * @param length the number of characters in this string to search
   1238    * @return The offset into this string of the start of the matched range of <TT>srcChars</TT>,
   1239    * or -1 if not found.
   1240    * @stable ICU 2.0
   1241    */
   1242   int32_t lastIndexOf(const UChar *srcChars,
   1243               int32_t srcStart,
   1244               int32_t srcLength,
   1245               int32_t start,
   1246               int32_t length) const;
   1247 
   1248   /**
   1249    * Locate in this string the last occurrence of the BMP code point <code>c</code>,
   1250    * using bitwise comparison.
   1251    * @param c The code unit to search for.
   1252    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1253    * @stable ICU 2.0
   1254    */
   1255   inline int32_t lastIndexOf(UChar c) const;
   1256 
   1257   /**
   1258    * Locate in this string the last occurrence of the code point <TT>c</TT>,
   1259    * using bitwise comparison.
   1260    *
   1261    * @param c The code point to search for.
   1262    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1263    * @stable ICU 2.0
   1264    */
   1265   inline int32_t lastIndexOf(UChar32 c) const;
   1266 
   1267   /**
   1268    * Locate in this string the last occurrence of the BMP code point <code>c</code>
   1269    * starting at offset <TT>start</TT>, using bitwise comparison.
   1270    * @param c The code unit to search for.
   1271    * @param start The offset at which searching will start.
   1272    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1273    * @stable ICU 2.0
   1274    */
   1275   inline int32_t lastIndexOf(UChar c,
   1276               int32_t start) const;
   1277 
   1278   /**
   1279    * Locate in this string the last occurrence of the code point <TT>c</TT>
   1280    * starting at offset <TT>start</TT>, using bitwise comparison.
   1281    *
   1282    * @param c The code point to search for.
   1283    * @param start The offset at which searching will start.
   1284    * @return The offset into this string of <TT>c</TT>, or -1 if not found.
   1285    * @stable ICU 2.0
   1286    */
   1287   inline int32_t lastIndexOf(UChar32 c,
   1288               int32_t start) const;
   1289 
   1290   /**
   1291    * Locate in this the last occurrence of the BMP code point <code>c</code>
   1292    * in the range [<TT>start</TT>, <TT>start + length</TT>),
   1293    * using bitwise comparison.
   1294    * @param c The code unit to search for.
   1295    * @param start the offset into this at which to start matching
   1296    * @param length the number of characters in this to search
   1297    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1298    * @stable ICU 2.0
   1299    */
   1300   inline int32_t lastIndexOf(UChar c,
   1301               int32_t start,
   1302               int32_t length) const;
   1303 
   1304   /**
   1305    * Locate in this the last occurrence of the code point <TT>c</TT>
   1306    * in the range [<TT>start</TT>, <TT>start + length</TT>),
   1307    * using bitwise comparison.
   1308    *
   1309    * @param c The code point to search for.
   1310    * @param start the offset into this at which to start matching
   1311    * @param length the number of characters in this to search
   1312    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1313    * @stable ICU 2.0
   1314    */
   1315   inline int32_t lastIndexOf(UChar32 c,
   1316               int32_t start,
   1317               int32_t length) const;
   1318 
   1319 
   1320   /* Character access */
   1321 
   1322   /**
   1323    * Return the code unit at offset <tt>offset</tt>.
   1324    * If the offset is not valid (0..length()-1) then U+ffff is returned.
   1325    * @param offset a valid offset into the text
   1326    * @return the code unit at offset <tt>offset</tt>
   1327    *         or 0xffff if the offset is not valid for this string
   1328    * @stable ICU 2.0
   1329    */
   1330   inline UChar charAt(int32_t offset) const;
   1331 
   1332   /**
   1333    * Return the code unit at offset <tt>offset</tt>.
   1334    * If the offset is not valid (0..length()-1) then U+ffff is returned.
   1335    * @param offset a valid offset into the text
   1336    * @return the code unit at offset <tt>offset</tt>
   1337    * @stable ICU 2.0
   1338    */
   1339   inline UChar operator[] (int32_t offset) const;
   1340 
   1341   /**
   1342    * Return the code point that contains the code unit
   1343    * at offset <tt>offset</tt>.
   1344    * If the offset is not valid (0..length()-1) then U+ffff is returned.
   1345    * @param offset a valid offset into the text
   1346    * that indicates the text offset of any of the code units
   1347    * that will be assembled into a code point (21-bit value) and returned
   1348    * @return the code point of text at <tt>offset</tt>
   1349    *         or 0xffff if the offset is not valid for this string
   1350    * @stable ICU 2.0
   1351    */
   1352   UChar32 char32At(int32_t offset) const;
   1353 
   1354   /**
   1355    * Adjust a random-access offset so that
   1356    * it points to the beginning of a Unicode character.
   1357    * The offset that is passed in points to
   1358    * any code unit of a code point,
   1359    * while the returned offset will point to the first code unit
   1360    * of the same code point.
   1361    * In UTF-16, if the input offset points to a second surrogate
   1362    * of a surrogate pair, then the returned offset will point
   1363    * to the first surrogate.
   1364    * @param offset a valid offset into one code point of the text
   1365    * @return offset of the first code unit of the same code point
   1366    * @see U16_SET_CP_START
   1367    * @stable ICU 2.0
   1368    */
   1369   int32_t getChar32Start(int32_t offset) const;
   1370 
   1371   /**
   1372    * Adjust a random-access offset so that
   1373    * it points behind a Unicode character.
   1374    * The offset that is passed in points behind
   1375    * any code unit of a code point,
   1376    * while the returned offset will point behind the last code unit
   1377    * of the same code point.
   1378    * In UTF-16, if the input offset points behind the first surrogate
   1379    * (i.e., to the second surrogate)
   1380    * of a surrogate pair, then the returned offset will point
   1381    * behind the second surrogate (i.e., to the first code unit after the pair).
   1382    * @param offset a valid offset after any code unit of a code point of the text
   1383    * @return offset of the first code unit after the same code point
   1384    * @see U16_SET_CP_LIMIT
   1385    * @stable ICU 2.0
   1386    */
   1387   int32_t getChar32Limit(int32_t offset) const;
   1388 
   1389   /**
   1390    * Move the code unit index along the string by delta code points.
   1391    * Interpret the input index as a code unit-based offset into the string,
   1392    * move the index forward or backward by delta code points, and
   1393    * return the resulting index.
   1394    * The input index should point to the first code unit of a code point,
   1395    * if there is more than one.
   1396    *
   1397    * Both input and output indexes are code unit-based as for all
   1398    * string indexes/offsets in ICU (and other libraries, like MBCS char*).
   1399    * If delta<0 then the index is moved backward (toward the start of the string).
   1400    * If delta>0 then the index is moved forward (toward the end of the string).
   1401    *
   1402    * This behaves like CharacterIterator::move32(delta, kCurrent).
   1403    *
   1404    * Behavior for out-of-bounds indexes:
   1405    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
   1406    * if the input index<0 then it is pinned to 0;
   1407    * if the input index>length() then it is pinned to length().
   1408    * Afterwards, the index is moved by <code>delta</code> code points
   1409    * forward or backward,
   1410    * but no further backward than to 0 and no further forward than to length().
   1411    * The resulting index return value will be in between 0 and length(), inclusively.
   1412    *
   1413    * Examples:
   1414    * <pre>
   1415    * // s has code points 'a' U+10000 'b' U+10ffff U+2029
   1416    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
   1417    *
   1418    * // initial index: position of U+10000
   1419    * int32_t index=1;
   1420    *
   1421    * // the following examples will all result in index==4, position of U+10ffff
   1422    *
   1423    * // skip 2 code points from some position in the string
   1424    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
   1425    *
   1426    * // go to the 3rd code point from the start of s (0-based)
   1427    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
   1428    *
   1429    * // go to the next-to-last code point of s
   1430    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
   1431    * </pre>
   1432    *
   1433    * @param index input code unit index
   1434    * @param delta (signed) code point count to move the index forward or backward
   1435    *        in the string
   1436    * @return the resulting code unit index
   1437    * @stable ICU 2.0
   1438    */
   1439   int32_t moveIndex32(int32_t index, int32_t delta) const;
   1440 
   1441   /* Substring extraction */
   1442 
   1443   /**
   1444    * Copy the characters in the range
   1445    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
   1446    * beginning at <tt>dstStart</tt>.
   1447    * If the string aliases to <code>dst</code> itself as an external buffer,
   1448    * then extract() will not copy the contents.
   1449    *
   1450    * @param start offset of first character which will be copied into the array
   1451    * @param length the number of characters to extract
   1452    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
   1453    * must be at least (<tt>dstStart + length</tt>).
   1454    * @param dstStart the offset in <TT>dst</TT> where the first character
   1455    * will be extracted
   1456    * @stable ICU 2.0
   1457    */
   1458   inline void extract(int32_t start,
   1459            int32_t length,
   1460            UChar *dst,
   1461            int32_t dstStart = 0) const;
   1462 
   1463   /**
   1464    * Copy the contents of the string into dest.
   1465    * This is a convenience function that
   1466    * checks if there is enough space in dest,
   1467    * extracts the entire string if possible,
   1468    * and NUL-terminates dest if possible.
   1469    *
   1470    * If the string fits into dest but cannot be NUL-terminated
   1471    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
   1472    * If the string itself does not fit into dest
   1473    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
   1474    *
   1475    * If the string aliases to <code>dest</code> itself as an external buffer,
   1476    * then extract() will not copy the contents.
   1477    *
   1478    * @param dest Destination string buffer.
   1479    * @param destCapacity Number of UChars available at dest.
   1480    * @param errorCode ICU error code.
   1481    * @return length()
   1482    * @stable ICU 2.0
   1483    */
   1484   int32_t
   1485   extract(UChar *dest, int32_t destCapacity,
   1486           UErrorCode &errorCode) const;
   1487 
   1488   /**
   1489    * Copy the characters in the range
   1490    * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
   1491    * <tt>target</tt>.
   1492    * @param start offset of first character which will be copied
   1493    * @param length the number of characters to extract
   1494    * @param target UnicodeString into which to copy characters.
   1495    * The extracted text is stored in <TT>target</TT>; the function itself returns nothing (void).
   1496    * @stable ICU 2.0
   1497    */
   1498   inline void extract(int32_t start,
   1499            int32_t length,
   1500            UnicodeString& target) const;
   1501 
   1502   /**
   1503    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
   1504    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
   1505    * @param start offset of first character which will be copied into the array
   1506    * @param limit offset immediately following the last character to be copied
   1507    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
   1508    * must be at least (<tt>dstStart + (limit - start)</tt>).
   1509    * @param dstStart the offset in <TT>dst</TT> where the first character
   1510    * will be extracted
   1511    * @stable ICU 2.0
   1512    */
   1513   inline void extractBetween(int32_t start,
   1514               int32_t limit,
   1515               UChar *dst,
   1516               int32_t dstStart = 0) const;
   1517 
   1518   /**
   1519    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
   1520    * into the UnicodeString <tt>target</tt>.  Replaceable API.
   1521    * @param start offset of first character which will be copied
   1522    * @param limit offset immediately following the last character to be copied
   1523    * @param target UnicodeString into which to copy characters.
   1524    * The extracted text is stored in <TT>target</TT>; the function itself returns nothing (void).
   1525    * @stable ICU 2.0
   1526    */
   1527   virtual void extractBetween(int32_t start,
   1528               int32_t limit,
   1529               UnicodeString& target) const;
   1530 
   1531   /**
   1532    * Copy the characters in the range
   1533    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters.
   1534    * All characters must be invariant (see utypes.h).
   1535    * Use US_INV as the last, signature-distinguishing parameter.
   1536    *
   1537    * This function does not write any more than <code>targetCapacity</code>
   1538    * characters but returns the length of the entire output string
   1539    * so that one can allocate a larger buffer and call the function again
   1540    * if necessary.
   1541    * The output string is NUL-terminated if possible.
   1542    *
   1543    * @param start offset of first character which will be copied
   1544    * @param startLength the number of characters to extract
   1545    * @param target the target buffer for extraction, can be NULL
   1546    *               if targetCapacity is 0
   1547    * @param targetCapacity the length of the target buffer
   1548    * @param inv Signature-distinguishing parameter, use US_INV.
   1549    * @return the output string length, not including the terminating NUL
   1550    * @stable ICU 3.2
   1551    */
   1552   int32_t extract(int32_t start,
   1553            int32_t startLength,
   1554            char *target,
   1555            int32_t targetCapacity,
   1556            enum EInvariant inv) const;
   1557 
   1558 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
   1559 
   1560   /**
   1561    * Copy the characters in the range
   1562    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
   1563    * in the platform's default codepage.
   1564    * This function does not write any more than <code>targetLength</code>
   1565    * characters but returns the length of the entire output string
   1566    * so that one can allocate a larger buffer and call the function again
   1567    * if necessary.
   1568    * The output string is NUL-terminated if possible.
   1569    *
   1570    * @param start offset of first character which will be copied
   1571    * @param startLength the number of characters to extract
   1572    * @param target the target buffer for extraction
   1573    * @param targetLength the length of the target buffer
   1574    * If <TT>target</TT> is NULL, then the number of bytes required for
   1575    * <TT>target</TT> is returned.
   1576    * @return the output string length, not including the terminating NUL
   1577    * @stable ICU 2.0
   1578    */
   1579   int32_t extract(int32_t start,
   1580            int32_t startLength,
   1581            char *target,
   1582            uint32_t targetLength) const;
   1583 
   1584 #endif
   1585 
   1586 #if !UCONFIG_NO_CONVERSION
   1587 
   1588   /**
   1589    * Copy the characters in the range
   1590    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
   1591    * in a specified codepage.
   1592    * The output string is NUL-terminated.
   1593    *
   1594    * Recommendation: For invariant-character strings use
   1595    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
   1596    * because it avoids object code dependencies of UnicodeString on
   1597    * the conversion code.
   1598    *
   1599    * @param start offset of first character which will be copied
   1600    * @param startLength the number of characters to extract
   1601    * @param target the target buffer for extraction
   1602    * @param codepage the desired codepage for the characters.  0 has
   1603    * the special meaning of the default codepage
   1604    * If <code>codepage</code> is an empty string (<code>""</code>),
   1605    * then a simple conversion is performed on the codepage-invariant
   1606    * subset ("invariant characters") of the platform encoding. See utypes.h.
   1607    * If <TT>target</TT> is NULL, then the number of bytes required for
   1608    * <TT>target</TT> is returned. It is assumed that the target is big enough
   1609    * to fit all of the characters.
   1610    * @return the output string length, not including the terminating NUL
   1611    * @stable ICU 2.0
   1612    */
   1613   inline int32_t extract(int32_t start,
   1614                  int32_t startLength,
   1615                  char *target,
   1616                  const char *codepage = 0) const;
   1617 
   1618   /**
   1619    * Copy the characters in the range
   1620    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
   1621    * in a specified codepage.
   1622    * This function does not write any more than <code>targetLength</code>
   1623    * characters but returns the length of the entire output string
   1624    * so that one can allocate a larger buffer and call the function again
   1625    * if necessary.
   1626    * The output string is NUL-terminated if possible.
   1627    *
   1628    * Recommendation: For invariant-character strings use
   1629    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
   1630    * because it avoids object code dependencies of UnicodeString on
   1631    * the conversion code.
   1632    *
   1633    * @param start offset of first character which will be copied
   1634    * @param startLength the number of characters to extract
   1635    * @param target the target buffer for extraction
   1636    * @param targetLength the length of the target buffer
   1637    * @param codepage the desired codepage for the characters.  0 has
   1638    * the special meaning of the default codepage
   1639    * If <code>codepage</code> is an empty string (<code>""</code>),
   1640    * then a simple conversion is performed on the codepage-invariant
   1641    * subset ("invariant characters") of the platform encoding. See utypes.h.
   1642    * If <TT>target</TT> is NULL, then the number of bytes required for
   1643    * <TT>target</TT> is returned.
   1644    * @return the output string length, not including the terminating NUL
   1645    * @stable ICU 2.0
   1646    */
   1647   int32_t extract(int32_t start,
   1648            int32_t startLength,
   1649            char *target,
   1650            uint32_t targetLength,
   1651            const char *codepage) const;
   1652 
   1653   /**
   1654    * Convert the UnicodeString into a codepage string using an existing UConverter.
   1655    * The output string is NUL-terminated if possible.
   1656    *
   1657    * This function avoids the overhead of opening and closing a converter if
   1658    * multiple strings are extracted.
   1659    *
   1660    * @param dest destination string buffer, can be NULL if destCapacity==0
   1661    * @param destCapacity the number of chars available at dest
   1662    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
   1663    *        or NULL for the default converter
   1664    * @param errorCode normal ICU error code
   1665    * @return the length of the output string, not counting the terminating NUL;
   1666    *         if the length is greater than destCapacity, then the string will not fit
   1667    *         and a buffer of the indicated length would need to be passed in
   1668    * @stable ICU 2.0
   1669    */
   1670   int32_t extract(char *dest, int32_t destCapacity,
   1671                   UConverter *cnv,
   1672                   UErrorCode &errorCode) const;
   1673 
   1674 #endif
   1675 
   1676   /**
   1677    * Create a temporary substring for the specified range.
   1678    * Unlike the substring constructor and setTo() functions,
   1679    * the object returned here will be a read-only alias (using getBuffer())
   1680    * rather than copying the text.
   1681    * As a result, this substring operation is much faster but requires
   1682    * that the original string not be modified or deleted during the lifetime
   1683    * of the returned substring object.
   1684    * @param start offset of the first character visible in the substring
   1685    * @param length length of the substring
   1686    * @return a read-only alias UnicodeString object for the substring
   1687    * @stable ICU 4.4
   1688    */
   1689   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
   1690 
   1691   /**
   1692    * Create a temporary substring for the specified range.
   1693    * Same as tempSubString(start, length) except that the substring range
   1694    * is specified as a (start, limit) pair (with an exclusive limit index)
   1695    * rather than a (start, length) pair.
   1696    * @param start offset of the first character visible in the substring
   1697    * @param limit offset immediately following the last character visible in the substring
   1698    * @return a read-only alias UnicodeString object for the substring
   1699    * @stable ICU 4.4
   1700    */
   1701   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
   1702 
   1703   /**
   1704    * Convert the UnicodeString to UTF-8 and write the result
   1705    * to a ByteSink. This is called by toUTF8String().
   1706    * Unpaired surrogates are replaced with U+FFFD.
   1707    * Calls u_strToUTF8WithSub().
   1708    *
   1709    * @param sink A ByteSink to which the UTF-8 version of the string is written.
   1710    *             sink.Flush() is called at the end.
   1711    * @stable ICU 4.2
   1712    * @see toUTF8String
   1713    */
   1714   void toUTF8(ByteSink &sink) const;
   1715 
   1716 #if U_HAVE_STD_STRING
   1717 
   1718   /**
   1719    * Convert the UnicodeString to UTF-8 and append the result
   1720    * to a standard string.
   1721    * Unpaired surrogates are replaced with U+FFFD.
   1722    * Calls toUTF8().
   1723    *
   1724    * @param result A standard string (or a compatible object)
   1725    *        to which the UTF-8 version of the string is appended.
   1726    * @return The string object.
   1727    * @stable ICU 4.2
   1728    * @see toUTF8
   1729    */
   1730   template<typename StringClass>
   1731   StringClass &toUTF8String(StringClass &result) const {
   1732     StringByteSink<StringClass> sbs(&result);  // wrap the caller's string so it can be passed as a ByteSink
   1733     toUTF8(sbs);                               // convert this string to UTF-8 and write it to the sink
   1734     return result;                             // hand back the same string object that was passed in
   1735   }
   1736 
   1737 #endif
   1738 
   1739   /**
   1740    * Convert the UnicodeString to UTF-32.
   1741    * Unpaired surrogates are replaced with U+FFFD.
   1742    * Calls u_strToUTF32WithSub().
   1743    *
   1744    * @param utf32 destination string buffer, can be NULL if capacity==0
   1745    * @param capacity the number of UChar32s available at utf32
   1746    * @param errorCode Standard ICU error code. Its input value must
   1747    *                  pass the U_SUCCESS() test, or else the function returns
   1748    *                  immediately. Check for U_FAILURE() on output or use with
   1749    *                  function chaining. (See User Guide for details.)
   1750    * @return The length of the UTF-32 string.
   1751    * @see fromUTF32
   1752    * @stable ICU 4.2
   1753    */
   1754   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
   1755 
   1756   /* Length operations */
   1757 
   1758   /**
   1759    * Return the length of the UnicodeString object.
   1760    * The length is the number of UChar code units in the UnicodeString.
   1761    * If you want the number of code points, please use countChar32().
   1762    * @return the length of the UnicodeString object
   1763    * @see countChar32
   1764    * @stable ICU 2.0
   1765    */
   1766   inline int32_t length(void) const;
   1767 
   1768   /**
   1769    * Count Unicode code points in the length UChar code units of the string.
   1770    * A code point may occupy either one or two UChar code units.
   1771    * Counting code points involves reading all code units.
   1772    *
   1773    * This function is basically the inverse of moveIndex32().
   1774    *
   1775    * @param start the index of the first code unit to check
   1776    * @param length the number of UChar code units to check
   1777    * @return the number of code points in the specified code units
   1778    * @see length
   1779    * @stable ICU 2.0
   1780    */
   1781   int32_t
   1782   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
   1783 
   1784   /**
   1785    * Check if the length UChar code units of the string
   1786    * contain more Unicode code points than a certain number.
   1787    * This is more efficient than counting all code points in this part of the string
   1788    * and comparing that number with a threshold.
   1789    * This function may not need to scan the string at all if the length
   1790    * falls within a certain range, and
   1791    * never needs to count more than 'number+1' code points.
   1792    * Logically equivalent to (countChar32(start, length)>number).
   1793    * A Unicode code point may occupy either one or two UChar code units.
   1794    *
   1795    * @param start the index of the first code unit to check (0 for the entire string)
   1796    * @param length the number of UChar code units to check
   1797    *               (use INT32_MAX for the entire string; remember that start/length
   1798    *                values are pinned)
   1799    * @param number The number of code points in the (sub)string is compared against
   1800    *               the 'number' parameter.
   1801    * @return Boolean value for whether the string contains more Unicode code points
   1802    *         than 'number'. Same as (u_countChar32(s, length)>number).
   1803    * @see countChar32
   1804    * @see u_strHasMoreChar32Than
   1805    * @stable ICU 2.4
   1806    */
   1807   UBool
   1808   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
   1809 
   1810   /**
   1811    * Determine if this string is empty.
   1812    * @return TRUE if this string contains 0 characters, FALSE otherwise.
   1813    * @stable ICU 2.0
   1814    */
   1815   inline UBool isEmpty(void) const;
   1816 
   1817   /**
   1818    * Return the capacity of the internal buffer of the UnicodeString object.
   1819    * This is useful together with the getBuffer functions.
   1820    * See there for details.
   1821    *
   1822    * @return the number of UChars available in the internal buffer
   1823    * @see getBuffer
   1824    * @stable ICU 2.0
   1825    */
   1826   inline int32_t getCapacity(void) const;
   1827 
   1828   /* Other operations */
   1829 
   1830   /**
   1831    * Generate a hash code for this object.
   1832    * @return The hash code of this UnicodeString.
   1833    * @stable ICU 2.0
   1834    */
   1835   inline int32_t hashCode(void) const;
   1836 
   1837   /**
   1838    * Determine if this object contains a valid string.
   1839    * A bogus string has no value. It is different from an empty string,
   1840    * although in both cases isEmpty() returns TRUE and length() returns 0.
   1841    * setToBogus() and isBogus() can be used to indicate that no string value is available.
   1842    * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
   1843    * length() returns 0.
   1844    *
   1845    * @return TRUE if the string is bogus/invalid, FALSE otherwise
   1846    * @see setToBogus()
   1847    * @stable ICU 2.0
   1848    */
   1849   inline UBool isBogus(void) const;
   1850 
   1851 
   1852   //========================================
   1853   // Write operations
   1854   //========================================
   1855 
   1856   /* Assignment operations */
   1857 
   1858   /**
   1859    * Assignment operator.  Replace the characters in this UnicodeString
   1860    * with the characters from <TT>srcText</TT>.
   1861    *
   1862    * Starting with ICU 2.4, the assignment operator and the copy constructor
   1863    * allocate a new buffer and copy the buffer contents even for readonly aliases.
   1864    * By contrast, the fastCopyFrom() function implements the old,
   1865    * more efficient but less safe behavior
   1866    * of making this string also a readonly alias to the same buffer.
   1867    *
   1868    * If the source object has an "open" buffer from getBuffer(minCapacity),
   1869    * then the copy is an empty string.
   1870    *
   1871    * @param srcText The text containing the characters to replace
   1872    * @return a reference to this
   1873    * @stable ICU 2.0
   1874    * @see fastCopyFrom
   1875    */
   1876   UnicodeString &operator=(const UnicodeString &srcText);
   1877 
   1878   /**
   1879    * Almost the same as the assignment operator.
   1880    * Replace the characters in this UnicodeString
   1881    * with the characters from <code>srcText</code>.
   1882    *
   1883    * This function works the same as the assignment operator
   1884    * for all strings except for ones that are readonly aliases.
   1885    *
   1886    * Starting with ICU 2.4, the assignment operator and the copy constructor
   1887    * allocate a new buffer and copy the buffer contents even for readonly aliases.
   1888    * This function implements the old, more efficient but less safe behavior
   1889    * of making this string also a readonly alias to the same buffer.
   1890    *
   1891    * The fastCopyFrom function must be used only if it is known that the lifetime of
   1892    * this UnicodeString does not exceed the lifetime of the aliased buffer
   1893    * including its contents, for example for strings from resource bundles
   1894    * or aliases to string constants.
   1895    *
   1896    * If the source object has an "open" buffer from getBuffer(minCapacity),
   1897    * then the copy is an empty string.
   1898    *
   1899    * @param src The text containing the characters to replace.
   1900    * @return a reference to this
   1901    * @stable ICU 2.4
   1902    */
   1903   UnicodeString &fastCopyFrom(const UnicodeString &src);
   1904 
   1905 #ifndef U_HIDE_DRAFT_API
   1906 #if U_HAVE_RVALUE_REFERENCES
   1907   /**
   1908    * Move assignment operator, might leave src in bogus state.
   1909    * This string will have the same contents and state that the source string had.
   1910    * The behavior is undefined if *this and src are the same object.
   1911    * @param src source string
   1912    * @return *this
   1913    * @draft ICU 56
   1914    */
   1915   UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
   1916     return moveFrom(src);  // the named parameter is an lvalue here, so it binds to moveFrom(UnicodeString &)
   1917   }
   1918 #endif
   1919   /**
   1920    * Move assignment, might leave src in bogus state.
   1921    * This string will have the same contents and state that the source string had.
   1922    * The behavior is undefined if *this and src are the same object.
   1923    *
   1924    * Can be called explicitly, does not need C++11 support.
   1925    * @param src source string
   1926    * @return *this
   1927    * @draft ICU 56
   1928    */
   1929   UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
   1930 
   1931   /**
   1932    * Swap strings.
   1933    * @param other other string
   1934    * @draft ICU 56
   1935    */
   1936   void swap(UnicodeString &other) U_NOEXCEPT;
   1937 
   1938   /**
   1939    * Non-member UnicodeString swap function.
   1940    * @param s1 will get s2's contents and state
   1941    * @param s2 will get s1's contents and state
   1942    * @draft ICU 56
   1943    */
   1944   friend U_COMMON_API inline void U_EXPORT2
   1945   swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
   1946     s1.swap(s2);  // forwards to the member swap(UnicodeString &)
   1947   }
   1948 #endif  /* U_HIDE_DRAFT_API */
   1949 
   1950   /**
   1951    * Assignment operator.  Replace the characters in this UnicodeString
   1952    * with the code unit <TT>ch</TT>.
   1953    * @param ch the code unit to replace
   1954    * @return a reference to this
   1955    * @stable ICU 2.0
   1956    */
   1957   inline UnicodeString& operator= (UChar ch);
   1958 
   1959   /**
   1960    * Assignment operator.  Replace the characters in this UnicodeString
   1961    * with the code point <TT>ch</TT>.
   1962    * @param ch the code point to replace
   1963    * @return a reference to this
   1964    * @stable ICU 2.0
   1965    */
   1966   inline UnicodeString& operator= (UChar32 ch);
   1967 
   1968   /**
   1969    * Set the text in the UnicodeString object to the characters
   1970    * in <TT>srcText</TT> in the range
   1971    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
   1972    * <TT>srcText</TT> is not modified.
   1973    * @param srcText the source for the new characters
   1974    * @param srcStart the offset into <TT>srcText</TT> where new characters
   1975    * will be obtained
   1976    * @return a reference to this
   1977    * @stable ICU 2.2
   1978    */
   1979   inline UnicodeString& setTo(const UnicodeString& srcText,
   1980                int32_t srcStart);
   1981 
   1982   /**
   1983    * Set the text in the UnicodeString object to the characters
   1984    * in <TT>srcText</TT> in the range
   1985    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   1986    * <TT>srcText</TT> is not modified.
   1987    * @param srcText the source for the new characters
   1988    * @param srcStart the offset into <TT>srcText</TT> where new characters
   1989    * will be obtained
   1990    * @param srcLength the number of characters in <TT>srcText</TT> in the
   1991    * replace string.
   1992    * @return a reference to this
   1993    * @stable ICU 2.0
   1994    */
   1995   inline UnicodeString& setTo(const UnicodeString& srcText,
   1996                int32_t srcStart,
   1997                int32_t srcLength);
   1998 
   1999   /**
   2000    * Set the text in the UnicodeString object to the characters in
   2001    * <TT>srcText</TT>.
   2002    * <TT>srcText</TT> is not modified.
   2003    * @param srcText the source for the new characters
   2004    * @return a reference to this
   2005    * @stable ICU 2.0
   2006    */
   2007   inline UnicodeString& setTo(const UnicodeString& srcText);
   2008 
   2009   /**
   2010    * Set the characters in the UnicodeString object to the characters
   2011    * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
   2012    * @param srcChars the source for the new characters
   2013    * @param srcLength the number of Unicode characters in srcChars.
   2014    * @return a reference to this
   2015    * @stable ICU 2.0
   2016    */
   2017   inline UnicodeString& setTo(const UChar *srcChars,
   2018                int32_t srcLength);
   2019 
   2020   /**
   2021    * Set the characters in the UnicodeString object to the code unit
   2022    * <TT>srcChar</TT>.
   2023    * @param srcChar the code unit which becomes the UnicodeString's character
   2024    * content
   2025    * @return a reference to this
   2026    * @stable ICU 2.0
   2027    */
   2028   UnicodeString& setTo(UChar srcChar);
   2029 
   2030   /**
   2031    * Set the characters in the UnicodeString object to the code point
   2032    * <TT>srcChar</TT>.
   2033    * @param srcChar the code point which becomes the UnicodeString's character
   2034    * content
   2035    * @return a reference to this
   2036    * @stable ICU 2.0
   2037    */
   2038   UnicodeString& setTo(UChar32 srcChar);
   2039 
   2040   /**
   2041    * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
   2042    * The text will be used for the UnicodeString object, but
   2043    * it will not be released when the UnicodeString is destroyed.
   2044    * This has copy-on-write semantics:
   2045    * When the string is modified, then the buffer is first copied into
   2046    * newly allocated memory.
   2047    * The aliased buffer is never modified.
   2048    *
   2049    * In an assignment to another UnicodeString, when using the copy constructor
   2050    * or the assignment operator, the text will be copied.
   2051    * When using fastCopyFrom(), the text will be aliased again,
   2052    * so that both strings then alias the same readonly-text.
   2053    *
   2054    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
   2055    *                     This must be true if <code>textLength==-1</code>.
   2056    * @param text The characters to alias for the UnicodeString.
   2057    * @param textLength The number of Unicode characters in <code>text</code> to alias.
   2058    *                   If -1, then this function will determine the length
   2059    *                   by calling <code>u_strlen()</code>.
   2060    * @return a reference to this
   2061    * @stable ICU 2.0
   2062    */
   2063   UnicodeString &setTo(UBool isTerminated,
   2064                        const UChar *text,
   2065                        int32_t textLength);
   2066 
   2067   /**
   2068    * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
   2069    * The text will be used for the UnicodeString object, but
   2070    * it will not be released when the UnicodeString is destroyed.
   2071    * This has write-through semantics:
   2072    * For as long as the capacity of the buffer is sufficient, write operations
   2073    * will directly affect the buffer. When more capacity is necessary, then
   2074    * a new buffer will be allocated and the contents copied as with regularly
   2075    * constructed strings.
   2076    * In an assignment to another UnicodeString, the buffer will be copied.
   2077    * The extract(UChar *dst) function detects whether the dst pointer is the same
   2078    * as the string buffer itself and will in this case not copy the contents.
   2079    *
   2080    * @param buffer The characters to alias for the UnicodeString.
   2081    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
   2082    * @param buffCapacity The size of <code>buffer</code> in UChars.
   2083    * @return a reference to this
   2084    * @stable ICU 2.0
   2085    */
   2086   UnicodeString &setTo(UChar *buffer,
   2087                        int32_t buffLength,
   2088                        int32_t buffCapacity);
   2089 
   2090   /**
   2091    * Make this UnicodeString object invalid.
   2092    * The string will test TRUE with isBogus().
   2093    *
   2094    * A bogus string has no value. It is different from an empty string.
   2095    * It can be used to indicate that no string value is available.
   2096    * getBuffer() and getTerminatedBuffer() return NULL, and
   2097    * length() returns 0.
   2098    *
   2099    * This utility function is used throughout the UnicodeString
   2100    * implementation to indicate that a UnicodeString operation failed,
   2101    * and may be used in other functions,
   2102    * especially but not exclusively when such functions do not
   2103    * take a UErrorCode for simplicity.
   2104    *
   2105    * The following methods, and no others, will clear a string object's bogus flag:
   2106    * - remove()
   2107    * - remove(0, INT32_MAX)
   2108    * - truncate(0)
   2109    * - operator=() (assignment operator)
   2110    * - setTo(...)
   2111    *
   2112    * The simplest way to turn a bogus string into an empty one
   2113    * is to use the remove() function.
   2114    * Examples for other functions that are equivalent to "set to empty string":
   2115    * \code
   2116    * if(s.isBogus()) {
   2117    *   s.remove();           // set to an empty string (remove all), or
   2118    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
   2119    *   s.truncate(0);        // set to an empty string (complete truncation), or
   2120    *   s=UnicodeString();    // assign an empty string, or
   2121    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
   2122    *   static const UChar nul=0;
   2123    *   s.setTo(&nul, 0);     // set to an empty C Unicode string
   2124    * }
   2125    * \endcode
   2126    *
   2127    * @see isBogus()
   2128    * @stable ICU 2.0
   2129    */
   2130   void setToBogus();
   2131 
   2132   /**
   2133    * Set the character at the specified offset to the specified character.
   2134    * @param offset A valid offset into the text of the character to set
   2135    * @param ch The new character
   2136    * @return A reference to this
   2137    * @stable ICU 2.0
   2138    */
   2139   UnicodeString& setCharAt(int32_t offset,
   2140                UChar ch);
   2141 
   2142 
   2143   /* Append operations */
   2144 
   2145   /**
   2146    * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
   2147    * object.
   2148    * @param ch the code unit to be appended
   2149    * @return a reference to this
   2150    * @stable ICU 2.0
   2151    */
   2152  inline  UnicodeString& operator+= (UChar ch);
   2153 
   2154   /**
   2155    * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
   2156    * object.
   2157    * @param ch the code point to be appended
   2158    * @return a reference to this
   2159    * @stable ICU 2.0
   2160    */
   2161  inline  UnicodeString& operator+= (UChar32 ch);
   2162 
   2163   /**
   2164    * Append operator. Append the characters in <TT>srcText</TT> to the
   2165    * UnicodeString object. <TT>srcText</TT> is not modified.
   2166    * @param srcText the source for the new characters
   2167    * @return a reference to this
   2168    * @stable ICU 2.0
   2169    */
   2170   inline UnicodeString& operator+= (const UnicodeString& srcText);
   2171 
   2172   /**
   2173    * Append the characters
   2174    * in <TT>srcText</TT> in the range
   2175    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
   2176    * UnicodeString object. <TT>srcText</TT>
   2177    * is not modified.
   2178    * @param srcText the source for the new characters
   2179    * @param srcStart the offset into <TT>srcText</TT> where new characters
   2180    * will be obtained
   2181    * @param srcLength the number of characters in <TT>srcText</TT> in
   2182    * the append string
   2183    * @return a reference to this
   2184    * @stable ICU 2.0
   2185    */
   2186   inline UnicodeString& append(const UnicodeString& srcText,
   2187             int32_t srcStart,
   2188             int32_t srcLength);
   2189 
   2190   /**
   2191    * Append the characters in <TT>srcText</TT> to the UnicodeString object.
   2192    * <TT>srcText</TT> is not modified.
   2193    * @param srcText the source for the new characters
   2194    * @return a reference to this
   2195    * @stable ICU 2.0
   2196    */
   2197   inline UnicodeString& append(const UnicodeString& srcText);
   2198 
   2199   /**
   2200    * Append the characters in <TT>srcChars</TT> in the range
   2201    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
   2202    * object.
   2203    * <TT>srcChars</TT> is not modified.
   2204    * @param srcChars the source for the new characters
   2205    * @param srcStart the offset into <TT>srcChars</TT> where new characters
   2206    * will be obtained
   2207    * @param srcLength the number of characters in <TT>srcChars</TT> in
   2208    *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
   2209    * @return a reference to this
   2210    * @stable ICU 2.0
   2211    */
   2212   inline UnicodeString& append(const UChar *srcChars,
   2213             int32_t srcStart,
   2214             int32_t srcLength);
   2215 
   2216   /**
   2217    * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
   2218    * <TT>srcChars</TT> is not modified.
   2219    * @param srcChars the source for the new characters
   2220    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
   2221    *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
   2222    * @return a reference to this
   2223    * @stable ICU 2.0
   2224    */
   2225   inline UnicodeString& append(const UChar *srcChars,
   2226             int32_t srcLength);
   2227 
   2228   /**
   2229    * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
   2230    * @param srcChar the code unit to append
   2231    * @return a reference to this
   2232    * @stable ICU 2.0
   2233    */
   2234   inline UnicodeString& append(UChar srcChar);
   2235 
   2236   /**
   2237    * Append the code point <TT>srcChar</TT> to the UnicodeString object.
   2238    * @param srcChar the code point to append
   2239    * @return a reference to this
   2240    * @stable ICU 2.0
   2241    */
   2242   UnicodeString& append(UChar32 srcChar);
   2243 
   2244 
   2245   /* Insert operations */
   2246 
   2247   /**
   2248    * Insert the characters in <TT>srcText</TT> in the range
   2249    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
   2250    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
   2251    * @param start the offset where the insertion begins
   2252    * @param srcText the source for the new characters
   2253    * @param srcStart the offset into <TT>srcText</TT> where new characters
   2254    * will be obtained
   2255    * @param srcLength the number of characters in <TT>srcText</TT> in
   2256    * the insert string
   2257    * @return a reference to this
   2258    * @stable ICU 2.0
   2259    */
   2260   inline UnicodeString& insert(int32_t start,
   2261             const UnicodeString& srcText,
   2262             int32_t srcStart,
   2263             int32_t srcLength);
   2264 
   2265   /**
   2266    * Insert the characters in <TT>srcText</TT> into the UnicodeString object
   2267    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
   2268    * @param start the offset where the insertion begins
   2269    * @param srcText the source for the new characters
   2270    * @return a reference to this
   2271    * @stable ICU 2.0
   2272    */
   2273   inline UnicodeString& insert(int32_t start,
   2274             const UnicodeString& srcText);
   2275 
   2276   /**
   2277    * Insert the characters in <TT>srcChars</TT> in the range
   2278    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
   2279    *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
   2280    * @param start the offset at which the insertion begins
   2281    * @param srcChars the source for the new characters
   2282    * @param srcStart the offset into <TT>srcChars</TT> where new characters
   2283    * will be obtained
   2284    * @param srcLength the number of characters in <TT>srcChars</TT>
   2285    * in the insert string
   2286    * @return a reference to this
   2287    * @stable ICU 2.0
   2288    */
   2289   inline UnicodeString& insert(int32_t start,
   2290             const UChar *srcChars,
   2291             int32_t srcStart,
   2292             int32_t srcLength);
   2293 
   2294   /**
   2295    * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
   2296    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
   2297    * @param start the offset where the insertion begins
   2298    * @param srcChars the source for the new characters
   2299    * @param srcLength the number of Unicode characters in srcChars.
   2300    * @return a reference to this
   2301    * @stable ICU 2.0
   2302    */
   2303   inline UnicodeString& insert(int32_t start,
   2304             const UChar *srcChars,
   2305             int32_t srcLength);
   2306 
   2307   /**
   2308    * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
   2309    * offset <TT>start</TT>.
   2310    * @param start the offset at which the insertion occurs
   2311    * @param srcChar the code unit to insert
   2312    * @return a reference to this
   2313    * @stable ICU 2.0
   2314    */
   2315   inline UnicodeString& insert(int32_t start,
   2316             UChar srcChar);
   2317 
   2318   /**
   2319    * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
   2320    * offset <TT>start</TT>.
   2321    * @param start the offset at which the insertion occurs
   2322    * @param srcChar the code point to insert
   2323    * @return a reference to this
   2324    * @stable ICU 2.0
   2325    */
   2326   inline UnicodeString& insert(int32_t start,
   2327             UChar32 srcChar);
   2328 
   2329 
   2330   /* Replace operations */
   2331 
   2332   /**
   2333    * Replace the characters in the range
   2334    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
   2335    * <TT>srcText</TT> in the range
   2336    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   2337    * <TT>srcText</TT> is not modified.
   2338    * @param start the offset at which the replace operation begins
   2339    * @param length the number of characters to replace. The character at
   2340    * <TT>start + length</TT> is not modified.
   2341    * @param srcText the source for the new characters
   2342    * @param srcStart the offset into <TT>srcText</TT> where new characters
   2343    * will be obtained
   2344    * @param srcLength the number of characters in <TT>srcText</TT> in
   2345    * the replace string
   2346    * @return a reference to this
   2347    * @stable ICU 2.0
   2348    */
   2349   UnicodeString& replace(int32_t start,
   2350              int32_t length,
   2351              const UnicodeString& srcText,
   2352              int32_t srcStart,
   2353              int32_t srcLength);
   2354 
   2355   /**
   2356    * Replace the characters in the range
   2357    * [<TT>start</TT>, <TT>start + length</TT>)
   2358    * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
   2359    *  not modified.
   2360    * @param start the offset at which the replace operation begins
   2361    * @param length the number of characters to replace. The character at
   2362    * <TT>start + length</TT> is not modified.
   2363    * @param srcText the source for the new characters
   2364    * @return a reference to this
   2365    * @stable ICU 2.0
   2366    */
   2367   UnicodeString& replace(int32_t start,
   2368              int32_t length,
   2369              const UnicodeString& srcText);
   2370 
   2371   /**
   2372    * Replace the characters in the range
   2373    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
   2374    * <TT>srcChars</TT> in the range
   2375    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
   2376    * is not modified.
   2377    * @param start the offset at which the replace operation begins
   2378    * @param length the number of characters to replace.  The character at
   2379    * <TT>start + length</TT> is not modified.
   2380    * @param srcChars the source for the new characters
   2381    * @param srcStart the offset into <TT>srcChars</TT> where new characters
   2382    * will be obtained
   2383    * @param srcLength the number of characters in <TT>srcChars</TT>
   2384    * in the replace string
   2385    * @return a reference to this
   2386    * @stable ICU 2.0
   2387    */
   2388   UnicodeString& replace(int32_t start,
   2389              int32_t length,
   2390              const UChar *srcChars,
   2391              int32_t srcStart,
   2392              int32_t srcLength);
   2393 
   2394   /**
   2395    * Replace the characters in the range
   2396    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
   2397    * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
   2398    * @param start the offset at which the replace operation begins
   2399    * @param length number of characters to replace.  The character at
   2400    * <TT>start + length</TT> is not modified.
   2401    * @param srcChars the source for the new characters
   2402    * @param srcLength the number of Unicode characters in srcChars
   2403    * @return a reference to this
   2404    * @stable ICU 2.0
   2405    */
   2406   inline UnicodeString& replace(int32_t start,
   2407              int32_t length,
   2408              const UChar *srcChars,
   2409              int32_t srcLength);
   2410 
   2411   /**
   2412    * Replace the characters in the range
   2413    * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
   2414    * <TT>srcChar</TT>.
   2415    * @param start the offset at which the replace operation begins
   2416    * @param length the number of characters to replace.  The character at
   2417    * <TT>start + length</TT> is not modified.
   2418    * @param srcChar the new code unit
   2419    * @return a reference to this
   2420    * @stable ICU 2.0
   2421    */
   2422   inline UnicodeString& replace(int32_t start,
   2423              int32_t length,
   2424              UChar srcChar);
   2425 
   2426   /**
   2427    * Replace the characters in the range
   2428    * [<TT>start</TT>, <TT>start + length</TT>) with the code point
   2429    * <TT>srcChar</TT>.
   2430    * @param start the offset at which the replace operation begins
   2431    * @param length the number of characters to replace.  The character at
   2432    * <TT>start + length</TT> is not modified.
   2433    * @param srcChar the new code point
   2434    * @return a reference to this
   2435    * @stable ICU 2.0
   2436    */
   2437   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
   2438 
   2439   /**
   2440    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
   2441    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
   2442    * @param start the offset at which the replace operation begins
   2443    * @param limit the offset immediately following the replace range
   2444    * @param srcText the source for the new characters
   2445    * @return a reference to this
   2446    * @stable ICU 2.0
   2447    */
   2448   inline UnicodeString& replaceBetween(int32_t start,
   2449                 int32_t limit,
   2450                 const UnicodeString& srcText);
   2451 
   2452   /**
   2453    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
   2454    * with the characters in <TT>srcText</TT> in the range
   2455    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
   2456    * @param start the offset at which the replace operation begins
   2457    * @param limit the offset immediately following the replace range
   2458    * @param srcText the source for the new characters
   2459    * @param srcStart the offset into <TT>srcText</TT> where new characters
   2460    * will be obtained
   2461    * @param srcLimit the offset immediately following the range to copy
   2462    * in <TT>srcText</TT>
   2463    * @return a reference to this
   2464    * @stable ICU 2.0
   2465    */
   2466   inline UnicodeString& replaceBetween(int32_t start,
   2467                 int32_t limit,
   2468                 const UnicodeString& srcText,
   2469                 int32_t srcStart,
   2470                 int32_t srcLimit);
   2471 
   2472   /**
   2473    * Replace a substring of this object with the given text.
   2474    * @param start the beginning index, inclusive; <code>0 <= start
   2475    * <= limit</code>.
   2476    * @param limit the ending index, exclusive; <code>start <= limit
   2477    * <= length()</code>.
   2478    * @param text the text to replace characters <code>start</code>
   2479    * to <code>limit - 1</code>
   2480    * @stable ICU 2.0
   2481    */
   2482   virtual void handleReplaceBetween(int32_t start,
   2483                                     int32_t limit,
   2484                                     const UnicodeString& text);
   2485 
   2486   /**
   2487    * Replaceable API
   2488    * @return TRUE if it has MetaData
   2489    * @stable ICU 2.4
   2490    */
   2491   virtual UBool hasMetaData() const;
   2492 
   2493   /**
   2494    * Copy a substring of this object, retaining attribute (out-of-band)
   2495    * information.  This method is used to duplicate or reorder substrings.
   2496    * The destination index must not overlap the source range.
   2497    *
   2498    * @param start the beginning index, inclusive; <code>0 <= start <=
   2499    * limit</code>.
   2500    * @param limit the ending index, exclusive; <code>start <= limit <=
   2501    * length()</code>.
   2502    * @param dest the destination index.  The characters from
   2503    * <code>start..limit-1</code> will be copied to <code>dest</code>.
   2504    * Implementations of this method may assume that <code>dest <= start ||
   2505    * dest >= limit</code>.
   2506    * @stable ICU 2.0
   2507    */
   2508   virtual void copy(int32_t start, int32_t limit, int32_t dest);
   2509 
   2510   /* Search and replace operations */
   2511 
   2512   /**
   2513    * Replace all occurrences of characters in oldText with the characters
   2514    * in newText
   2515    * @param oldText the text containing the search text
   2516    * @param newText the text containing the replacement text
   2517    * @return a reference to this
   2518    * @stable ICU 2.0
   2519    */
   2520   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
   2521                 const UnicodeString& newText);
   2522 
   2523   /**
   2524    * Replace all occurrences of characters in oldText with characters
   2525    * in newText
   2526    * in the range [<TT>start</TT>, <TT>start + length</TT>).
   2527    * @param start the start of the range in which replace will be performed
   2528    * @param length the length of the range in which replace will be performed
   2529    * @param oldText the text containing the search text
   2530    * @param newText the text containing the replacement text
   2531    * @return a reference to this
   2532    * @stable ICU 2.0
   2533    */
   2534   inline UnicodeString& findAndReplace(int32_t start,
   2535                 int32_t length,
   2536                 const UnicodeString& oldText,
   2537                 const UnicodeString& newText);
   2538 
   2539   /**
   2540    * Replace all occurrences of characters in oldText in the range
   2541    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
   2542    * in newText in the range
   2543    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
   2544    * in the range [<TT>start</TT>, <TT>start + length</TT>).
   2545    * @param start the start of the range in which replace will be performed
   2546    * @param length the length of the range in which replace will be performed
   2547    * @param oldText the text containing the search text
   2548    * @param oldStart the start of the search range in <TT>oldText</TT>
   2549    * @param oldLength the length of the search range in <TT>oldText</TT>
   2550    * @param newText the text containing the replacement text
   2551    * @param newStart the start of the replacement range in <TT>newText</TT>
   2552    * @param newLength the length of the replacement range in <TT>newText</TT>
   2553    * @return a reference to this
   2554    * @stable ICU 2.0
   2555    */
   2556   UnicodeString& findAndReplace(int32_t start,
   2557                 int32_t length,
   2558                 const UnicodeString& oldText,
   2559                 int32_t oldStart,
   2560                 int32_t oldLength,
   2561                 const UnicodeString& newText,
   2562                 int32_t newStart,
   2563                 int32_t newLength);
   2564 
   2565 
   2566   /* Remove operations */
   2567 
   2568   /**
   2569    * Remove all characters from the UnicodeString object.
   2570    * @return a reference to this
   2571    * @stable ICU 2.0
   2572    */
   2573   inline UnicodeString& remove(void);
   2574 
   2575   /**
   2576    * Remove the characters in the range
   2577    * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
   2578    * @param start the offset of the first character to remove
   2579    * @param length the number of characters to remove
   2580    * @return a reference to this
   2581    * @stable ICU 2.0
   2582    */
   2583   inline UnicodeString& remove(int32_t start,
   2584                                int32_t length = (int32_t)INT32_MAX);
   2585 
   2586   /**
   2587    * Remove the characters in the range
   2588    * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
   2589    * @param start the offset of the first character to remove
   2590    * @param limit the offset immediately following the range to remove
   2591    * @return a reference to this
   2592    * @stable ICU 2.0
   2593    */
   2594   inline UnicodeString& removeBetween(int32_t start,
   2595                                       int32_t limit = (int32_t)INT32_MAX);
   2596 
   2597   /**
   2598    * Retain only the characters in the range
   2599    * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
   2600    * Removes characters before <code>start</code> and at and after <code>limit</code>.
   2601    * @param start the offset of the first character to retain
   2602    * @param limit the offset immediately following the range to retain
   2603    * @return a reference to this
   2604    * @stable ICU 4.4
   2605    */
   2606   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
   2607 
   2608   /* Length operations */
   2609 
   2610   /**
   2611    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
   2612    * If the length of this UnicodeString is less than targetLength,
   2613    * targetLength - length() copies of padChar will be added to the
   2614    * beginning of this UnicodeString.
   2615    * @param targetLength the desired length of the string
   2616    * @param padChar the character to use for padding. Defaults to
   2617    * space (U+0020)
   2618    * @return TRUE if the text was padded, FALSE otherwise.
   2619    * @stable ICU 2.0
   2620    */
   2621   UBool padLeading(int32_t targetLength,
   2622                     UChar padChar = 0x0020);
   2623 
   2624   /**
   2625    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
   2626    * If the length of this UnicodeString is less than targetLength,
   2627    * targetLength - length() copies of padChar will be added to the
   2628    * end of this UnicodeString.
   2629    * @param targetLength the desired length of the string
   2630    * @param padChar the character to use for padding. Defaults to
   2631    * space (U+0020)
   2632    * @return TRUE if the text was padded, FALSE otherwise.
   2633    * @stable ICU 2.0
   2634    */
   2635   UBool padTrailing(int32_t targetLength,
   2636                      UChar padChar = 0x0020);
   2637 
   2638   /**
   2639    * Truncate this UnicodeString to the <TT>targetLength</TT>.
   2640    * @param targetLength the desired length of this UnicodeString.
   2641    * @return TRUE if the text was truncated, FALSE otherwise
   2642    * @stable ICU 2.0
   2643    */
   2644   inline UBool truncate(int32_t targetLength);
   2645 
   2646   /**
   2647    * Trims leading and trailing whitespace from this UnicodeString.
   2648    * @return a reference to this
   2649    * @stable ICU 2.0
   2650    */
   2651   UnicodeString& trim(void);
   2652 
   2653 
   2654   /* Miscellaneous operations */
   2655 
   2656   /**
   2657    * Reverse this UnicodeString in place.
   2658    * @return a reference to this
   2659    * @stable ICU 2.0
   2660    */
   2661   inline UnicodeString& reverse(void);
   2662 
   2663   /**
   2664    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
   2665    * this UnicodeString.
   2666    * @param start the start of the range to reverse
   2667    * @param length the number of characters to reverse
   2668    * @return a reference to this
   2669    * @stable ICU 2.0
   2670    */
   2671   inline UnicodeString& reverse(int32_t start,
   2672              int32_t length);
   2673 
   2674   /**
   2675    * Convert the characters in this to UPPER CASE following the conventions of
   2676    * the default locale.
   2677    * @return A reference to this.
   2678    * @stable ICU 2.0
   2679    */
   2680   UnicodeString& toUpper(void);
   2681 
   2682   /**
   2683    * Convert the characters in this to UPPER CASE following the conventions of
   2684    * a specific locale.
   2685    * @param locale The locale containing the conventions to use.
   2686    * @return A reference to this.
   2687    * @stable ICU 2.0
   2688    */
   2689   UnicodeString& toUpper(const Locale& locale);
   2690 
   2691   /**
   2692    * Convert the characters in this to lower case following the conventions of
   2693    * the default locale.
   2694    * @return A reference to this.
   2695    * @stable ICU 2.0
   2696    */
   2697   UnicodeString& toLower(void);
   2698 
   2699   /**
   2700    * Convert the characters in this to lower case following the conventions of
   2701    * a specific locale.
   2702    * @param locale The locale containing the conventions to use.
   2703    * @return A reference to this.
   2704    * @stable ICU 2.0
   2705    */
   2706   UnicodeString& toLower(const Locale& locale);
   2707 
   2708 #if !UCONFIG_NO_BREAK_ITERATION
   2709 
   2710   /**
   2711    * Titlecase this string, convenience function using the default locale.
   2712    *
   2713    * Casing is locale-dependent and context-sensitive.
   2714    * Titlecasing uses a break iterator to find the first characters of words
   2715    * that are to be titlecased. It titlecases those characters and lowercases
   2716    * all others.
   2717    *
   2718    * The titlecase break iterator can be provided to customize for arbitrary
   2719    * styles, using rules and dictionaries beyond the standard iterators.
   2720    * It may be more efficient to always provide an iterator to avoid
   2721    * opening and closing one for each string.
   2722    * The standard titlecase iterator for the root locale implements the
   2723    * algorithm of Unicode TR 21.
   2724    *
   2725    * This function uses only the setText(), first() and next() methods of the
   2726    * provided break iterator.
   2727    *
   2728    * @param titleIter A break iterator to find the first characters of words
   2729    *                  that are to be titlecased.
   2730    *                  If none is provided (0), then a standard titlecase
   2731    *                  break iterator is opened.
   2732    *                  Otherwise the provided iterator is set to the string's text.
   2733    * @return A reference to this.
   2734    * @stable ICU 2.1
   2735    */
   2736   UnicodeString &toTitle(BreakIterator *titleIter);
   2737 
   2738   /**
   2739    * Titlecase this string.
   2740    *
   2741    * Casing is locale-dependent and context-sensitive.
   2742    * Titlecasing uses a break iterator to find the first characters of words
   2743    * that are to be titlecased. It titlecases those characters and lowercases
   2744    * all others.
   2745    *
   2746    * The titlecase break iterator can be provided to customize for arbitrary
   2747    * styles, using rules and dictionaries beyond the standard iterators.
   2748    * It may be more efficient to always provide an iterator to avoid
   2749    * opening and closing one for each string.
   2750    * The standard titlecase iterator for the root locale implements the
   2751    * algorithm of Unicode TR 21.
   2752    *
   2753    * This function uses only the setText(), first() and next() methods of the
   2754    * provided break iterator.
   2755    *
   2756    * @param titleIter A break iterator to find the first characters of words
   2757    *                  that are to be titlecased.
   2758    *                  If none is provided (0), then a standard titlecase
   2759    *                  break iterator is opened.
   2760    *                  Otherwise the provided iterator is set to the string's text.
   2761    * @param locale    The locale to consider.
   2762    * @return A reference to this.
   2763    * @stable ICU 2.1
   2764    */
   2765   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
   2766 
   2767   /**
   2768    * Titlecase this string, with options.
   2769    *
   2770    * Casing is locale-dependent and context-sensitive.
   2771    * Titlecasing uses a break iterator to find the first characters of words
   2772    * that are to be titlecased. It titlecases those characters and lowercases
   2773    * all others. (This can be modified with options.)
   2774    *
   2775    * The titlecase break iterator can be provided to customize for arbitrary
   2776    * styles, using rules and dictionaries beyond the standard iterators.
   2777    * It may be more efficient to always provide an iterator to avoid
   2778    * opening and closing one for each string.
   2779    * The standard titlecase iterator for the root locale implements the
   2780    * algorithm of Unicode TR 21.
   2781    *
   2782    * This function uses only the setText(), first() and next() methods of the
   2783    * provided break iterator.
   2784    *
   2785    * @param titleIter A break iterator to find the first characters of words
   2786    *                  that are to be titlecased.
   2787    *                  If none is provided (0), then a standard titlecase
   2788    *                  break iterator is opened.
   2789    *                  Otherwise the provided iterator is set to the string's text.
   2790    * @param locale    The locale to consider.
   2791    * @param options Options bit set, see ucasemap_open().
   2792    * @return A reference to this.
   2793    * @see U_TITLECASE_NO_LOWERCASE
   2794    * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
   2795    * @see ucasemap_open
   2796    * @stable ICU 3.8
   2797    */
   2798   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
   2799 
   2800 #endif
   2801 
   2802   /**
   2803    * Case-folds the characters in this string.
   2804    *
   2805    * Case-folding is locale-independent and not context-sensitive,
   2806    * but there is an option for whether to include or exclude mappings for dotted I
   2807    * and dotless i that are marked with 'T' in CaseFolding.txt.
   2808    *
   2809    * The result may be longer or shorter than the original.
   2810    *
   2811    * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
   2812    * @return A reference to this.
   2813    * @stable ICU 2.0
   2814    */
   2815   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
   2816 
   2817   //========================================
   2818   // Access to the internal buffer
   2819   //========================================
   2820 
   2821   /**
   2822    * Get a read/write pointer to the internal buffer.
   2823    * The buffer is guaranteed to be large enough for at least minCapacity UChars,
   2824    * writable, and is still owned by the UnicodeString object.
   2825    * Calls to getBuffer(minCapacity) must not be nested, and
   2826    * must be matched with calls to releaseBuffer(newLength).
   2827    * If the string buffer was read-only or shared,
   2828    * then it will be reallocated and copied.
   2829    *
   2830    * An attempted nested call will return 0, and will not further modify the
   2831    * state of the UnicodeString object.
   2832    * It also returns 0 if the string is bogus.
   2833    *
   2834    * The actual capacity of the string buffer may be larger than minCapacity.
   2835    * getCapacity() returns the actual capacity.
   2836    * For many operations, the full capacity should be used to avoid reallocations.
   2837    *
   2838    * While the buffer is "open" between getBuffer(minCapacity)
   2839    * and releaseBuffer(newLength), the following applies:
   2840    * - The string length is set to 0.
   2841    * - Any read API call on the UnicodeString object will behave like on a 0-length string.
   2842    * - Any write API call on the UnicodeString object is disallowed and will have no effect.
   2843    * - You can read from and write to the returned buffer.
   2844    * - The previous string contents will still be in the buffer;
   2845    *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
   2846    *   If the length() was greater than minCapacity, then any contents after minCapacity
   2847    *   may be lost.
   2848    *   The buffer contents is not NUL-terminated by getBuffer().
   2849    *   If length()<getCapacity() then you can terminate it by writing a NUL
   2850    *   at index length().
   2851    * - You must call releaseBuffer(newLength) in order to
   2852    *   return to normal UnicodeString operation.
   2853    *
   2854    * @param minCapacity the minimum number of UChars that are to be available
   2855    *        in the buffer, starting at the returned pointer;
   2856    *        defaults to the current string capacity if minCapacity==-1
   2857    * @return a writable pointer to the internal string buffer,
   2858    *         or 0 if an error occurs (nested calls, out of memory)
   2859    *
   2860    * @see releaseBuffer
   2861    * @see getTerminatedBuffer()
   2862    * @stable ICU 2.0
   2863    */
   2864   UChar *getBuffer(int32_t minCapacity);
   2865 
   2866   /**
   2867    * Release a read/write buffer on a UnicodeString object with an
   2868    * "open" getBuffer(minCapacity).
   2869    * This function must be called in a matched pair with getBuffer(minCapacity).
   2870    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
   2871    *
   2872    * It will set the string length to newLength, at most to the current capacity.
   2873    * If newLength==-1 then it will set the length according to the
   2874    * first NUL in the buffer, or to the capacity if there is no NUL.
   2875    *
   2876    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
   2877    *
   2878    * @param newLength the new length of the UnicodeString object;
   2879    *        defaults to the current capacity if newLength is greater than that;
   2880    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
   2881    *        the current capacity of the string
   2882    *
   2883    * @see getBuffer(int32_t minCapacity)
   2884    * @stable ICU 2.0
   2885    */
   2886   void releaseBuffer(int32_t newLength=-1);
   2887 
   2888   /**
   2889    * Get a read-only pointer to the internal buffer.
   2890    * This can be called at any time on a valid UnicodeString.
   2891    *
   2892    * It returns 0 if the string is bogus, or
   2893    * during an "open" getBuffer(minCapacity).
   2894    *
   2895    * It can be called as many times as desired.
   2896    * The pointer that it returns will remain valid until the UnicodeString object is modified,
   2897    * at which time the pointer is semantically invalidated and must not be used any more.
   2898    *
   2899    * The capacity of the buffer can be determined with getCapacity().
   2900    * The part after length() may or may not be initialized and valid,
   2901    * depending on the history of the UnicodeString object.
   2902    *
   2903    * The buffer contents is (probably) not NUL-terminated.
   2904    * You can check if it is with
   2905    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
   2906    * (See getTerminatedBuffer().)
   2907    *
   2908    * The buffer may reside in read-only memory. Its contents must not
   2909    * be modified.
   2910    *
   2911    * @return a read-only pointer to the internal string buffer,
   2912    *         or 0 if the string is bogus or has an "open" getBuffer(minCapacity)
   2913    *
   2914    * @see getBuffer(int32_t minCapacity)
   2915    * @see getTerminatedBuffer()
   2916    * @stable ICU 2.0
   2917    */
   2918   inline const UChar *getBuffer() const;
   2919 
   2920   /**
   2921    * Get a read-only pointer to the internal buffer,
   2922    * making sure that it is NUL-terminated.
   2923    * This can be called at any time on a valid UnicodeString.
   2924    *
   2925    * It returns 0 if the string is bogus, or
   2926    * during an "open" getBuffer(minCapacity), or if the buffer cannot
   2927    * be NUL-terminated (because memory allocation failed).
   2928    *
   2929    * It can be called as many times as desired.
   2930    * The pointer that it returns will remain valid until the UnicodeString object is modified,
   2931    * at which time the pointer is semantically invalidated and must not be used any more.
   2932    *
   2933    * The capacity of the buffer can be determined with getCapacity().
   2934    * The part after length()+1 may or may not be initialized and valid,
   2935    * depending on the history of the UnicodeString object.
   2936    *
   2937    * The buffer contents is guaranteed to be NUL-terminated.
   2938    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
   2939    * is written.
   2940    * For this reason, this function is not const, unlike getBuffer().
   2941    * Note that a UnicodeString may also contain NUL characters as part of its contents.
   2942    *
   2943    * The buffer may reside in read-only memory. Its contents must not
   2944    * be modified.
   2945    *
   2946    * @return a read-only pointer to the internal string buffer,
   2947    *         or 0 if the string is bogus, has an "open" getBuffer(minCapacity), or cannot be NUL-terminated
   2948    *
   2949    * @see getBuffer(int32_t minCapacity)
   2950    * @see getBuffer()
   2951    * @stable ICU 2.2
   2952    */
   2953   const UChar *getTerminatedBuffer();
   2954 
   2955   //========================================
   2956   // Constructors
   2957   //========================================
   2958 
   2959   /** Construct an empty UnicodeString.
   2960    * @stable ICU 2.0
   2961    */
   2962   inline UnicodeString();
   2963 
   2964   /**
   2965    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
   2966    * @param capacity the number of UChars this UnicodeString should hold
   2967    * before a resize is necessary; if count is greater than 0 and count
   2968    * code points c take up more space than capacity, then capacity is adjusted
   2969    * accordingly.
   2970    * @param c is used to initially fill the string
   2971    * @param count specifies how many code points c are to be written in the
   2972    *              string
   2973    * @stable ICU 2.0
   2974    */
   2975   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
   2976 
   2977   /**
   2978    * Single UChar (code unit) constructor.
   2979    *
   2980    * It is recommended to mark this constructor "explicit" by
   2981    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
   2982    * on the compiler command line or similar.
   2983    * @param ch the character to place in the UnicodeString
   2984    * @stable ICU 2.0
   2985    */
   2986   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
   2987 
   2988   /**
   2989    * Single UChar32 (code point) constructor.
   2990    *
   2991    * It is recommended to mark this constructor "explicit" by
   2992    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
   2993    * on the compiler command line or similar.
   2994    * @param ch the character to place in the UnicodeString
   2995    * @stable ICU 2.0
   2996    */
   2997   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
   2998 
   2999   /**
   3000    * UChar* constructor.
   3001    *
   3002    * It is recommended to mark this constructor "explicit" by
   3003    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
   3004    * on the compiler command line or similar.
   3005    * @param text The characters to place in the UnicodeString.  <TT>text</TT>
   3006    * must be NUL (U+0000) terminated.
   3007    * @stable ICU 2.0
   3008    */
   3009   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
   3010 
   3011   /**
   3012    * UChar* constructor.
   3013    * @param text The characters to place in the UnicodeString.
   3014    * @param textLength The number of Unicode characters in <TT>text</TT>
   3015    * to copy.
   3016    * @stable ICU 2.0
   3017    */
   3018   UnicodeString(const UChar *text,
   3019         int32_t textLength);
   3020 
   3021   /**
   3022    * Readonly-aliasing UChar* constructor.
   3023    * The text will be used for the UnicodeString object, but
   3024    * it will not be released when the UnicodeString is destroyed.
   3025    * This has copy-on-write semantics:
   3026    * When the string is modified, then the buffer is first copied into
   3027    * newly allocated memory.
   3028    * The aliased buffer is never modified.
   3029    *
   3030    * In an assignment to another UnicodeString, when using the copy constructor
   3031    * or the assignment operator, the text will be copied.
   3032    * When using fastCopyFrom(), the text will be aliased again,
   3033    * so that both strings then alias the same readonly-text.
   3034    *
   3035    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
   3036    *                     This must be true if <code>textLength==-1</code>.
   3037    * @param text The characters to alias for the UnicodeString.
   3038    * @param textLength The number of Unicode characters in <code>text</code> to alias.
   3039    *                   If -1, then this constructor will determine the length
   3040    *                   by calling <code>u_strlen()</code>.
   3041    * @stable ICU 2.0
   3042    */
   3043   UnicodeString(UBool isTerminated,
   3044                 const UChar *text,
   3045                 int32_t textLength);
   3046 
   3047   /**
   3048    * Writable-aliasing UChar* constructor.
   3049    * The text will be used for the UnicodeString object, but
   3050    * it will not be released when the UnicodeString is destroyed.
   3051    * This has write-through semantics:
   3052    * For as long as the capacity of the buffer is sufficient, write operations
   3053    * will directly affect the buffer. When more capacity is necessary, then
   3054    * a new buffer will be allocated and the contents copied as with regularly
   3055    * constructed strings.
   3056    * In an assignment to another UnicodeString, the buffer will be copied.
   3057    * The extract(UChar *dst) function detects whether the dst pointer is the same
   3058    * as the string buffer itself and will in this case not copy the contents.
   3059    *
   3060    * @param buffer The characters to alias for the UnicodeString.
   3061    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
   3062    * @param buffCapacity The size of <code>buffer</code> in UChars.
   3063    * @stable ICU 2.0
   3064    */
   3065   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
   3066 
   3067 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
   3068 
   3069   /**
   3070    * char* constructor.
   3071    * Uses the default converter (and thus depends on the ICU conversion code)
   3072    * unless U_CHARSET_IS_UTF8 is set to 1.
   3073    *
   3074    * For ASCII (really "invariant character") strings it is more efficient to use
   3075    * the constructor that takes a US_INV (for its enum EInvariant).
   3076    * For ASCII (invariant-character) string literals, see UNICODE_STRING and
   3077    * UNICODE_STRING_SIMPLE.
   3078    *
   3079    * It is recommended to mark this constructor "explicit" by
   3080    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
   3081    * on the compiler command line or similar.
   3082    * @param codepageData an array of bytes, null-terminated,
   3083    *                     in the platform's default codepage.
   3084    * @stable ICU 2.0
   3085    * @see UNICODE_STRING
   3086    * @see UNICODE_STRING_SIMPLE
   3087    */
   3088   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
   3089 
   3090   /**
   3091    * char* constructor.
   3092    * Uses the default converter (and thus depends on the ICU conversion code)
   3093    * unless U_CHARSET_IS_UTF8 is set to 1.
   3094    * @param codepageData an array of bytes in the platform's default codepage.
   3095    * @param dataLength The number of bytes in <TT>codepageData</TT>.
   3096    * @stable ICU 2.0
   3097    */
   3098   UnicodeString(const char *codepageData, int32_t dataLength);
   3099 
   3100 #endif
   3101 
   3102 #if !UCONFIG_NO_CONVERSION
   3103 
   3104   /**
   3105    * char* constructor.
   3106    * @param codepageData an array of bytes, null-terminated
   3107    * @param codepage the encoding of <TT>codepageData</TT>.  The special
   3108    * value 0 for <TT>codepage</TT> indicates that the text is in the
   3109    * platform's default codepage.
   3110    *
   3111    * If <code>codepage</code> is an empty string (<code>""</code>),
   3112    * then a simple conversion is performed on the codepage-invariant
   3113    * subset ("invariant characters") of the platform encoding. See utypes.h.
   3114    * Recommendation: For invariant-character strings use the constructor
   3115    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
   3116    * because it avoids object code dependencies of UnicodeString on
   3117    * the conversion code.
   3118    *
   3119    * @stable ICU 2.0
   3120    */
   3121   UnicodeString(const char *codepageData, const char *codepage);
   3122 
   3123   /**
   3124    * char* constructor.
   3125    * @param codepageData an array of bytes.
   3126    * @param dataLength The number of bytes in <TT>codepageData</TT>.
   3127    * @param codepage the encoding of <TT>codepageData</TT>.  The special
   3128    * value 0 for <TT>codepage</TT> indicates that the text is in the
   3129    * platform's default codepage.
   3130    * If <code>codepage</code> is an empty string (<code>""</code>),
   3131    * then a simple conversion is performed on the codepage-invariant
   3132    * subset ("invariant characters") of the platform encoding. See utypes.h.
   3133    * Recommendation: For invariant-character strings use the constructor
   3134    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
   3135    * because it avoids object code dependencies of UnicodeString on
   3136    * the conversion code.
   3137    *
   3138    * @stable ICU 2.0
   3139    */
   3140   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
   3141 
   3142   /**
   3143    * char * / UConverter constructor.
   3144    * This constructor uses an existing UConverter object to
   3145    * convert the codepage string to Unicode and construct a UnicodeString
   3146    * from that.
   3147    *
   3148    * The converter is reset at first.
   3149    * If the error code indicates a failure before this constructor is called,
   3150    * or if an error occurs during conversion or construction,
   3151    * then the string will be bogus.
   3152    *
   3153    * This function avoids the overhead of opening and closing a converter if
   3154    * multiple strings are constructed.
   3155    *
   3156    * @param src input codepage string
   3157    * @param srcLength length of the input string, can be -1 for NUL-terminated strings
   3158    * @param cnv converter object (ucnv_resetToUnicode() will be called),
   3159    *        can be NULL for the default converter
   3160    * @param errorCode normal ICU error code
   3161    * @stable ICU 2.0
   3162    */
   3163   UnicodeString(
   3164         const char *src, int32_t srcLength,
   3165         UConverter *cnv,
   3166         UErrorCode &errorCode);
   3167 
   3168 #endif
   3169 
   3170   /**
   3171    * Constructs a Unicode string from an invariant-character char * string.
   3172    * About invariant characters see utypes.h.
   3173    * This constructor has no runtime dependency on conversion code and is
   3174    * therefore recommended over ones taking a charset name string
   3175    * (where the empty string "" indicates invariant-character conversion).
   3176    *
   3177    * Use the macro US_INV as the third, signature-distinguishing parameter.
   3178    *
   3179    * For example:
   3180    * \code
   3181    * void fn(const char *s) {
   3182    *   UnicodeString ustr(s, -1, US_INV);
   3183    *   // use ustr ...
   3184    * }
   3185    * \endcode
   3186    *
   3187    * @param src String using only invariant characters.
   3188    * @param length Length of src, or -1 if NUL-terminated.
   3189    * @param inv Signature-distinguishing parameter, use US_INV.
   3190    *
   3191    * @see US_INV
   3192    * @stable ICU 3.2
   3193    */
   3194   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
   3195 
   3196 
   3197   /**
   3198    * Copy constructor.
   3199    *
   3200    * Starting with ICU 2.4, the assignment operator and the copy constructor
   3201    * allocate a new buffer and copy the buffer contents even for readonly aliases.
   3202    * By contrast, the fastCopyFrom() function implements the old,
   3203    * more efficient but less safe behavior
   3204    * of making this string also a readonly alias to the same buffer.
   3205    *
   3206    * If the source object has an "open" buffer from getBuffer(minCapacity),
   3207    * then the copy is an empty string.
   3208    *
   3209    * @param that The UnicodeString object to copy.
   3210    * @stable ICU 2.0
   3211    * @see fastCopyFrom
   3212    */
   3213   UnicodeString(const UnicodeString& that);
   3214 
   3215 #ifndef U_HIDE_DRAFT_API
   3216 #if U_HAVE_RVALUE_REFERENCES
   3217   /**
   3218    * Move constructor, might leave src in bogus state.
   3219    * This string will have the same contents and state that the source string had.
   3220    * @param src source string
   3221    * @draft ICU 56
   3222    */
   3223   UnicodeString(UnicodeString &&src) U_NOEXCEPT;
   3224 #endif
   3225 #endif  /* U_HIDE_DRAFT_API */
   3226 
   3227   /**
   3228    * 'Substring' constructor from tail of source string.
   3229    * @param src The UnicodeString object to copy.
   3230    * @param srcStart The offset into <tt>src</tt> at which to start copying.
   3231    * @stable ICU 2.2
   3232    */
   3233   UnicodeString(const UnicodeString& src, int32_t srcStart);
   3234 
   3235   /**
   3236    * 'Substring' constructor from subrange of source string.
   3237    * @param src The UnicodeString object to copy.
   3238    * @param srcStart The offset into <tt>src</tt> at which to start copying.
   3239    * @param srcLength The number of characters from <tt>src</tt> to copy.
   3240    * @stable ICU 2.2
   3241    */
   3242   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
   3243 
   3244   /**
   3245    * Clone this object, an instance of a subclass of Replaceable.
   3246    * Clones can be used concurrently in multiple threads.
   3247    * If a subclass does not implement clone(), or if an error occurs,
   3248    * then NULL is returned.
   3249    * The clone functions in all subclasses return a pointer to a Replaceable
   3250    * because some compilers do not support covariant (same-as-this)
   3251    * return types; cast to the appropriate subclass if necessary.
   3252    * The caller must delete the clone.
   3253    *
   3254    * @return a clone of this object
   3255    *
   3256    * @see Replaceable::clone
   3257    * @see getDynamicClassID
   3258    * @stable ICU 2.6
   3259    */
   3260   virtual Replaceable *clone() const;
   3261 
   3262   /** Destructor.
   3263    * @stable ICU 2.0
   3264    */
   3265   virtual ~UnicodeString();
   3266 
   3267   /**
   3268    * Create a UnicodeString from a UTF-8 string.
   3269    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   3270    * Calls u_strFromUTF8WithSub().
   3271    *
   3272    * @param utf8 UTF-8 input string.
   3273    *             Note that a StringPiece can be implicitly constructed
   3274    *             from a std::string or a NUL-terminated const char * string.
   3275    * @return A UnicodeString with equivalent UTF-16 contents.
   3276    * @see toUTF8
   3277    * @see toUTF8String
   3278    * @stable ICU 4.2
   3279    */
   3280   static UnicodeString fromUTF8(const StringPiece &utf8);
   3281 
   3282   /**
   3283    * Create a UnicodeString from a UTF-32 string.
   3284    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   3285    * Calls u_strFromUTF32WithSub().
   3286    *
   3287    * @param utf32 UTF-32 input string. Must not be NULL.
   3288    * @param length Length of the input string, or -1 if NUL-terminated.
   3289    * @return A UnicodeString with equivalent UTF-16 contents.
   3290    * @see toUTF32
   3291    * @stable ICU 4.2
   3292    */
   3293   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
   3294 
   3295   /* Miscellaneous operations */
   3296 
   3297   /**
   3298    * Unescape a string of characters and return a string containing
   3299    * the result.  The following escape sequences are recognized:
   3300    *
   3301    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
   3302    * \\Uhhhhhhhh   8 hex digits
   3303    * \\xhh         1-2 hex digits
   3304    * \\ooo         1-3 octal digits; o in [0-7]
   3305    * \\cX          control-X; X is masked with 0x1F
   3306    *
   3307    * as well as the standard ANSI C escapes:
   3308    *
   3309    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
   3310    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
   3311    * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
   3312    *
   3313    * Anything else following a backslash is generically escaped.  For
   3314    * example, "[a\\-z]" returns "[a-z]".
   3315    *
   3316    * If an escape sequence is ill-formed, this method returns an empty
   3317    * string.  An example of an ill-formed sequence is "\\u" followed by
   3318    * fewer than 4 hex digits.
   3319    *
   3320    * This function is similar to u_unescape() but not identical to it.
   3321    * The latter takes a source char*, so it does escape recognition
   3322    * and also invariant conversion.
   3323    *
   3324    * @return a string with backslash escapes interpreted, or an
   3325    * empty string on error.
   3326    * @see UnicodeString#unescapeAt()
   3327    * @see u_unescape()
   3328    * @see u_unescapeAt()
   3329    * @stable ICU 2.0
   3330    */
   3331   UnicodeString unescape() const;
   3332 
   3333   /**
   3334    * Unescape a single escape sequence and return the represented
   3335    * character.  See unescape() for a listing of the recognized escape
   3336    * sequences.  The character at offset-1 is assumed (without
   3337    * checking) to be a backslash.  If the escape sequence is
   3338    * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
   3339    * returned.
   3340    *
   3341    * @param offset an input/output parameter.  On input, it is the
   3342    * offset into this string where the escape sequence is located,
   3343    * after the initial backslash.  On output, it is advanced after the
   3344    * last character parsed.  On error, it is not advanced at all.
   3345    * @return the character represented by the escape sequence at
   3346    * offset, or U_SENTINEL=-1 on error.
   3347    * @see UnicodeString#unescape()
   3348    * @see u_unescape()
   3349    * @see u_unescapeAt()
   3350    * @stable ICU 2.0
   3351    */
   3352   UChar32 unescapeAt(int32_t &offset) const;
   3353 
   3354   /**
   3355    * ICU "poor man's RTTI", returns a UClassID for this class.
   3356    *
           * @return the UClassID that identifies this class.
   3357    * @stable ICU 2.2
   3358    */
   3359   static UClassID U_EXPORT2 getStaticClassID();
   3360 
   3361   /**
   3362    * ICU "poor man's RTTI", returns a UClassID for the actual class.
   3363    *
           * @return the UClassID for the actual class of this object.
   3364    * @stable ICU 2.2
   3365    */
   3366   virtual UClassID getDynamicClassID() const;
   3367 
   3368   //========================================
   3369   // Implementation methods
   3370   //========================================
   3371 
   3372 protected:
   3373   /**
   3374    * Implement Replaceable::getLength() (see jitterbug 1027).
           * @return NOTE(review): presumably the same value as length() -- confirm
           *         against the definition.
   3375    * @stable ICU 2.4
   3376    */
   3377   virtual int32_t getLength() const;
   3378 
   3379   /**
   3380    * The change in Replaceable to use virtual getCharAt() allows
   3381    * UnicodeString::charAt() to be inline again (see jitterbug 709).
           * @param offset offset into this string.
           * @return NOTE(review): presumably the same UChar that charAt(offset)
           *         yields -- confirm against the definition.
   3382    * @stable ICU 2.4
   3383    */
   3384   virtual UChar getCharAt(int32_t offset) const;
   3385 
   3386   /**
   3387    * The change in Replaceable to use virtual getChar32At() allows
   3388    * UnicodeString::char32At() to be inline again (see jitterbug 709).
           * @param offset offset into this string.
           * @return NOTE(review): presumably the same UChar32 that char32At(offset)
           *         yields -- confirm against the definition.
   3389    * @stable ICU 2.4
   3390    */
   3391   virtual UChar32 getChar32At(int32_t offset) const;
   3392 
   3393 private:
   3394   // For char* constructors. Could be made public.
          // Takes the UTF-8 input as a StringPiece.
          // NOTE(review): presumably replaces the contents and returns *this -- confirm.
   3395   UnicodeString &setToUTF8(const StringPiece &utf8);
   3396   // For extract(char*).
   3397   // We could make a toUTF8(target, capacity, errorCode) public but not
   3398   // this version: New API will be cleaner if we make callers create substrings
   3399   // rather than having start+length on every method,
   3400   // and it should take a UErrorCode&.
          // start/len name the substring of this string; target/capacity describe
          // the caller's char output buffer.
          // NOTE(review): the int32_t result is presumably the UTF-8 length -- confirm.
   3401   int32_t
   3402   toUTF8(int32_t start, int32_t len,
   3403          char *target, int32_t capacity) const;
   3404 
   3405   /**
   3406    * Internal string contents comparison, called by operator==.
   3407    * Requires: this & text not bogus and have same lengths.
           * operator== checks both preconditions before calling and passes the
           * common length as len.
           * @param text the string whose contents are compared with this one.
           * @param len the length shared by both strings.
           * @return NOTE(review): presumably TRUE when the contents match -- confirm.
   3408    */
   3409   UBool doEquals(const UnicodeString &text, int32_t len) const;
   3410 
          // UnicodeString overload (inline): when srcText is bogus it returns 0 if
          // this string is bogus as well and 1 otherwise; otherwise it pins
          // srcStart/srcLength to srcText and forwards to the UChar* overload.
   3411   inline int8_t
   3412   doCompare(int32_t start,
   3413            int32_t length,
   3414            const UnicodeString& srcText,
   3415            int32_t srcStart,
   3416            int32_t srcLength) const;
   3417 
          // UChar* overload: the target of the inline overload above.
          // NOTE(review): the result is presumably -1/0/1, since the relational
          // operators test it against -1 and 1 -- confirm.
   3418   int8_t doCompare(int32_t start,
   3419            int32_t length,
   3420            const UChar *srcChars,
   3421            int32_t srcStart,
   3422            int32_t srcLength) const;
   3423 
          // UnicodeString overload (inline): when srcText is bogus it returns 0 if
          // this string is bogus as well and 1 otherwise; otherwise it pins
          // srcStart/srcLength to srcText and forwards to the UChar* overload.
   3424   inline int8_t
   3425   doCompareCodePointOrder(int32_t start,
   3426                           int32_t length,
   3427                           const UnicodeString& srcText,
   3428                           int32_t srcStart,
   3429                           int32_t srcLength) const;
   3430 
          // UChar* overload: the target of the inline overload above.
   3431   int8_t doCompareCodePointOrder(int32_t start,
   3432                                  int32_t length,
   3433                                  const UChar *srcChars,
   3434                                  int32_t srcStart,
   3435                                  int32_t srcLength) const;
   3436 
          // UnicodeString overload (inline): when srcText is bogus it returns 0 if
          // this string is bogus as well and 1 otherwise; otherwise it pins
          // srcStart/srcLength to srcText and forwards to the UChar* overload,
          // passing options through unchanged.
   3437   inline int8_t
   3438   doCaseCompare(int32_t start,
   3439                 int32_t length,
   3440                 const UnicodeString &srcText,
   3441                 int32_t srcStart,
   3442                 int32_t srcLength,
   3443                 uint32_t options) const;
   3444 
          // UChar* overload: the target of the inline overload above.
   3445   int8_t
   3446   doCaseCompare(int32_t start,
   3447                 int32_t length,
   3448                 const UChar *srcChars,
   3449                 int32_t srcStart,
   3450                 int32_t srcLength,
   3451                 uint32_t options) const;
   3452 
          // Workers for the inline indexOf(UChar/UChar32, ...) wrappers, which pass
          // their character and start/length arguments through to these overloads.
   3453   int32_t doIndexOf(UChar c,
   3454             int32_t start,
   3455             int32_t length) const;
   3456 
   3457   int32_t doIndexOf(UChar32 c,
   3458                         int32_t start,
   3459                         int32_t length) const;
   3460 
          // NOTE(review): presumably the backward-searching counterparts of
          // doIndexOf(), one per character type -- confirm against the definitions.
   3461   int32_t doLastIndexOf(UChar c,
   3462                 int32_t start,
   3463                 int32_t length) const;
   3464 
   3465   int32_t doLastIndexOf(UChar32 c,
   3466                             int32_t start,
   3467                             int32_t length) const;
   3468 
          // Internal extraction helpers: one takes a UChar array plus dstStart,
          // the inline one takes a UnicodeString target.
          // NOTE(review): presumably they copy start/length code units of this
          // string into the destination -- confirm against the definitions.
   3469   void doExtract(int32_t start,
   3470          int32_t length,
   3471          UChar *dst,
   3472          int32_t dstStart) const;
   3473 
   3474   inline void doExtract(int32_t start,
   3475          int32_t length,
   3476          UnicodeString& target) const;
   3477 
          // NOTE(review): presumably the worker behind charAt(); kInvalidUChar is
          // documented as the value charAt() returns for an invalid index -- confirm.
   3478   inline UChar doCharAt(int32_t offset)  const;
   3479 
          // Two overloads that differ only in the source: a UnicodeString or a
          // UChar array, each with its own srcStart/srcLength.
          // NOTE(review): presumably replaces start/length of this string with the
          // given source range and returns *this -- confirm.
   3480   UnicodeString& doReplace(int32_t start,
   3481                int32_t length,
   3482                const UnicodeString& srcText,
   3483                int32_t srcStart,
   3484                int32_t srcLength);
   3485 
   3486   UnicodeString& doReplace(int32_t start,
   3487                int32_t length,
   3488                const UChar *srcChars,
   3489                int32_t srcStart,
   3490                int32_t srcLength);
   3491 
          // Append workers for a UnicodeString source and a UChar array source.
          // NOTE(review): presumably append srcStart/srcLength of the source and
          // return *this -- confirm.
   3492   UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
   3493   UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength);
   3494 
          // NOTE(review): presumably reverses the start/length range in place and
          // returns *this -- confirm against the definition.
   3495   UnicodeString& doReverse(int32_t start,
   3496                int32_t length);
   3497 
   3498   // calculate hash code
          // Called by the inline public hashCode(), which returns this value unchanged.
   3499   int32_t doHashCode(void) const;
   3500 
   3501   // get pointer to start of array
   3502   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
          // Both return fUnion.fStackFields.fBuffer when kUsingStackBuffer is set,
          // otherwise fUnion.fFields.fArray.
   3503   inline UChar* getArrayStart(void);
   3504   inline const UChar* getArrayStart(void) const;
   3505 
          // hasShortLength(): TRUE when fLengthAndFlags is non-negative.
          // getShortLength(): fLengthAndFlags>>kLengthShift; length() uses it only
          // when hasShortLength() is TRUE and reads fUnion.fFields.fLength otherwise.
   3506   inline UBool hasShortLength() const;
   3507   inline int32_t getShortLength() const;
   3508 
   3509   // A UnicodeString object (not necessarily its current buffer)
   3510   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
          // Implemented as: neither kOpenGetBuffer nor kIsBogus is set in fLengthAndFlags.
   3511   inline UBool isWritable() const;
   3512 
   3513   // Is the current buffer writable?
          // TRUE only when none of kOpenGetBuffer, kIsBogus, kBufferIsReadonly is set
          // and, if the buffer is kRefCounted, refCount()==1.
   3514   inline UBool isBufferWritable() const;
   3515 
   3516   // None of the following does releaseArray().
          // NOTE(review): presumably these only update the length/flag fields in
          // fUnion (and, for setArray, the array pointer and capacity) -- confirm
          // against the inline definitions.
   3517   inline void setZeroLength();
   3518   inline void setShortLength(int32_t len);
   3519   inline void setLength(int32_t len);
   3520   inline void setToEmpty();
   3521   inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags
   3522 
   3523   // allocate the array; result may be the stack buffer
   3524   // sets refCount to 1 if appropriate
   3525   // sets fArray, fCapacity, and flags
   3526   // sets length to 0
   3527   // returns boolean for success or failure
          // capacity: NOTE(review): presumably counted in UChars, like fCapacity -- confirm.
   3528   UBool allocate(int32_t capacity);
   3529 
   3530   // release the array if owned
          // NOTE(review): "owned" presumably means a kRefCounted buffer whose
          // reference count is dropped here -- confirm against the definition.
   3531   void releaseArray(void);
   3532 
   3533   // turn a bogus string into an empty one
          // NOTE(review): effect on a non-bogus string is not stated here;
          // presumably a no-op -- confirm against the definition.
   3534   void unBogus();
   3535 
   3536   // implements assignment operator, copy constructor, and fastCopyFrom()
          // fastCopy defaults to FALSE.
          // NOTE(review): presumably only fastCopyFrom() passes TRUE -- confirm.
   3537   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
   3538 
   3539   // Copies just the fields without memory management.
          // Declared U_NOEXCEPT. src is taken by non-const reference.
          // NOTE(review): setSrcToBogus presumably marks src bogus after the
          // copy -- confirm against the definition.
   3540   void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
   3541 
   3542   // Pin start and limit to acceptable values.
          // pinIndex(): clamps start into [0, length()].
          // pinIndices(): clamps start the same way, then clamps length into
          // [0, length() - start] using the already-clamped start.
   3543   inline void pinIndex(int32_t& start) const;
   3544   inline void pinIndices(int32_t& start,
   3545                          int32_t& length) const;
   3546 
   3547 #if !UCONFIG_NO_CONVERSION
   3548 
   3549   /* Internal extract() using UConverter. */
          // Only declared when UCONFIG_NO_CONVERSION is off.
          // NOTE(review): presumably converts start/length of this string into
          // dest (at most destCapacity bytes) with cnv and reports failures
          // through errorCode -- confirm against the definition.
   3550   int32_t doExtract(int32_t start, int32_t length,
   3551                     char *dest, int32_t destCapacity,
   3552                     UConverter *cnv,
   3553                     UErrorCode &errorCode) const;
   3554 
   3555   /*
   3556    * Real constructor for converting from codepage data.
   3557    * It assumes that it is called with !fRefCounted.
   3558    *
   3559    * If <code>codepage==0</code>, then the default converter
   3560    * is used for the platform encoding.
   3561    * If <code>codepage</code> is an empty string (<code>""</code>),
   3562    * then a simple conversion is performed on the codepage-invariant
   3563    * subset ("invariant characters") of the platform encoding. See utypes.h.
           *
           * codepageData and dataLength describe the input bytes.
           * NOTE(review): the handling of a negative dataLength is not stated
           * here -- confirm against the definition.
   3564    */
   3565   void doCodepageCreate(const char *codepageData,
   3566                         int32_t dataLength,
   3567                         const char *codepage);
   3568 
   3569   /*
   3570    * Worker function for creating a UnicodeString from
   3571    * a codepage string using a UConverter.
           *
           * Takes an already-created converter instead of a codepage name and
           * has a UErrorCode in/out parameter.
           * NOTE(review): presumably called by the codepage-name overload once
           * it has a converter -- confirm against the definition.
   3572    */
   3573   void
   3574   doCodepageCreate(const char *codepageData,
   3575                    int32_t dataLength,
   3576                    UConverter *converter,
   3577                    UErrorCode &status);
   3578 
   3579 #endif
   3580 
   3581   /*
   3582    * This function is called when write access to the array
   3583    * is necessary.
   3584    *
   3585    * We need to make a copy of the array if
   3586    * the buffer is read-only, or
   3587    * the buffer is refCounted (shared), and refCount>1, or
   3588    * the buffer is too small.
   3589    *
   3590    * Return FALSE if memory could not be allocated.
           *
           * Every parameter has a default: newCapacity=-1, growCapacity=-1,
           * doCopyArray=TRUE, pBufferToDelete=0, forceClone=FALSE.
           * NOTE(review): -1 presumably means "no specific capacity requested",
           * and a non-null pBufferToDelete presumably hands the old buffer to the
           * caller instead of freeing it -- confirm against the definition.
   3591    */
   3592   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
   3593                             int32_t growCapacity = -1,
   3594                             UBool doCopyArray = TRUE,
   3595                             int32_t **pBufferToDelete = 0,
   3596                             UBool forceClone = FALSE);
   3597 
   3598   /**
   3599    * Common function for UnicodeString case mappings.
   3600    * The stringCaseMapper has the same type UStringCaseMapper
   3601    * as in ustr_imp.h for ustrcase_map().
           * @param csm the case map passed along to the mapper
           *        (NOTE(review): exact use not visible here -- confirm).
           * @param stringCaseMapper the mapping function to apply.
           * @return NOTE(review): presumably *this -- confirm.
   3602    */
   3603   UnicodeString &
   3604   caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
   3605 
   3606   // ref counting
          // The count belongs to a kRefCounted buffer (see the kRefCounted flag).
          // isBufferWritable() treats such a buffer as writable only when
          // refCount()==1.
          // NOTE(review): removeRef() presumably returns the count after the
          // decrement -- confirm against the definition.
   3607   void addRef(void);
   3608   int32_t removeRef(void);
   3609   int32_t refCount(void) const;
   3610 
   3611   // constants
   3612   enum {
   3613     /**
   3614      * Size of stack buffer for short strings.
   3615      * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
             * Computed as UNISTR_OBJECT_SIZE minus sizeof(void *) and 2 bytes, divided
             * by the UChar size (NOTE(review): the two subtracted terms presumably
             * stand for the vtable pointer and the int16_t fLengthAndFlags -- confirm).
   3616      * @see UNISTR_OBJECT_SIZE
   3617      */
   3618     US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
   3619     kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
   3620     kGrowSize=128, // grow size for this buffer
   3621     kInvalidHashCode=0, // invalid hash code
   3622     kEmptyHashCode=1, // hash code for empty string
   3623 
   3624     // bit flag values for fLengthAndFlags
   3625     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
   3626     kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
   3627     kRefCounted=4,      // there is a refCount field before the characters in fArray
   3628     kBufferIsReadonly=8,// do not write to this buffer
   3629     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
   3630                         // and releaseBuffer(newLength) must be called
            // mask covering all five flag bits above (1|2|4|8|16)
   3631     kAllStorageFlags=0x1f,
   3632 
   3633     kLengthShift=5,     // remaining 11 bits for non-negative short length, or negative if long
            // lowest short-length bit: fLengthAndFlags value for a short length of 1
   3634     kLength1=1<<kLengthShift,
   3635     kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
   3636     kLengthIsLarge=0xffe0,  // short length < 0, real length is in fUnion.fFields.fLength
   3637 
   3638     // combined values for convenience
   3639     kShortString=kUsingStackBuffer,
   3640     kLongString=kRefCounted,
   3641     kReadonlyAlias=kBufferIsReadonly,
   3642     kWritableAlias=0
   3643   };
   3644 
          // UnicodeStringAppendable is granted access to the private members of
          // this class.
   3645   friend class UnicodeStringAppendable;
   3646 
   3647   union StackBufferOrFields;        // forward declaration necessary before friend declaration
   3648   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
   3649 
   3650   /*
   3651    * The following are all the class fields that are stored
   3652    * in each UnicodeString object.
   3653    * Note that UnicodeString has virtual functions,
   3654    * therefore there is an implicit vtable pointer
   3655    * as the first real field.
   3656    * The fields should be aligned such that no padding is necessary.
   3657    * On 32-bit machines, the size should be 32 bytes,
   3658    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
   3659    *
   3660    * We use a hack to achieve this.
   3661    *
   3662    * With at least some compilers, each of the following is forced to
   3663    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
   3664    * rounded up with additional padding if the fields do not already fit that requirement:
   3665    * - sizeof(class UnicodeString)
   3666    * - offsetof(UnicodeString, fUnion)
   3667    * - sizeof(fUnion)
   3668    * - sizeof(fStackFields)
   3669    *
   3670    * We optimize for the longest possible internal buffer for short strings.
   3671    * fUnion.fStackFields begins with 2 bytes for storage flags
   3672    * and the length of relatively short strings,
   3673    * followed by the buffer for short string contents.
   3674    * There is no padding inside fStackFields.
   3675    *
   3676    * Heap-allocated and aliased strings use fUnion.fFields.
   3677    * Both fStackFields and fFields must begin with the same fields for flags and short length,
   3678    * that is, those must have the same memory offsets inside the object,
   3679    * because the flags must be inspected in order to decide which half of fUnion is being used.
   3680    * We assume that the compiler does not reorder the fields.
   3681    *
   3682    * (Padding at the end of fFields is ok:
   3683    * As long as it is no larger than fStackFields, it is not wasted space.)
   3684    *
   3685    * For some of the history of the UnicodeString class fields layout, see
   3686    * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
   3687    * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
   3688    * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
   3689    */
   3690   // (implicit) *vtable;
   3691   union StackBufferOrFields {
   3692     // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
   3693     // Each struct of the union must begin with fLengthAndFlags.
   3694     struct {
   3695       int16_t fLengthAndFlags;          // bit fields: see constants above
   3696       UChar fBuffer[US_STACKBUF_SIZE];  // buffer for short strings
   3697     } fStackFields;
   3698     struct {
   3699       int16_t fLengthAndFlags;          // bit fields: see constants above
              // length() reads fLength only when fLengthAndFlags is negative
              // (see kLengthIsLarge); shorter lengths live in fLengthAndFlags itself.
   3700       int32_t fLength;    // number of characters in fArray if the short length is not used; else undefined
   3701       int32_t fCapacity;  // capacity of fArray (in UChars)
   3702       // array pointer last to minimize padding for machines with P128 data model
   3703       // or pointer sizes that are not a power of 2
   3704       UChar   *fArray;    // the Unicode data
   3705     } fFields;
   3706   } fUnion;
   3707 };
   3708 
   3709 /**
   3710  * Create a new UnicodeString with the concatenation of two others.
         * Non-member function; both operands are taken by const reference and
         * the result is returned by value.
   3711  *
   3712  * @param s1 The first string to be copied to the new one.
   3713  * @param s2 The second string to be copied to the new one, after s1.
   3714  * @return UnicodeString(s1).append(s2)
   3715  * @stable ICU 2.8
   3716  */
   3717 U_COMMON_API UnicodeString U_EXPORT2
   3718 operator+ (const UnicodeString &s1, const UnicodeString &s2);
   3719 
   3720 //========================================
   3721 // Inline members
   3722 //========================================
   3723 
   3724 //========================================
   3725 // Privates
   3726 //========================================
   3727 
   3728 inline void
   3729 UnicodeString::pinIndex(int32_t& start) const {
          // Clamp start into [0, length()]: negative values become 0 and
          // values past the end become length().
   3730   if(start < 0) {
   3731     start = 0;
   3732     return;
   3733   }
   3734   const int32_t len = length();
   3735   if(start > len) { start = len; }
   3736 }
   3738 
   3739 inline void UnicodeString::pinIndices(int32_t& start,
   3740                                       int32_t& _length) const {
          // Clamp start into [0, length()] first ...
   3741   const int32_t total = length();
   3742   if(start < 0) {
   3743     start = 0;
   3744   } else if(start > total) {
   3745     start = total;
   3746   }
          // ... then clamp _length into [0, length() - start] using the pinned start.
   3747   const int32_t remaining = total - start;
   3748   if(_length < 0) {
   3749     _length = 0;
   3750   } else if(_length > remaining) {
   3751     _length = remaining;
   3752   }
   3753 }
   3756 
   3757 inline UChar* UnicodeString::getArrayStart() {
          // The in-object buffer holds the text when kUsingStackBuffer is set;
          // otherwise the text is behind fArray.
   3758   if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { return fUnion.fStackFields.fBuffer; }
   3759   return fUnion.fFields.fArray;
   3760 }
   3762 
   3763 inline const UChar* UnicodeString::getArrayStart() const {
          // Const twin of the accessor above: stack buffer when kUsingStackBuffer
          // is set, fArray otherwise.
   3764   if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { return fUnion.fStackFields.fBuffer; }
   3765   return fUnion.fFields.fArray;
   3766 }
   3768 
   3769 //========================================
   3770 // Default constructor
   3771 //========================================
   3772 
   3773 inline UnicodeString::UnicodeString() {
          // kShortString is just kUsingStackBuffer with all length bits zero,
          // so the new string is empty and uses the in-object buffer.
   3774   fUnion.fStackFields.fLengthAndFlags = kShortString;
   3775 }
   3777 
   3778 //========================================
   3779 // Read-only implementation methods
   3780 //========================================
   3781 inline UBool UnicodeString::hasShortLength() const {
          // A negative fLengthAndFlags means the length is kept in fFields.fLength.
   3782   const int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags;
   3783   return lengthAndFlags >= 0;
   3784 }
   3785 
   3786 inline int32_t UnicodeString::getShortLength() const {
          // Callers must ensure fLengthAndFlags is non-negative; then the result
          // is >= 0 and arithmetic vs. logical shift makes no difference.
   3787   const int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags;
   3788   return lengthAndFlags >> kLengthShift;
   3789 }
   3792 
   3793 inline int32_t UnicodeString::length() const {
          // The length is either packed into fLengthAndFlags or stored in fLength.
   3794   if(hasShortLength()) { return getShortLength(); }
   3795   return fUnion.fFields.fLength;
   3796 }
   3797 
   3798 inline int32_t UnicodeString::getCapacity() const {
          // The in-object buffer has the fixed capacity US_STACKBUF_SIZE;
          // any other buffer reports fCapacity.
   3799   if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { return US_STACKBUF_SIZE; }
   3800   return fUnion.fFields.fCapacity;
   3801 }
   3803 
   3804 inline int32_t UnicodeString::hashCode() const {
          // Thin public wrapper; the value comes from doHashCode().
   3805   return doHashCode();
   3806 }
   3807 
   3808 inline UBool UnicodeString::isBogus() const {
          // Exposes the kIsBogus flag bit of fLengthAndFlags.
   3809   return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus);
   3810 }
   3811 
   3812 inline UBool UnicodeString::isWritable() const {
          // Not writable while bogus or while a getBuffer(minCapacity) is open.
   3813   return (UBool)((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)) == 0);
   3814 }
   3815 
   3816 inline UBool UnicodeString::isBufferWritable() const {
   3817   const int16_t flags = fUnion.fFields.fLengthAndFlags;
          // An open, bogus, or read-only buffer is never writable.
   3818   if(flags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) { return FALSE; }
          // A ref-counted buffer is writable only while refCount()==1.
   3819   return (UBool)(!(flags&kRefCounted) || refCount()==1);
   3820 }
   3823 
   3824 inline const UChar *
   3825 UnicodeString::getBuffer() const {
   3826   const int16_t flags = fUnion.fFields.fLengthAndFlags;
          // No buffer is handed out for a bogus string or while
          // getBuffer(minCapacity) is open.
   3827   if(flags&(kIsBogus|kOpenGetBuffer)) {
   3828     return 0;
   3829   }
   3830   return (flags&kUsingStackBuffer) ? fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
   3831 }
   3834 
   3835 //========================================
   3836 // Read-only alias methods
   3837 //========================================
   3838 inline int8_t
   3839 UnicodeString::doCompare(int32_t start,
   3840                          int32_t thisLength,
   3841                          const UnicodeString& srcText,
   3842                          int32_t srcStart,
   3843                          int32_t srcLength) const {
          // Bogus source: 0 if this string is bogus too, 1 otherwise.
   3844   if(srcText.isBogus()) {
   3845     return (int8_t)!isBogus();
   3846   }
          // Pin the source range, then defer to the UChar* overload.
   3847   srcText.pinIndices(srcStart, srcLength);
   3848   const UChar *srcChars = srcText.getArrayStart();
   3849   return doCompare(start, thisLength, srcChars, srcStart, srcLength);
   3850 }
   3852 
   3853 inline UBool
   3854 UnicodeString::operator== (const UnicodeString& text) const {
          // Two bogus strings are equal; a bogus string never equals a valid one.
   3855   if(isBogus()) {
   3856     return text.isBogus();
   3857   }
   3858   if(text.isBogus()) { return FALSE; }
          // Contents are compared only when the lengths already agree.
   3859   const int32_t len = length();
   3860   return len == text.length() && doEquals(text, len);
   3861 }
   3863 
   3864 inline UBool UnicodeString::operator!= (const UnicodeString& text) const {
          // Plain negation of operator==.
   3865   return !operator==(text);
   3866 }
   3867 
   3868 inline UBool UnicodeString::operator> (const UnicodeString& text) const {
          // True when doCompare() of the two whole strings yields 1.
   3869   return doCompare(0, length(), text, 0, text.length()) == 1;
   3870 }
   3871 
   3872 inline UBool UnicodeString::operator< (const UnicodeString& text) const {
          // True when doCompare() of the two whole strings yields -1.
   3873   return doCompare(0, length(), text, 0, text.length()) == -1;
   3874 }
   3875 
   3876 inline UBool UnicodeString::operator>= (const UnicodeString& text) const {
          // True unless doCompare() of the two whole strings yields -1.
   3877   return doCompare(0, length(), text, 0, text.length()) != -1;
   3878 }
   3879 
   3880 inline UBool UnicodeString::operator<= (const UnicodeString& text) const {
          // True unless doCompare() of the two whole strings yields 1.
   3881   return doCompare(0, length(), text, 0, text.length()) != 1;
   3882 }
   3883 
   3884 inline int8_t UnicodeString::compare(const UnicodeString& text) const {
          // Whole string against whole string, via doCompare().
   3885   return doCompare(0, length(), text, 0, text.length());
   3886 }
   3887 
   3888 inline int8_t
   3889 UnicodeString::compare(int32_t start, int32_t _length,
   3890                        const UnicodeString& srcText) const {
          // A range of this string against all of srcText.
   3891   return doCompare(start, _length, srcText, 0, srcText.length());
   3892 }
   3893 
   3894 inline int8_t
   3895 UnicodeString::compare(const UChar *srcChars, int32_t srcLength) const {
          // Whole string against the first srcLength units of srcChars.
   3896   return doCompare(0, length(), srcChars, 0, srcLength);
   3897 }
   3898 
   3899 inline int8_t
   3900 UnicodeString::compare(int32_t start, int32_t _length,
   3901                        const UnicodeString& srcText,
   3902                        int32_t srcStart, int32_t srcLength) const {
          // Range against range; every argument goes to doCompare() unchanged.
   3903   return doCompare(start, _length, srcText, srcStart, srcLength);
   3904 }
   3906 
   3907 inline int8_t
   3908 UnicodeString::compare(int32_t start, int32_t _length,
   3909                        const UChar *srcChars) const {
          // _length is used for both sides: the range of this string and the
          // number of units taken from the start of srcChars.
   3910   return doCompare(start, _length, srcChars, 0, _length);
   3911 }
   3912 
   3913 inline int8_t
   3914 UnicodeString::compare(int32_t start, int32_t _length,
   3915                        const UChar *srcChars,
   3916                        int32_t srcStart, int32_t srcLength) const {
          // Range against a range of a UChar array; arguments pass through unchanged.
   3917   return doCompare(start, _length, srcChars, srcStart, srcLength);
   3918 }
   3920 
   3921 inline int8_t
   3922 UnicodeString::compareBetween(int32_t start, int32_t limit,
   3923                               const UnicodeString& srcText,
   3924                               int32_t srcStart, int32_t srcLimit) const {
          // Turn both start/limit pairs into start+length form for doCompare().
   3925   const int32_t thisLength = limit - start;
   3926   const int32_t srcLength = srcLimit - srcStart;
   3927   return doCompare(start, thisLength, srcText, srcStart, srcLength);
   3928 }
   3929 
   3930 inline int8_t
   3931 UnicodeString::doCompareCodePointOrder(int32_t start,
   3932                                        int32_t thisLength,
   3933                                        const UnicodeString& srcText,
   3934                                        int32_t srcStart,
   3935                                        int32_t srcLength) const {
          // Bogus source: 0 if this string is bogus too, 1 otherwise.
   3936   if(srcText.isBogus()) {
   3937     return (int8_t)!isBogus();
   3938   }
          // Pin the source range, then defer to the UChar* overload.
   3939   srcText.pinIndices(srcStart, srcLength);
   3940   const UChar *srcChars = srcText.getArrayStart();
   3941   return doCompareCodePointOrder(start, thisLength, srcChars, srcStart, srcLength);
   3942 }
   3944 
   3945 inline int8_t UnicodeString::compareCodePointOrder(const UnicodeString& text) const {
          // Whole string against whole string, via doCompareCodePointOrder().
   3946   return doCompareCodePointOrder(0, length(), text, 0, text.length());
   3947 }
   3948 
   3949 inline int8_t
   3950 UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
   3951                                      const UnicodeString& srcText) const {
          // A range of this string against all of srcText.
   3952   return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length());
   3953 }
   3954 
   3955 inline int8_t
   3956 UnicodeString::compareCodePointOrder(const UChar *srcChars, int32_t srcLength) const {
          // Whole string against the first srcLength units of srcChars.
   3957   return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength);
   3958 }
   3959 
   3960 inline int8_t
   3961 UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
   3962                                      const UnicodeString& srcText,
   3963                                      int32_t srcStart, int32_t srcLength) const {
          // Range against range; arguments pass through unchanged.
   3964   return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength);
   3965 }
   3967 
   3968 inline int8_t
   3969 UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
   3970                                      const UChar *srcChars) const {
          // _length is used for both sides: the range of this string and the
          // number of units taken from the start of srcChars.
   3971   return doCompareCodePointOrder(start, _length, srcChars, 0, _length);
   3972 }
   3973 
   3974 inline int8_t
   3975 UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
   3976                                      const UChar *srcChars,
   3977                                      int32_t srcStart, int32_t srcLength) const {
          // Range against a range of a UChar array; arguments pass through unchanged.
   3978   return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength);
   3979 }
   3981 
   3982 inline int8_t
   3983 UnicodeString::compareCodePointOrderBetween(int32_t start, int32_t limit,
   3984                                             const UnicodeString& srcText,
   3985                                             int32_t srcStart, int32_t srcLimit) const {
          // Turn both start/limit pairs into start+length form.
   3986   const int32_t thisLength = limit - start;
   3987   const int32_t srcLength = srcLimit - srcStart;
   3988   return doCompareCodePointOrder(start, thisLength, srcText, srcStart, srcLength);
   3989 }
   3990 
   3991 inline int8_t
   3992 UnicodeString::doCaseCompare(int32_t start,
   3993                              int32_t thisLength,
   3994                              const UnicodeString &srcText,
   3995                              int32_t srcStart,
   3996                              int32_t srcLength,
   3997                              uint32_t options) const
   3998 {
          // Bogus source: 0 if this string is bogus too, 1 otherwise.
   3999   if(srcText.isBogus()) {
   4000     return (int8_t)!isBogus();
   4001   }
          // Pin the source range, then defer to the UChar* overload.
   4002   srcText.pinIndices(srcStart, srcLength);
   4003   const UChar *srcChars = srcText.getArrayStart();
   4004   return doCaseCompare(start, thisLength, srcChars, srcStart, srcLength, options);
   4005 }
   4006 
   4007 inline int8_t UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const
   4008 {
          // Whole string against whole string, via doCaseCompare().
   4009   return doCaseCompare(0, length(), text, 0, text.length(), options);
   4010 }
   4011 
   4012 inline int8_t
   4013 UnicodeString::caseCompare(int32_t start, int32_t _length,
   4014                            const UnicodeString &srcText,
   4015                            uint32_t options) const
   4016 {
          // A range of this string against all of srcText.
   4017   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
   4018 }
   4019 
   4020 inline int8_t
   4021 UnicodeString::caseCompare(const UChar *srcChars, int32_t srcLength,
   4022                            uint32_t options) const
   4023 {
          // Whole string against the first srcLength units of srcChars.
   4024   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
   4025 }
   4026 
   4027 inline int8_t
   4028 UnicodeString::caseCompare(int32_t start, int32_t _length,
   4029                            const UnicodeString &srcText,
   4030                            int32_t srcStart, int32_t srcLength,
   4031                            uint32_t options) const
   4032 {
          // Range against range; arguments pass through unchanged.
   4033   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
   4034 }
   4036 
   4037 inline int8_t
   4038 UnicodeString::caseCompare(int32_t start, int32_t _length,
   4039                            const UChar *srcChars,
   4040                            uint32_t options) const
   4041 {
          // _length is used for both sides: the range of this string and the
          // number of units taken from the start of srcChars.
   4042   return doCaseCompare(start, _length, srcChars, 0, _length, options);
   4043 }
   4044 
   4045 inline int8_t
   4046 UnicodeString::caseCompare(int32_t start, int32_t _length,
   4047                            const UChar *srcChars,
   4048                            int32_t srcStart, int32_t srcLength,
   4049                            uint32_t options) const
   4050 {
          // Range against a range of a UChar array; arguments pass through unchanged.
   4051   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
   4052 }
   4054 
   4055 inline int8_t
   4056 UnicodeString::caseCompareBetween(int32_t start, int32_t limit,
   4057                                   const UnicodeString &srcText,
   4058                                   int32_t srcStart, int32_t srcLimit,
   4059                                   uint32_t options) const
   4060 {
          // Turn both start/limit pairs into start+length form.
   4061   const int32_t thisLength = limit - start;
   4062   const int32_t srcLength = srcLimit - srcStart;
   4063   return doCaseCompare(start, thisLength, srcText, srcStart, srcLength, options);
   4064 }
   4064 
   4065 inline int32_t
   4066 UnicodeString::indexOf(const UnicodeString& srcText,
   4067                        int32_t srcStart, int32_t srcLength,
   4068                        int32_t start, int32_t _length) const {
          // A bogus pattern is never found.
   4069   if(srcText.isBogus()) {
   4070     return -1;
   4071   }
   4072   srcText.pinIndices(srcStart, srcLength);
          // After pinning, an empty pattern range is reported as not found too.
   4073   if(srcLength <= 0) {
   4074     return -1;
   4075   }
   4076   return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
   4077 }
   4080 
   4081 inline int32_t UnicodeString::indexOf(const UnicodeString& text) const {
          // Search for all of text within all of this string.
   4082   return indexOf(text, 0, text.length(), 0, length());
   4083 }
   4084 
   4085 inline int32_t
   4086 UnicodeString::indexOf(const UnicodeString& text, int32_t start) const {
          // Pin start first so that the remaining length cannot go negative.
   4087   pinIndex(start);
   4088   const int32_t remaining = length() - start;
   4089   return indexOf(text, 0, text.length(), start, remaining);
   4090 }
   4091 
   4092 inline int32_t
   4093 UnicodeString::indexOf(const UnicodeString& text,
   4094                        int32_t start, int32_t _length) const {
          // Search for all of text within the given range of this string.
   4095   return indexOf(text, 0, text.length(), start, _length);
   4096 }
   4097 
   4098 inline int32_t
   4099 UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength,
   4100                        int32_t start) const
   4101 {
          // Pin start first so that the remaining length cannot go negative.
   4102   pinIndex(start);
   4103   const int32_t remaining = length() - start;
   4104   return indexOf(srcChars, 0, srcLength, start, remaining);
   4105 }
   4105 
   4106 inline int32_t
   4107 UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength,
   4108                        int32_t start, int32_t _length) const {
          // The pattern is the first srcLength units of srcChars (srcStart is 0).
   4109   return indexOf(srcChars, 0, srcLength, start, _length);
   4110 }
   4112 
   4113 inline int32_t
   4114 UnicodeString::indexOf(UChar c,
   4115                int32_t start,
   4116                int32_t _length) const
   4117 { return doIndexOf(c, start, _length); }
   4118 
   4119 inline int32_t
   4120 UnicodeString::indexOf(UChar32 c,
   4121                int32_t start,
   4122                int32_t _length) const
   4123 { return doIndexOf(c, start, _length); }
   4124 
   4125 inline int32_t
   4126 UnicodeString::indexOf(UChar c) const
   4127 { return doIndexOf(c, 0, length()); }
   4128 
   4129 inline int32_t
   4130 UnicodeString::indexOf(UChar32 c) const
   4131 { return indexOf(c, 0, length()); }
   4132 
   4133 inline int32_t
   4134 UnicodeString::indexOf(UChar c,
   4135                int32_t start) const {
   4136   pinIndex(start);
   4137   return doIndexOf(c, start, length() - start);
   4138 }
   4139 
   4140 inline int32_t
   4141 UnicodeString::indexOf(UChar32 c,
   4142                int32_t start) const {
   4143   pinIndex(start);
   4144   return indexOf(c, start, length() - start);
   4145 }
   4146 
   4147 inline int32_t
   4148 UnicodeString::lastIndexOf(const UChar *srcChars,
   4149                int32_t srcLength,
   4150                int32_t start,
   4151                int32_t _length) const
   4152 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
   4153 
   4154 inline int32_t
   4155 UnicodeString::lastIndexOf(const UChar *srcChars,
   4156                int32_t srcLength,
   4157                int32_t start) const {
   4158   pinIndex(start);
   4159   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
   4160 }
   4161 
   4162 inline int32_t
   4163 UnicodeString::lastIndexOf(const UnicodeString& srcText,
   4164                int32_t srcStart,
   4165                int32_t srcLength,
   4166                int32_t start,
   4167                int32_t _length) const
   4168 {
   4169   if(!srcText.isBogus()) {
   4170     srcText.pinIndices(srcStart, srcLength);
   4171     if(srcLength > 0) {
   4172       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
   4173     }
   4174   }
   4175   return -1;
   4176 }
   4177 
   4178 inline int32_t
   4179 UnicodeString::lastIndexOf(const UnicodeString& text,
   4180                int32_t start,
   4181                int32_t _length) const
   4182 { return lastIndexOf(text, 0, text.length(), start, _length); }
   4183 
   4184 inline int32_t
   4185 UnicodeString::lastIndexOf(const UnicodeString& text,
   4186                int32_t start) const {
   4187   pinIndex(start);
   4188   return lastIndexOf(text, 0, text.length(), start, length() - start);
   4189 }
   4190 
   4191 inline int32_t
   4192 UnicodeString::lastIndexOf(const UnicodeString& text) const
   4193 { return lastIndexOf(text, 0, text.length(), 0, length()); }
   4194 
   4195 inline int32_t
   4196 UnicodeString::lastIndexOf(UChar c,
   4197                int32_t start,
   4198                int32_t _length) const
   4199 { return doLastIndexOf(c, start, _length); }
   4200 
   4201 inline int32_t
   4202 UnicodeString::lastIndexOf(UChar32 c,
   4203                int32_t start,
   4204                int32_t _length) const {
   4205   return doLastIndexOf(c, start, _length);
   4206 }
   4207 
   4208 inline int32_t
   4209 UnicodeString::lastIndexOf(UChar c) const
   4210 { return doLastIndexOf(c, 0, length()); }
   4211 
   4212 inline int32_t
   4213 UnicodeString::lastIndexOf(UChar32 c) const {
   4214   return lastIndexOf(c, 0, length());
   4215 }
   4216 
   4217 inline int32_t
   4218 UnicodeString::lastIndexOf(UChar c,
   4219                int32_t start) const {
   4220   pinIndex(start);
   4221   return doLastIndexOf(c, start, length() - start);
   4222 }
   4223 
   4224 inline int32_t
   4225 UnicodeString::lastIndexOf(UChar32 c,
   4226                int32_t start) const {
   4227   pinIndex(start);
   4228   return lastIndexOf(c, start, length() - start);
   4229 }
   4230 
   4231 inline UBool
   4232 UnicodeString::startsWith(const UnicodeString& text) const
   4233 { return compare(0, text.length(), text, 0, text.length()) == 0; }
   4234 
   4235 inline UBool
   4236 UnicodeString::startsWith(const UnicodeString& srcText,
   4237               int32_t srcStart,
   4238               int32_t srcLength) const
   4239 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
   4240 
   4241 inline UBool
   4242 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
   4243   if(srcLength < 0) {
   4244     srcLength = u_strlen(srcChars);
   4245   }
   4246   return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
   4247 }
   4248 
   4249 inline UBool
   4250 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
   4251   if(srcLength < 0) {
   4252     srcLength = u_strlen(srcChars);
   4253   }
   4254   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
   4255 }
   4256 
   4257 inline UBool
   4258 UnicodeString::endsWith(const UnicodeString& text) const
   4259 { return doCompare(length() - text.length(), text.length(),
   4260            text, 0, text.length()) == 0; }
   4261 
   4262 inline UBool
   4263 UnicodeString::endsWith(const UnicodeString& srcText,
   4264             int32_t srcStart,
   4265             int32_t srcLength) const {
   4266   srcText.pinIndices(srcStart, srcLength);
   4267   return doCompare(length() - srcLength, srcLength,
   4268                    srcText, srcStart, srcLength) == 0;
   4269 }
   4270 
   4271 inline UBool
   4272 UnicodeString::endsWith(const UChar *srcChars,
   4273             int32_t srcLength) const {
   4274   if(srcLength < 0) {
   4275     srcLength = u_strlen(srcChars);
   4276   }
   4277   return doCompare(length() - srcLength, srcLength,
   4278                    srcChars, 0, srcLength) == 0;
   4279 }
   4280 
   4281 inline UBool
   4282 UnicodeString::endsWith(const UChar *srcChars,
   4283             int32_t srcStart,
   4284             int32_t srcLength) const {
   4285   if(srcLength < 0) {
   4286     srcLength = u_strlen(srcChars + srcStart);
   4287   }
   4288   return doCompare(length() - srcLength, srcLength,
   4289                    srcChars, srcStart, srcLength) == 0;
   4290 }
   4291 
   4292 //========================================
   4293 // replace
   4294 //========================================
   4295 inline UnicodeString&
   4296 UnicodeString::replace(int32_t start,
   4297                int32_t _length,
   4298                const UnicodeString& srcText)
   4299 { return doReplace(start, _length, srcText, 0, srcText.length()); }
   4300 
   4301 inline UnicodeString&
   4302 UnicodeString::replace(int32_t start,
   4303                int32_t _length,
   4304                const UnicodeString& srcText,
   4305                int32_t srcStart,
   4306                int32_t srcLength)
   4307 { return doReplace(start, _length, srcText, srcStart, srcLength); }
   4308 
   4309 inline UnicodeString&
   4310 UnicodeString::replace(int32_t start,
   4311                int32_t _length,
   4312                const UChar *srcChars,
   4313                int32_t srcLength)
   4314 { return doReplace(start, _length, srcChars, 0, srcLength); }
   4315 
   4316 inline UnicodeString&
   4317 UnicodeString::replace(int32_t start,
   4318                int32_t _length,
   4319                const UChar *srcChars,
   4320                int32_t srcStart,
   4321                int32_t srcLength)
   4322 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
   4323 
   4324 inline UnicodeString&
   4325 UnicodeString::replace(int32_t start,
   4326                int32_t _length,
   4327                UChar srcChar)
   4328 { return doReplace(start, _length, &srcChar, 0, 1); }
   4329 
   4330 inline UnicodeString&
   4331 UnicodeString::replaceBetween(int32_t start,
   4332                   int32_t limit,
   4333                   const UnicodeString& srcText)
   4334 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
   4335 
   4336 inline UnicodeString&
   4337 UnicodeString::replaceBetween(int32_t start,
   4338                   int32_t limit,
   4339                   const UnicodeString& srcText,
   4340                   int32_t srcStart,
   4341                   int32_t srcLimit)
   4342 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
   4343 
   4344 inline UnicodeString&
   4345 UnicodeString::findAndReplace(const UnicodeString& oldText,
   4346                   const UnicodeString& newText)
   4347 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
   4348             newText, 0, newText.length()); }
   4349 
   4350 inline UnicodeString&
   4351 UnicodeString::findAndReplace(int32_t start,
   4352                   int32_t _length,
   4353                   const UnicodeString& oldText,
   4354                   const UnicodeString& newText)
   4355 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
   4356             newText, 0, newText.length()); }
   4357 
   4358 // ============================
   4359 // extract
   4360 // ============================
   4361 inline void
   4362 UnicodeString::doExtract(int32_t start,
   4363              int32_t _length,
   4364              UnicodeString& target) const
   4365 { target.replace(0, target.length(), *this, start, _length); }
   4366 
   4367 inline void
   4368 UnicodeString::extract(int32_t start,
   4369                int32_t _length,
   4370                UChar *target,
   4371                int32_t targetStart) const
   4372 { doExtract(start, _length, target, targetStart); }
   4373 
   4374 inline void
   4375 UnicodeString::extract(int32_t start,
   4376                int32_t _length,
   4377                UnicodeString& target) const
   4378 { doExtract(start, _length, target); }
   4379 
   4380 #if !UCONFIG_NO_CONVERSION
   4381 
   4382 inline int32_t
   4383 UnicodeString::extract(int32_t start,
   4384                int32_t _length,
   4385                char *dst,
   4386                const char *codepage) const
   4387 
   4388 {
   4389   // This dstSize value will be checked explicitly
   4390   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
   4391 }
   4392 
   4393 #endif
   4394 
   4395 inline void
   4396 UnicodeString::extractBetween(int32_t start,
   4397                   int32_t limit,
   4398                   UChar *dst,
   4399                   int32_t dstStart) const {
   4400   pinIndex(start);
   4401   pinIndex(limit);
   4402   doExtract(start, limit - start, dst, dstStart);
   4403 }
   4404 
   4405 inline UnicodeString
   4406 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
   4407     return tempSubString(start, limit - start);
   4408 }
   4409 
   4410 inline UChar
   4411 UnicodeString::doCharAt(int32_t offset) const
   4412 {
   4413   if((uint32_t)offset < (uint32_t)length()) {
   4414     return getArrayStart()[offset];
   4415   } else {
   4416     return kInvalidUChar;
   4417   }
   4418 }
   4419 
   4420 inline UChar
   4421 UnicodeString::charAt(int32_t offset) const
   4422 { return doCharAt(offset); }
   4423 
   4424 inline UChar
   4425 UnicodeString::operator[] (int32_t offset) const
   4426 { return doCharAt(offset); }
   4427 
   4428 inline UBool
   4429 UnicodeString::isEmpty() const {
   4430   // Arithmetic or logical right shift does not matter: only testing for 0.
   4431   return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
   4432 }
   4433 
   4434 //========================================
   4435 // Write implementation methods
   4436 //========================================
   4437 inline void
   4438 UnicodeString::setZeroLength() {
   4439   fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
   4440 }
   4441 
   4442 inline void
   4443 UnicodeString::setShortLength(int32_t len) {
   4444   // requires 0 <= len <= kMaxShortLength
   4445   fUnion.fFields.fLengthAndFlags =
   4446     (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
   4447 }
   4448 
   4449 inline void
   4450 UnicodeString::setLength(int32_t len) {
   4451   if(len <= kMaxShortLength) {
   4452     setShortLength(len);
   4453   } else {
   4454     fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
   4455     fUnion.fFields.fLength = len;
   4456   }
   4457 }
   4458 
   4459 inline void
   4460 UnicodeString::setToEmpty() {
   4461   fUnion.fFields.fLengthAndFlags = kShortString;
   4462 }
   4463 
   4464 inline void
   4465 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
   4466   setLength(len);
   4467   fUnion.fFields.fArray = array;
   4468   fUnion.fFields.fCapacity = capacity;
   4469 }
   4470 
   4471 inline UnicodeString&
   4472 UnicodeString::operator= (UChar ch)
   4473 { return doReplace(0, length(), &ch, 0, 1); }
   4474 
   4475 inline UnicodeString&
   4476 UnicodeString::operator= (UChar32 ch)
   4477 { return replace(0, length(), ch); }
   4478 
   4479 inline UnicodeString&
   4480 UnicodeString::setTo(const UnicodeString& srcText,
   4481              int32_t srcStart,
   4482              int32_t srcLength)
   4483 {
   4484   unBogus();
   4485   return doReplace(0, length(), srcText, srcStart, srcLength);
   4486 }
   4487 
   4488 inline UnicodeString&
   4489 UnicodeString::setTo(const UnicodeString& srcText,
   4490              int32_t srcStart)
   4491 {
   4492   unBogus();
   4493   srcText.pinIndex(srcStart);
   4494   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
   4495 }
   4496 
   4497 inline UnicodeString&
   4498 UnicodeString::setTo(const UnicodeString& srcText)
   4499 {
   4500   return copyFrom(srcText);
   4501 }
   4502 
   4503 inline UnicodeString&
   4504 UnicodeString::setTo(const UChar *srcChars,
   4505              int32_t srcLength)
   4506 {
   4507   unBogus();
   4508   return doReplace(0, length(), srcChars, 0, srcLength);
   4509 }
   4510 
   4511 inline UnicodeString&
   4512 UnicodeString::setTo(UChar srcChar)
   4513 {
   4514   unBogus();
   4515   return doReplace(0, length(), &srcChar, 0, 1);
   4516 }
   4517 
   4518 inline UnicodeString&
   4519 UnicodeString::setTo(UChar32 srcChar)
   4520 {
   4521   unBogus();
   4522   return replace(0, length(), srcChar);
   4523 }
   4524 
   4525 inline UnicodeString&
   4526 UnicodeString::append(const UnicodeString& srcText,
   4527               int32_t srcStart,
   4528               int32_t srcLength)
   4529 { return doAppend(srcText, srcStart, srcLength); }
   4530 
   4531 inline UnicodeString&
   4532 UnicodeString::append(const UnicodeString& srcText)
   4533 { return doAppend(srcText, 0, srcText.length()); }
   4534 
   4535 inline UnicodeString&
   4536 UnicodeString::append(const UChar *srcChars,
   4537               int32_t srcStart,
   4538               int32_t srcLength)
   4539 { return doAppend(srcChars, srcStart, srcLength); }
   4540 
   4541 inline UnicodeString&
   4542 UnicodeString::append(const UChar *srcChars,
   4543               int32_t srcLength)
   4544 { return doAppend(srcChars, 0, srcLength); }
   4545 
   4546 inline UnicodeString&
   4547 UnicodeString::append(UChar srcChar)
   4548 { return doAppend(&srcChar, 0, 1); }
   4549 
   4550 inline UnicodeString&
   4551 UnicodeString::operator+= (UChar ch)
   4552 { return doAppend(&ch, 0, 1); }
   4553 
   4554 inline UnicodeString&
   4555 UnicodeString::operator+= (UChar32 ch) {
   4556   return append(ch);
   4557 }
   4558 
   4559 inline UnicodeString&
   4560 UnicodeString::operator+= (const UnicodeString& srcText)
   4561 { return doAppend(srcText, 0, srcText.length()); }
   4562 
   4563 inline UnicodeString&
   4564 UnicodeString::insert(int32_t start,
   4565               const UnicodeString& srcText,
   4566               int32_t srcStart,
   4567               int32_t srcLength)
   4568 { return doReplace(start, 0, srcText, srcStart, srcLength); }
   4569 
   4570 inline UnicodeString&
   4571 UnicodeString::insert(int32_t start,
   4572               const UnicodeString& srcText)
   4573 { return doReplace(start, 0, srcText, 0, srcText.length()); }
   4574 
   4575 inline UnicodeString&
   4576 UnicodeString::insert(int32_t start,
   4577               const UChar *srcChars,
   4578               int32_t srcStart,
   4579               int32_t srcLength)
   4580 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
   4581 
   4582 inline UnicodeString&
   4583 UnicodeString::insert(int32_t start,
   4584               const UChar *srcChars,
   4585               int32_t srcLength)
   4586 { return doReplace(start, 0, srcChars, 0, srcLength); }
   4587 
   4588 inline UnicodeString&
   4589 UnicodeString::insert(int32_t start,
   4590               UChar srcChar)
   4591 { return doReplace(start, 0, &srcChar, 0, 1); }
   4592 
   4593 inline UnicodeString&
   4594 UnicodeString::insert(int32_t start,
   4595               UChar32 srcChar)
   4596 { return replace(start, 0, srcChar); }
   4597 
   4598 
   4599 inline UnicodeString&
   4600 UnicodeString::remove()
   4601 {
   4602   // remove() of a bogus string makes the string empty and non-bogus
   4603   if(isBogus()) {
   4604     setToEmpty();
   4605   } else {
   4606     setZeroLength();
   4607   }
   4608   return *this;
   4609 }
   4610 
   4611 inline UnicodeString&
   4612 UnicodeString::remove(int32_t start,
   4613              int32_t _length)
   4614 {
   4615     if(start <= 0 && _length == INT32_MAX) {
   4616         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
   4617         return remove();
   4618     }
   4619     return doReplace(start, _length, NULL, 0, 0);
   4620 }
   4621 
   4622 inline UnicodeString&
   4623 UnicodeString::removeBetween(int32_t start,
   4624                 int32_t limit)
   4625 { return doReplace(start, limit - start, NULL, 0, 0); }
   4626 
   4627 inline UnicodeString &
   4628 UnicodeString::retainBetween(int32_t start, int32_t limit) {
   4629   truncate(limit);
   4630   return doReplace(0, start, NULL, 0, 0);
   4631 }
   4632 
   4633 inline UBool
   4634 UnicodeString::truncate(int32_t targetLength)
   4635 {
   4636   if(isBogus() && targetLength == 0) {
   4637     // truncate(0) of a bogus string makes the string empty and non-bogus
   4638     unBogus();
   4639     return FALSE;
   4640   } else if((uint32_t)targetLength < (uint32_t)length()) {
   4641     setLength(targetLength);
   4642     return TRUE;
   4643   } else {
   4644     return FALSE;
   4645   }
   4646 }
   4647 
   4648 inline UnicodeString&
   4649 UnicodeString::reverse()
   4650 { return doReverse(0, length()); }
   4651 
   4652 inline UnicodeString&
   4653 UnicodeString::reverse(int32_t start,
   4654                int32_t _length)
   4655 { return doReverse(start, _length); }
   4656 
   4657 U_NAMESPACE_END
   4658 
   4659 #endif
   4660