Home | History | Annotate | Download | only in unicode
      1 /*
      2 **********************************************************************
      3 *   Copyright (C) 1998-2011, International Business Machines
      4 *   Corporation and others.  All Rights Reserved.
      5 **********************************************************************
      6 *
      7 * File unistr.h
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   09/25/98    stephen     Creation.
     13 *   11/11/98    stephen     Changed per 11/9 code review.
     14 *   04/20/99    stephen     Overhauled per 4/16 code review.
     15 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
     16 *                           handleReplaceBetween(); other methods unchanged.
     17 *   06/25/01    grhoten     Remove dependency on iostream.
     18 ******************************************************************************
     19 */
     20 
     21 #ifndef UNISTR_H
     22 #define UNISTR_H
     23 
     24 /**
     25  * \file
     26  * \brief C++ API: Unicode String
     27  */
     28 
     29 #include "unicode/utypes.h"
     30 #include "unicode/rep.h"
     31 #include "unicode/std_string.h"
     32 #include "unicode/stringpiece.h"
     33 #include "unicode/bytestream.h"
     34 
     35 struct UConverter;          // unicode/ucnv.h
     36 class  StringThreadTest;
     37 
     38 #ifndef U_COMPARE_CODE_POINT_ORDER
     39 /* see also ustring.h and unorm.h */
     40 /**
     41  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
     42  * Compare strings in code point order instead of code unit order.
     43  * @stable ICU 2.2
     44  */
     45 #define U_COMPARE_CODE_POINT_ORDER  0x8000
     46 #endif
     47 
     48 #ifndef USTRING_H /* declare u_strlen() locally unless USTRING_H is already defined (presumably by unicode/ustring.h -- confirm) */
     49 /**
     50  * \ingroup ustring_ustrlen
     51  */
     52 U_STABLE int32_t U_EXPORT2
     53 u_strlen(const UChar *s);
     54 #endif /* USTRING_H */
     55 
     56 U_NAMESPACE_BEGIN
     57 
     58 class BreakIterator;        // unicode/brkiter.h
     59 class Locale;               // unicode/locid.h
     60 class StringCharacterIterator;
     61 class UnicodeStringAppendable;  // unicode/appendable.h
     62 
     63 /* The <iostream> include has been moved to unicode/ustream.h */
     64 
     65 /**
     66  * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
     67  * which constructs a Unicode string from an invariant-character char * string.
     68  * About invariant characters see utypes.h.
     69  * This constructor has no runtime dependency on conversion code and is
     70  * therefore recommended over ones taking a charset name string
     71  * (where the empty string "" indicates invariant-character conversion).
     72  *
     73  * @stable ICU 3.2
     74  */
     75 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
     76 
     77 /**
     78  * Unicode String literals in C++.
     79  * Dependent on the platform properties, different UnicodeString
     80  * constructors should be used to create a UnicodeString object from
     81  * a string literal.
     82  * The macros are defined for maximum performance.
     83  * They work only for strings that contain "invariant characters", i.e.,
     84  * only Latin letters, digits, and some punctuation.
     85  * See utypes.h for details.
     86  *
     87  * The string parameter must be a C string literal.
     88  * The length of the string, not including the terminating
     89  * <code>NUL</code>, must be specified as a constant.
     90  * The U_STRING_DECL macro should be invoked exactly once for one
     91  * such string variable before it is used.
     92  * @stable ICU 2.0
     93  */
     94 #if defined(U_DECLARE_UTF16) /* U_DECLARE_UTF16 is available: wrap the literal with it and cast the result to const UChar * */
     95 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
     96 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) /* wchar_t and UChar have the same size, and the charset family is ASCII or U_WCHAR_IS_UTF16 is defined with a 2-byte UChar: paste an L prefix to form a wide literal */
     97 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
     98 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY /* UChar is one byte wide on an ASCII-family platform: cast the char literal directly */
     99 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
    100 #else /* no literal form can be cast to const UChar *: use the (char *, int32_t, EInvariant) constructor via US_INV */
    101 #   define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
    102 #endif /* UNICODE_STRING platform variants */
    103 
    104 /**
    105  * Unicode String literals in C++.
    106  * Dependent on the platform properties, different UnicodeString
    107  * constructors should be used to create a UnicodeString object from
    108  * a string literal.
    109  * The macros are defined for improved performance.
    110  * They work only for strings that contain "invariant characters", i.e.,
    111  * only Latin letters, digits, and some punctuation.
    112  * See utypes.h for details.
    113  *
    114  * The string parameter must be a C string literal.
    115  * @stable ICU 2.0
    116  */
    117 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
    118 
    119 /**
    120  * UnicodeString is a string class that stores Unicode characters directly and provides
    121  * functionality similar to that of the Java String and StringBuffer classes.
    122  * It is a concrete implementation of the abstract class Replaceable (for transliteration).
    123  *
    124  * The UnicodeString class is not suitable for subclassing.
    125  *
    126  * <p>For an overview of Unicode strings in C and C++ see the
    127  * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
    128  *
    129  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
    130  * A Unicode character may be stored with either one code unit
    131  * (the most common case) or with a matched pair of special code units
    132  * ("surrogates"). The data type for code units is UChar.
    133  * For single-character handling, a Unicode character code <em>point</em> is a value
    134  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
    135  *
    136  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
    137  * This is the same as with multi-byte char* strings in traditional string handling.
    138  * Operations on partial strings typically do not test for code point boundaries.
    139  * If necessary, the user needs to take care of such boundaries by testing for the code unit
    140  * values or by using functions like
    141  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
    142  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
    143  *
    144  * UnicodeString methods are more lenient with regard to input parameter values
    145  * than other ICU APIs. In particular:
    146  * - If indexes are out of bounds for a UnicodeString object
    147  *   (<0 or >length()) then they are "pinned" to the nearest boundary.
    148  * - If primitive string pointer values (e.g., const UChar * or char *)
    149  *   for input strings are NULL, then those input string parameters are treated
    150  *   as if they pointed to an empty string.
    151  *   However, this is <em>not</em> the case for char * parameters for charset names
    152  *   or other IDs.
    153  * - Most UnicodeString methods do not take a UErrorCode parameter because
    154  *   there are usually very few opportunities for failure other than a shortage
    155  *   of memory, error codes in low-level C++ string methods would be inconvenient,
    156  *   and the error code as the last parameter (ICU convention) would prevent
    157  *   the use of default parameter values.
    158  *   Instead, such methods set the UnicodeString into a "bogus" state
    159  *   (see isBogus()) if an error occurs.
    160  *
    161  * In string comparisons, two UnicodeString objects that are both "bogus"
    162  * compare equal (to be transitive and prevent endless loops in sorting),
    163  * and a "bogus" string compares less than any non-"bogus" one.
    164  *
    165  * Const UnicodeString methods are thread-safe. Multiple threads can use
    166  * const methods on the same UnicodeString object simultaneously,
    167  * but non-const methods must not be called concurrently (in multiple threads)
    168  * with any other (const or non-const) methods.
    169  *
    170  * Similarly, const UnicodeString & parameters are thread-safe.
    171  * One object may be passed in as such a parameter concurrently in multiple threads.
    172  * This includes the const UnicodeString & parameters for
    173  * copy construction, assignment, and cloning.
    174  *
    175  * <p>UnicodeString uses several storage methods.
    176  * String contents can be stored inside the UnicodeString object itself,
    177  * in an allocated and shared buffer, or in an outside buffer that is "aliased".
    178  * Most of this is done transparently, but careful aliasing in particular provides
    179  * significant performance improvements.
    180  * Also, the internal buffer is accessible via special functions.
    181  * For details see the
    182  * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
    183  *
    184  * @see utf.h
    185  * @see CharacterIterator
    186  * @stable ICU 2.0
    187  */
    188 class U_COMMON_API UnicodeString : public Replaceable
    189 {
    190 public:
    191 
    192   /**
    193    * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
    194    * which constructs a Unicode string from an invariant-character char * string.
    195    * Use the macro US_INV instead of the full qualification for this value.
    196    *
    197    * @see US_INV
    198    * @stable ICU 3.2
    199    */
    200   enum EInvariant {
    201     /**
    202      * @see EInvariant
    203      * @stable ICU 3.2
    204      */
    205     kInvariant
    206   };
    207 
    208   //========================================
    209   // Read-only operations
    210   //========================================
    211 
    212   /* Comparison - bitwise only - for international comparison use collation */
    213 
    214   /**
    215    * Equality operator. Performs only bitwise comparison.
    216    * @param text The UnicodeString to compare to this one.
    217    * @return TRUE if <TT>text</TT> contains the same characters as this one,
    218    * FALSE otherwise.
    219    * @stable ICU 2.0
    220    */
    221   inline UBool operator== (const UnicodeString& text) const;
    222 
    223   /**
    224    * Inequality operator. Performs only bitwise comparison.
    225    * @param text The UnicodeString to compare to this one.
    226    * @return FALSE if <TT>text</TT> contains the same characters as this one,
    227    * TRUE otherwise.
    228    * @stable ICU 2.0
    229    */
    230   inline UBool operator!= (const UnicodeString& text) const;
    231 
    232   /**
    233    * Greater than operator. Performs only bitwise comparison.
    234    * @param text The UnicodeString to compare to this one.
    235    * @return TRUE if the characters in this are bitwise
    236    * greater than the characters in <code>text</code>, FALSE otherwise
    237    * @stable ICU 2.0
    238    */
    239   inline UBool operator> (const UnicodeString& text) const;
    240 
    241   /**
    242    * Less than operator. Performs only bitwise comparison.
    243    * @param text The UnicodeString to compare to this one.
    244    * @return TRUE if the characters in this are bitwise
    245    * less than the characters in <code>text</code>, FALSE otherwise
    246    * @stable ICU 2.0
    247    */
    248   inline UBool operator< (const UnicodeString& text) const;
    249 
    250   /**
    251    * Greater than or equal operator. Performs only bitwise comparison.
    252    * @param text The UnicodeString to compare to this one.
    253    * @return TRUE if the characters in this are bitwise
    254    * greater than or equal to the characters in <code>text</code>, FALSE otherwise
    255    * @stable ICU 2.0
    256    */
    257   inline UBool operator>= (const UnicodeString& text) const;
    258 
    259   /**
    260    * Less than or equal operator. Performs only bitwise comparison.
    261    * @param text The UnicodeString to compare to this one.
    262    * @return TRUE if the characters in this are bitwise
    263    * less than or equal to the characters in <code>text</code>, FALSE otherwise
    264    * @stable ICU 2.0
    265    */
    266   inline UBool operator<= (const UnicodeString& text) const;
    267 
    268   /**
    269    * Compare the characters bitwise in this UnicodeString to
    270    * the characters in <code>text</code>.
    271    * @param text The UnicodeString to compare to this one.
    272    * @return The result of bitwise character comparison: 0 if this
    273    * contains the same characters as <code>text</code>, -1 if the characters in
    274    * this are bitwise less than the characters in <code>text</code>, +1 if the
    275    * characters in this are bitwise greater than the characters
    276    * in <code>text</code>.
    277    * @stable ICU 2.0
    278    */
    279   inline int8_t compare(const UnicodeString& text) const;
    280 
    281   /**
    282    * Compare the characters bitwise in the range
    283    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
    284    * in <TT>text</TT>
    285    * @param start the offset at which the compare operation begins
    286    * @param length the number of characters in this to compare.
    287    * @param text the other text to be compared against this string.
    288    * @return The result of bitwise character comparison: 0 if this
    289    * contains the same characters as <code>text</code>, -1 if the characters in
    290    * this are bitwise less than the characters in <code>text</code>, +1 if the
    291    * characters in this are bitwise greater than the characters
    292    * in <code>text</code>.
    293    * @stable ICU 2.0
    294    */
    295   inline int8_t compare(int32_t start,
    296          int32_t length,
    297          const UnicodeString& text) const;
    298 
    299   /**
    300    * Compare the characters bitwise in the range
    301    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
    302    * in <TT>srcText</TT> in the range
    303    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    304    * @param start the offset at which the compare operation begins
    305    * @param length the number of characters in this to compare.
    306    * @param srcText the text to be compared
    307    * @param srcStart the offset into <TT>srcText</TT> to start comparison
    308    * @param srcLength the number of characters in <TT>srcText</TT> to compare
    309    * @return The result of bitwise character comparison: 0 if this
    310    * contains the same characters as <code>srcText</code>, -1 if the characters in
    311    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
    312    * characters in this are bitwise greater than the characters
    313    * in <code>srcText</code>.
    314    * @stable ICU 2.0
    315    */
    316    inline int8_t compare(int32_t start,
    317          int32_t length,
    318          const UnicodeString& srcText,
    319          int32_t srcStart,
    320          int32_t srcLength) const;
    321 
    322   /**
    323    * Compare the characters bitwise in this UnicodeString with the first
    324    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
    325    * @param srcChars The characters to compare to this UnicodeString.
    326    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
    327    * @return The result of bitwise character comparison: 0 if this
    328    * contains the same characters as <code>srcChars</code>, -1 if the characters in
    329    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
    330    * characters in this are bitwise greater than the characters
    331    * in <code>srcChars</code>.
    332    * @stable ICU 2.0
    333    */
    334   inline int8_t compare(const UChar *srcChars,
    335          int32_t srcLength) const;
    336 
    337   /**
    338    * Compare the characters bitwise in the range
    339    * [<TT>start</TT>, <TT>start + length</TT>) with the first
    340    * <TT>length</TT> characters in <TT>srcChars</TT>
    341    * @param start the offset at which the compare operation begins
    342    * @param length the number of characters to compare.
    343    * @param srcChars the characters to be compared
    344    * @return The result of bitwise character comparison: 0 if this
    345    * contains the same characters as <code>srcChars</code>, -1 if the characters in
    346    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
    347    * characters in this are bitwise greater than the characters
    348    * in <code>srcChars</code>.
    349    * @stable ICU 2.0
    350    */
    351   inline int8_t compare(int32_t start,
    352          int32_t length,
    353          const UChar *srcChars) const;
    354 
    355   /**
    356    * Compare the characters bitwise in the range
    357    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
    358    * in <TT>srcChars</TT> in the range
    359    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    360    * @param start the offset at which the compare operation begins
    361    * @param length the number of characters in this to compare
    362    * @param srcChars the characters to be compared
    363    * @param srcStart the offset into <TT>srcChars</TT> to start comparison
    364    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
    365    * @return The result of bitwise character comparison: 0 if this
    366    * contains the same characters as <code>srcChars</code>, -1 if the characters in
    367    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
    368    * characters in this are bitwise greater than the characters
    369    * in <code>srcChars</code>.
    370    * @stable ICU 2.0
    371    */
    372   inline int8_t compare(int32_t start,
    373          int32_t length,
    374          const UChar *srcChars,
    375          int32_t srcStart,
    376          int32_t srcLength) const;
    377 
    378   /**
    379    * Compare the characters bitwise in the range
    380    * [<TT>start</TT>, <TT>limit</TT>) with the characters
    381    * in <TT>srcText</TT> in the range
    382    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
    383    * @param start the offset at which the compare operation begins
    384    * @param limit the offset immediately following the compare operation
    385    * @param srcText the text to be compared
    386    * @param srcStart the offset into <TT>srcText</TT> to start comparison
    387    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
    388    * @return The result of bitwise character comparison: 0 if this
    389    * contains the same characters as <code>srcText</code>, -1 if the characters in
    390    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
    391    * characters in this are bitwise greater than the characters
    392    * in <code>srcText</code>.
    393    * @stable ICU 2.0
    394    */
    395   inline int8_t compareBetween(int32_t start,
    396             int32_t limit,
    397             const UnicodeString& srcText,
    398             int32_t srcStart,
    399             int32_t srcLimit) const;
    400 
    401   /**
    402    * Compare two Unicode strings in code point order.
    403    * The result may be different from the results of compare(), operator<, etc.
    404    * if supplementary characters are present:
    405    *
    406    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    407    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    408    * which means that they compare as less than some other BMP characters like U+feff.
    409    * This function compares Unicode strings in code point order.
    410    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    411    *
    412    * @param text Another string to compare this one to.
    413    * @return a negative/zero/positive integer corresponding to whether
    414    * this string is less than/equal to/greater than the second one
    415    * in code point order
    416    * @stable ICU 2.0
    417    */
    418   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
    419 
    420   /**
    421    * Compare two Unicode strings in code point order.
    422    * The result may be different from the results of compare(), operator<, etc.
    423    * if supplementary characters are present:
    424    *
    425    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    426    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    427    * which means that they compare as less than some other BMP characters like U+feff.
    428    * This function compares Unicode strings in code point order.
    429    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    430    *
    431    * @param start The start offset in this string at which the compare operation begins.
    432    * @param length The number of code units from this string to compare.
    433    * @param srcText Another string to compare this one to.
    434    * @return a negative/zero/positive integer corresponding to whether
    435    * this string is less than/equal to/greater than the second one
    436    * in code point order
    437    * @stable ICU 2.0
    438    */
    439   inline int8_t compareCodePointOrder(int32_t start,
    440                                       int32_t length,
    441                                       const UnicodeString& srcText) const;
    442 
    443   /**
    444    * Compare two Unicode strings in code point order.
    445    * The result may be different from the results of compare(), operator<, etc.
    446    * if supplementary characters are present:
    447    *
    448    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    449    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    450    * which means that they compare as less than some other BMP characters like U+feff.
    451    * This function compares Unicode strings in code point order.
    452    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    453    *
    454    * @param start The start offset in this string at which the compare operation begins.
    455    * @param length The number of code units from this string to compare.
    456    * @param srcText Another string to compare this one to.
    457    * @param srcStart The start offset in that string at which the compare operation begins.
    458    * @param srcLength The number of code units from that string to compare.
    459    * @return a negative/zero/positive integer corresponding to whether
    460    * this string is less than/equal to/greater than the second one
    461    * in code point order
    462    * @stable ICU 2.0
    463    */
    464    inline int8_t compareCodePointOrder(int32_t start,
    465                                        int32_t length,
    466                                        const UnicodeString& srcText,
    467                                        int32_t srcStart,
    468                                        int32_t srcLength) const;
    469 
    470   /**
    471    * Compare two Unicode strings in code point order.
    472    * The result may be different from the results of compare(), operator<, etc.
    473    * if supplementary characters are present:
    474    *
    475    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    476    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    477    * which means that they compare as less than some other BMP characters like U+feff.
    478    * This function compares Unicode strings in code point order.
    479    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    480    *
    481    * @param srcChars A pointer to another string to compare this one to.
    482    * @param srcLength The number of code units from that string to compare.
    483    * @return a negative/zero/positive integer corresponding to whether
    484    * this string is less than/equal to/greater than the second one
    485    * in code point order
    486    * @stable ICU 2.0
    487    */
    488   inline int8_t compareCodePointOrder(const UChar *srcChars,
    489                                       int32_t srcLength) const;
    490 
    491   /**
    492    * Compare two Unicode strings in code point order.
    493    * The result may be different from the results of compare(), operator<, etc.
    494    * if supplementary characters are present:
    495    *
    496    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    497    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    498    * which means that they compare as less than some other BMP characters like U+feff.
    499    * This function compares Unicode strings in code point order.
    500    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    501    *
    502    * @param start The start offset in this string at which the compare operation begins.
    503    * @param length The number of code units from this string to compare.
    504    * @param srcChars A pointer to another string to compare this one to.
    505    * @return a negative/zero/positive integer corresponding to whether
    506    * this string is less than/equal to/greater than the second one
    507    * in code point order
    508    * @stable ICU 2.0
    509    */
    510   inline int8_t compareCodePointOrder(int32_t start,
    511                                       int32_t length,
    512                                       const UChar *srcChars) const;
    513 
    514   /**
    515    * Compare two Unicode strings in code point order.
    516    * The result may be different from the results of compare(), operator<, etc.
    517    * if supplementary characters are present:
    518    *
    519    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    520    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    521    * which means that they compare as less than some other BMP characters like U+feff.
    522    * This function compares Unicode strings in code point order.
    523    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    524    *
    525    * @param start The start offset in this string at which the compare operation begins.
    526    * @param length The number of code units from this string to compare.
    527    * @param srcChars A pointer to another string to compare this one to.
    528    * @param srcStart The start offset in that string at which the compare operation begins.
    529    * @param srcLength The number of code units from that string to compare.
    530    * @return a negative/zero/positive integer corresponding to whether
    531    * this string is less than/equal to/greater than the second one
    532    * in code point order
    533    * @stable ICU 2.0
    534    */
    535   inline int8_t compareCodePointOrder(int32_t start,
    536                                       int32_t length,
    537                                       const UChar *srcChars,
    538                                       int32_t srcStart,
    539                                       int32_t srcLength) const;
    540 
    541   /**
    542    * Compare two Unicode strings in code point order.
    543    * The result may be different from the results of compare(), operator<, etc.
    544    * if supplementary characters are present:
    545    *
    546    * In UTF-16, supplementary characters (with code points U+10000 and above) are
    547    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
    548    * which means that they compare as less than some other BMP characters like U+feff.
    549    * This function compares Unicode strings in code point order.
    550    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
    551    *
    552    * @param start The start offset in this string at which the compare operation begins.
    553    * @param limit The offset after the last code unit from this string to compare.
    554    * @param srcText Another string to compare this one to.
    555    * @param srcStart The start offset in that string at which the compare operation begins.
    556    * @param srcLimit The offset after the last code unit from that string to compare.
    557    * @return a negative/zero/positive integer corresponding to whether
    558    * this string is less than/equal to/greater than the second one
    559    * in code point order
    560    * @stable ICU 2.0
    561    */
    562   inline int8_t compareCodePointOrderBetween(int32_t start,
    563                                              int32_t limit,
    564                                              const UnicodeString& srcText,
    565                                              int32_t srcStart,
    566                                              int32_t srcLimit) const;
    567 
    568   /**
    569    * Compare two strings case-insensitively using full case folding.
    570    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
    571    *
    572    * @param text Another string to compare this one to.
    573    * @param options A bit set of options:
    574    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    575    *     Comparison in code unit order with default case folding.
    576    *
    577    *   - U_COMPARE_CODE_POINT_ORDER
    578    *     Set to choose code point order instead of code unit order
    579    *     (see u_strCompare for details).
    580    *
    581    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    582    *
    583    * @return A negative, zero, or positive integer indicating the comparison result.
    584    * @stable ICU 2.0
    585    */
    586   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
    587 
    588   /**
    589    * Compare two strings case-insensitively using full case folding.
    590    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
    591    *
    592    * @param start The start offset in this string at which the compare operation begins.
    593    * @param length The number of code units from this string to compare.
    594    * @param srcText Another string to compare this one to.
    595    * @param options A bit set of options:
    596    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    597    *     Comparison in code unit order with default case folding.
    598    *
    599    *   - U_COMPARE_CODE_POINT_ORDER
    600    *     Set to choose code point order instead of code unit order
    601    *     (see u_strCompare for details).
    602    *
    603    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    604    *
    605    * @return A negative, zero, or positive integer indicating the comparison result.
    606    * @stable ICU 2.0
    607    */
    608   inline int8_t caseCompare(int32_t start,
    609          int32_t length,
    610          const UnicodeString& srcText,
    611          uint32_t options) const;
    612 
    613   /**
    614    * Compare two strings case-insensitively using full case folding.
    615    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
    616    *
    617    * @param start The start offset in this string at which the compare operation begins.
    618    * @param length The number of code units from this string to compare.
    619    * @param srcText Another string to compare this one to.
    620    * @param srcStart The start offset in that string at which the compare operation begins.
    621    * @param srcLength The number of code units from that string to compare.
    622    * @param options A bit set of options:
    623    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    624    *     Comparison in code unit order with default case folding.
    625    *
    626    *   - U_COMPARE_CODE_POINT_ORDER
    627    *     Set to choose code point order instead of code unit order
    628    *     (see u_strCompare for details).
    629    *
    630    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    631    *
    632    * @return A negative, zero, or positive integer indicating the comparison result.
    633    * @stable ICU 2.0
    634    */
    635   inline int8_t caseCompare(int32_t start,
    636          int32_t length,
    637          const UnicodeString& srcText,
    638          int32_t srcStart,
    639          int32_t srcLength,
    640          uint32_t options) const;
    641 
    642   /**
    643    * Compare two strings case-insensitively using full case folding.
    644    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
    645    *
    646    * @param srcChars A pointer to another string to compare this one to.
    647    * @param srcLength The number of code units from that string to compare.
    648    * @param options A bit set of options:
    649    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    650    *     Comparison in code unit order with default case folding.
    651    *
    652    *   - U_COMPARE_CODE_POINT_ORDER
    653    *     Set to choose code point order instead of code unit order
    654    *     (see u_strCompare for details).
    655    *
    656    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    657    *
    658    * @return A negative, zero, or positive integer indicating the comparison result.
    659    * @stable ICU 2.0
    660    */
    661   inline int8_t caseCompare(const UChar *srcChars,
    662          int32_t srcLength,
    663          uint32_t options) const;
    664 
    665   /**
    666    * Compare two strings case-insensitively using full case folding.
    667    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
    668    *
    669    * @param start The start offset in this string at which the compare operation begins.
    670    * @param length The number of code units from this string to compare.
    671    * @param srcChars A pointer to another string to compare this one to.
    672    * @param options A bit set of options:
    673    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    674    *     Comparison in code unit order with default case folding.
    675    *
    676    *   - U_COMPARE_CODE_POINT_ORDER
    677    *     Set to choose code point order instead of code unit order
    678    *     (see u_strCompare for details).
    679    *
    680    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    681    *
    682    * @return A negative, zero, or positive integer indicating the comparison result.
    683    * @stable ICU 2.0
    684    */
    685   inline int8_t caseCompare(int32_t start,
    686          int32_t length,
    687          const UChar *srcChars,
    688          uint32_t options) const;
    689 
    690   /**
    691    * Compare two strings case-insensitively using full case folding.
    692    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
    693    *
    694    * @param start The start offset in this string at which the compare operation begins.
    695    * @param length The number of code units from this string to compare.
    696    * @param srcChars A pointer to another string to compare this one to.
    697    * @param srcStart The start offset in that string at which the compare operation begins.
    698    * @param srcLength The number of code units from that string to compare.
    699    * @param options A bit set of options:
    700    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    701    *     Comparison in code unit order with default case folding.
    702    *
    703    *   - U_COMPARE_CODE_POINT_ORDER
    704    *     Set to choose code point order instead of code unit order
    705    *     (see u_strCompare for details).
    706    *
    707    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    708    *
    709    * @return A negative, zero, or positive integer indicating the comparison result.
    710    * @stable ICU 2.0
    711    */
    712   inline int8_t caseCompare(int32_t start,
    713          int32_t length,
    714          const UChar *srcChars,
    715          int32_t srcStart,
    716          int32_t srcLength,
    717          uint32_t options) const;
    718 
    719   /**
    720    * Compare two strings case-insensitively using full case folding.
    721    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
    722    *
    723    * @param start The start offset in this string at which the compare operation begins.
    724    * @param limit The offset after the last code unit from this string to compare.
    725    * @param srcText Another string to compare this one to.
    726    * @param srcStart The start offset in that string at which the compare operation begins.
    727    * @param srcLimit The offset after the last code unit from that string to compare.
    728    * @param options A bit set of options:
    729    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
    730    *     Comparison in code unit order with default case folding.
    731    *
    732    *   - U_COMPARE_CODE_POINT_ORDER
    733    *     Set to choose code point order instead of code unit order
    734    *     (see u_strCompare for details).
    735    *
    736    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
    737    *
    738    * @return A negative, zero, or positive integer indicating the comparison result.
    739    * @stable ICU 2.0
    740    */
    741   inline int8_t caseCompareBetween(int32_t start,
    742             int32_t limit,
    743             const UnicodeString& srcText,
    744             int32_t srcStart,
    745             int32_t srcLimit,
    746             uint32_t options) const;
    747 
    748   /**
    749    * Determine if this starts with the characters in <TT>text</TT>
    750    * @param text The text to match.
    751    * @return TRUE if this starts with the characters in <TT>text</TT>,
    752    * FALSE otherwise
    753    * @stable ICU 2.0
    754    */
    755   inline UBool startsWith(const UnicodeString& text) const;
    756 
    757   /**
    758    * Determine if this starts with the characters in <TT>srcText</TT>
    759    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    760    * @param srcText The text to match.
    761    * @param srcStart the offset into <TT>srcText</TT> to start matching
    762    * @param srcLength the number of characters in <TT>srcText</TT> to match
    763    * @return TRUE if this starts with the given range of <TT>srcText</TT>,
    764    * FALSE otherwise
    765    * @stable ICU 2.0
    766    */
    767   inline UBool startsWith(const UnicodeString& srcText,
    768             int32_t srcStart,
    769             int32_t srcLength) const;
    770 
    771   /**
    772    * Determine if this starts with the characters in <TT>srcChars</TT>
    773    * @param srcChars The characters to match.
    774    * @param srcLength the number of characters in <TT>srcChars</TT>
    775    * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
    776    * FALSE otherwise
    777    * @stable ICU 2.0
    778    */
    779   inline UBool startsWith(const UChar *srcChars,
    780             int32_t srcLength) const;
    781 
    782   /**
    783    * Determine if this starts with the characters in <TT>srcChars</TT>
    784    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    785    * @param srcChars The characters to match.
    786    * @param srcStart the offset into <TT>srcChars</TT> to start matching
    787    * @param srcLength the number of characters in <TT>srcChars</TT> to match
    788    * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
    789    * @stable ICU 2.0
    790    */
    791   inline UBool startsWith(const UChar *srcChars,
    792             int32_t srcStart,
    793             int32_t srcLength) const;
    794 
    795   /**
    796    * Determine if this ends with the characters in <TT>text</TT>
    797    * @param text The text to match.
    798    * @return TRUE if this ends with the characters in <TT>text</TT>,
    799    * FALSE otherwise
    800    * @stable ICU 2.0
    801    */
    802   inline UBool endsWith(const UnicodeString& text) const;
    803 
    804   /**
    805    * Determine if this ends with the characters in <TT>srcText</TT>
    806    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    807    * @param srcText The text to match.
    808    * @param srcStart the offset into <TT>srcText</TT> to start matching
    809    * @param srcLength the number of characters in <TT>srcText</TT> to match
    810    * @return TRUE if this ends with the given range of <TT>srcText</TT>,
    811    * FALSE otherwise
    812    * @stable ICU 2.0
    813    */
    814   inline UBool endsWith(const UnicodeString& srcText,
    815           int32_t srcStart,
    816           int32_t srcLength) const;
    817 
    818   /**
    819    * Determine if this ends with the characters in <TT>srcChars</TT>
    820    * @param srcChars The characters to match.
    821    * @param srcLength the number of characters in <TT>srcChars</TT>
    822    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
    823    * FALSE otherwise
    824    * @stable ICU 2.0
    825    */
    826   inline UBool endsWith(const UChar *srcChars,
    827           int32_t srcLength) const;
    828 
    829   /**
    830    * Determine if this ends with the characters in <TT>srcChars</TT>
    831    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
    832    * @param srcChars The characters to match.
    833    * @param srcStart the offset into <TT>srcChars</TT> to start matching
    834    * @param srcLength the number of characters in <TT>srcChars</TT> to match
    835    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
    836    * FALSE otherwise
    837    * @stable ICU 2.0
    838    */
    839   inline UBool endsWith(const UChar *srcChars,
    840           int32_t srcStart,
    841           int32_t srcLength) const;
    842 
    843 
    844   /* Searching - bitwise only */
    845 
    846   /**
    847    * Locate in this the first occurrence of the characters in <TT>text</TT>,
    848    * using bitwise comparison.
    849    * @param text The text to search for.
    850    * @return The offset into this of the start of <TT>text</TT>,
    851    * or -1 if not found.
    852    * @stable ICU 2.0
    853    */
    854   inline int32_t indexOf(const UnicodeString& text) const;
    855 
    856   /**
    857    * Locate in this the first occurrence of the characters in <TT>text</TT>
    858    * starting at offset <TT>start</TT>, using bitwise comparison.
    859    * @param text The text to search for.
    860    * @param start The offset at which searching will start.
    861    * @return The offset into this of the start of <TT>text</TT>,
    862    * or -1 if not found.
    863    * @stable ICU 2.0
    864    */
    865   inline int32_t indexOf(const UnicodeString& text,
    866               int32_t start) const;
    867 
    868   /**
    869    * Locate in this the first occurrence in the range
    870    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
    871    * in <TT>text</TT>, using bitwise comparison.
    872    * @param text The text to search for.
    873    * @param start The offset at which searching will start.
    874    * @param length The number of characters to search
    875    * @return The offset into this of the start of <TT>text</TT>,
    876    * or -1 if not found.
    877    * @stable ICU 2.0
    878    */
    879   inline int32_t indexOf(const UnicodeString& text,
    880               int32_t start,
    881               int32_t length) const;
    882 
    883   /**
    884    * Locate in this the first occurrence in the range
    885    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
    886    *  in <TT>srcText</TT> in the range
    887    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
    888    * using bitwise comparison.
    889    * @param srcText The text to search for.
    890    * @param srcStart the offset into <TT>srcText</TT> at which
    891    * to start matching
    892    * @param srcLength the number of characters in <TT>srcText</TT> to match
    893    * @param start the offset into this at which to start matching
    894    * @param length the number of characters in this to search
    895    * @return The offset into this of the start of <TT>srcText</TT>,
    896    * or -1 if not found.
    897    * @stable ICU 2.0
    898    */
    899   inline int32_t indexOf(const UnicodeString& srcText,
    900               int32_t srcStart,
    901               int32_t srcLength,
    902               int32_t start,
    903               int32_t length) const;
    904 
    905   /**
    906    * Locate in this the first occurrence of the characters in
    907    * <TT>srcChars</TT>
    908    * starting at offset <TT>start</TT>, using bitwise comparison.
    909    * @param srcChars The text to search for.
    910    * @param srcLength the number of characters in <TT>srcChars</TT> to match
    911    * @param start the offset into this at which to start matching
    912    * @return The offset into this of the start of <TT>srcChars</TT>,
    913    * or -1 if not found.
    914    * @stable ICU 2.0
    915    */
    916   inline int32_t indexOf(const UChar *srcChars,
    917               int32_t srcLength,
    918               int32_t start) const;
    919 
    920   /**
    921    * Locate in this the first occurrence in the range
    922    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
    923    * in <TT>srcChars</TT>, using bitwise comparison.
    924    * @param srcChars The text to search for.
    925    * @param srcLength the number of characters in <TT>srcChars</TT>
    926    * @param start The offset at which searching will start.
    927    * @param length The number of characters to search
    928    * @return The offset into this of the start of <TT>srcChars</TT>,
    929    * or -1 if not found.
    930    * @stable ICU 2.0
    931    */
    932   inline int32_t indexOf(const UChar *srcChars,
    933               int32_t srcLength,
    934               int32_t start,
    935               int32_t length) const;
    936 
    937   /**
    938    * Locate in this the first occurrence in the range
    939    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
    940    * in <TT>srcChars</TT> in the range
    941    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
    942    * using bitwise comparison.
    943    * @param srcChars The text to search for.
    944    * @param srcStart the offset into <TT>srcChars</TT> at which
    945    * to start matching
    946    * @param srcLength the number of characters in <TT>srcChars</TT> to match
    947    * @param start the offset into this at which to start matching
    948    * @param length the number of characters in this to search
    949    * @return The offset into this of the start of <TT>srcChars</TT>,
    950    * or -1 if not found.
    951    * @stable ICU 2.0
    952    */
    953   int32_t indexOf(const UChar *srcChars,
    954               int32_t srcStart,
    955               int32_t srcLength,
    956               int32_t start,
    957               int32_t length) const;
    958 
    959   /**
    960    * Locate in this the first occurrence of the BMP code point <code>c</code>,
    961    * using bitwise comparison.
    962    * @param c The code unit to search for.
    963    * @return The offset into this of <TT>c</TT>, or -1 if not found.
    964    * @stable ICU 2.0
    965    */
    966   inline int32_t indexOf(UChar c) const;
    967 
    968   /**
    969    * Locate in this the first occurrence of the code point <TT>c</TT>,
    970    * using bitwise comparison.
    971    *
    972    * @param c The code point to search for.
    973    * @return The offset into this of <TT>c</TT>, or -1 if not found.
    974    * @stable ICU 2.0
    975    */
    976   inline int32_t indexOf(UChar32 c) const;
    977 
    978   /**
    979    * Locate in this the first occurrence of the BMP code point <code>c</code>,
    980    * starting at offset <TT>start</TT>, using bitwise comparison.
    981    * @param c The code unit to search for.
    982    * @param start The offset at which searching will start.
    983    * @return The offset into this of <TT>c</TT>, or -1 if not found.
    984    * @stable ICU 2.0
    985    */
    986   inline int32_t indexOf(UChar c,
    987               int32_t start) const;
    988 
    989   /**
    990    * Locate in this the first occurrence of the code point <TT>c</TT>
    991    * starting at offset <TT>start</TT>, using bitwise comparison.
    992    *
    993    * @param c The code point to search for.
    994    * @param start The offset at which searching will start.
    995    * @return The offset into this of <TT>c</TT>, or -1 if not found.
    996    * @stable ICU 2.0
    997    */
    998   inline int32_t indexOf(UChar32 c,
    999               int32_t start) const;
   1000 
   1001   /**
   1002    * Locate in this the first occurrence of the BMP code point <code>c</code>
   1003    * in the range [<TT>start</TT>, <TT>start + length</TT>),
   1004    * using bitwise comparison.
   1005    * @param c The code unit to search for.
   1006    * @param start the offset into this at which to start matching
   1007    * @param length the number of characters in this to search
   1008    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1009    * @stable ICU 2.0
   1010    */
   1011   inline int32_t indexOf(UChar c,
   1012               int32_t start,
   1013               int32_t length) const;
   1014 
   1015   /**
   1016    * Locate in this the first occurrence of the code point <TT>c</TT>
   1017    * in the range [<TT>start</TT>, <TT>start + length</TT>),
   1018    * using bitwise comparison.
   1019    *
   1020    * @param c The code point to search for.
   1021    * @param start the offset into this at which to start matching
   1022    * @param length the number of characters in this to search
   1023    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1024    * @stable ICU 2.0
   1025    */
   1026   inline int32_t indexOf(UChar32 c,
   1027               int32_t start,
   1028               int32_t length) const;
   1029 
   1030   /**
   1031    * Locate in this the last occurrence of the characters in <TT>text</TT>,
   1032    * using bitwise comparison.
   1033    * @param text The text to search for.
   1034    * @return The offset into this of the start of <TT>text</TT>,
   1035    * or -1 if not found.
   1036    * @stable ICU 2.0
   1037    */
   1038   inline int32_t lastIndexOf(const UnicodeString& text) const;
   1039 
   1040   /**
   1041    * Locate in this the last occurrence of the characters in <TT>text</TT>
   1042    * starting at offset <TT>start</TT>, using bitwise comparison.
   1043    * @param text The text to search for.
   1044    * @param start The offset at which searching will start.
   1045    * @return The offset into this of the start of <TT>text</TT>,
   1046    * or -1 if not found.
   1047    * @stable ICU 2.0
   1048    */
   1049   inline int32_t lastIndexOf(const UnicodeString& text,
   1050               int32_t start) const;
   1051 
   1052   /**
   1053    * Locate in this the last occurrence in the range
   1054    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1055    * in <TT>text</TT>, using bitwise comparison.
   1056    * @param text The text to search for.
   1057    * @param start The offset at which searching will start.
   1058    * @param length The number of characters to search
   1059    * @return The offset into this of the start of <TT>text</TT>,
   1060    * or -1 if not found.
   1061    * @stable ICU 2.0
   1062    */
   1063   inline int32_t lastIndexOf(const UnicodeString& text,
   1064               int32_t start,
   1065               int32_t length) const;
   1066 
   1067   /**
   1068    * Locate in this the last occurrence in the range
   1069    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1070    * in <TT>srcText</TT> in the range
   1071    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
   1072    * using bitwise comparison.
   1073    * @param srcText The text to search for.
   1074    * @param srcStart the offset into <TT>srcText</TT> at which
   1075    * to start matching
   1076    * @param srcLength the number of characters in <TT>srcText</TT> to match
   1077    * @param start the offset into this at which to start matching
   1078    * @param length the number of characters in this to search
   1079    * @return The offset into this of the start of <TT>srcText</TT>,
   1080    * or -1 if not found.
   1081    * @stable ICU 2.0
   1082    */
   1083   inline int32_t lastIndexOf(const UnicodeString& srcText,
   1084               int32_t srcStart,
   1085               int32_t srcLength,
   1086               int32_t start,
   1087               int32_t length) const;
   1088 
   1089   /**
   1090    * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
   1091    * starting at offset <TT>start</TT>, using bitwise comparison.
   1092    * @param srcChars The text to search for.
   1093    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   1094    * @param start the offset into this at which to start matching
   1095    * @return The offset into this of the start of <TT>srcChars</TT>,
   1096    * or -1 if not found.
   1097    * @stable ICU 2.0
   1098    */
   1099   inline int32_t lastIndexOf(const UChar *srcChars,
   1100               int32_t srcLength,
   1101               int32_t start) const;
   1102 
   1103   /**
   1104    * Locate in this the last occurrence in the range
   1105    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1106    * in <TT>srcChars</TT>, using bitwise comparison.
   1107    * @param srcChars The text to search for.
   1108    * @param srcLength the number of characters in <TT>srcChars</TT>
   1109    * @param start The offset at which searching will start.
   1110    * @param length The number of characters to search
   1111    * @return The offset into this of the start of <TT>srcChars</TT>,
   1112    * or -1 if not found.
   1113    * @stable ICU 2.0
   1114    */
   1115   inline int32_t lastIndexOf(const UChar *srcChars,
   1116               int32_t srcLength,
   1117               int32_t start,
   1118               int32_t length) const;
   1119 
   1120   /**
   1121    * Locate in this the last occurrence in the range
   1122    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
   1123    * in <TT>srcChars</TT> in the range
   1124    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
   1125    * using bitwise comparison.
   1126    * @param srcChars The text to search for.
   1127    * @param srcStart the offset into <TT>srcChars</TT> at which
   1128    * to start matching
   1129    * @param srcLength the number of characters in <TT>srcChars</TT> to match
   1130    * @param start the offset into this at which to start matching
   1131    * @param length the number of characters in this to search
   1132    * @return The offset into this of the start of <TT>srcChars</TT>,
   1133    * or -1 if not found.
   1134    * @stable ICU 2.0
   1135    */
   1136   int32_t lastIndexOf(const UChar *srcChars,
   1137               int32_t srcStart,
   1138               int32_t srcLength,
   1139               int32_t start,
   1140               int32_t length) const;
   1141 
   1142   /**
   1143    * Locate in this the last occurrence of the BMP code point <code>c</code>,
   1144    * using bitwise comparison.
   1145    * @param c The code unit to search for.
   1146    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1147    * @stable ICU 2.0
   1148    */
   1149   inline int32_t lastIndexOf(UChar c) const;
   1150 
   1151   /**
   1152    * Locate in this the last occurrence of the code point <TT>c</TT>,
   1153    * using bitwise comparison.
   1154    *
   1155    * @param c The code point to search for.
   1156    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1157    * @stable ICU 2.0
   1158    */
   1159   inline int32_t lastIndexOf(UChar32 c) const;
   1160 
   1161   /**
   1162    * Locate in this the last occurrence of the BMP code point <code>c</code>
   1163    * starting at offset <TT>start</TT>, using bitwise comparison.
   1164    * @param c The code unit to search for.
   1165    * @param start The offset at which searching will start.
   1166    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1167    * @stable ICU 2.0
   1168    */
   1169   inline int32_t lastIndexOf(UChar c,
   1170               int32_t start) const;
   1171 
   1172   /**
   1173    * Locate in this the last occurrence of the code point <TT>c</TT>
   1174    * starting at offset <TT>start</TT>, using bitwise comparison.
   1175    *
   1176    * @param c The code point to search for.
   1177    * @param start The offset at which searching will start.
   1178    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1179    * @stable ICU 2.0
   1180    */
   1181   inline int32_t lastIndexOf(UChar32 c,
   1182               int32_t start) const;
   1183 
   1184   /**
   1185    * Locate in this the last occurrence of the BMP code point <code>c</code>
   1186    * in the range [<TT>start</TT>, <TT>start + length</TT>),
   1187    * using bitwise comparison.
   1188    * @param c The code unit to search for.
   1189    * @param start the offset into this at which to start matching
   1190    * @param length the number of characters in this to search
   1191    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1192    * @stable ICU 2.0
   1193    */
   1194   inline int32_t lastIndexOf(UChar c,
   1195               int32_t start,
   1196               int32_t length) const;
   1197 
   1198   /**
   1199    * Locate in this the last occurrence of the code point <TT>c</TT>
   1200    * in the range [<TT>start</TT>, <TT>start + length</TT>),
   1201    * using bitwise comparison.
   1202    *
   1203    * @param c The code point to search for.
   1204    * @param start the offset into this at which to start matching
   1205    * @param length the number of characters in this to search
   1206    * @return The offset into this of <TT>c</TT>, or -1 if not found.
   1207    * @stable ICU 2.0
   1208    */
   1209   inline int32_t lastIndexOf(UChar32 c,
   1210               int32_t start,
   1211               int32_t length) const;
   1212 
   1213 
   1214   /* Character access */
   1215 
   1216   /**
   1217    * Return the code unit at offset <tt>offset</tt>.
   1218    * If the offset is not valid (0..length()-1) then U+ffff is returned.
   1219    * @param offset a valid offset into the text
   1220    * @return the code unit at offset <tt>offset</tt>
   1221    *         or 0xffff if the offset is not valid for this string
   1222    * @stable ICU 2.0
   1223    */
   1224   inline UChar charAt(int32_t offset) const;
   1225 
   1226   /**
   1227    * Return the code unit at offset <tt>offset</tt>.
   1228    * If the offset is not valid (0..length()-1) then U+ffff is returned.
   1229    * @param offset a valid offset into the text
   1230    * @return the code unit at offset <tt>offset</tt>
   1231    * @stable ICU 2.0
   1232    */
   1233   inline UChar operator[] (int32_t offset) const;
   1234 
   1235   /**
   1236    * Return the code point that contains the code unit
   1237    * at offset <tt>offset</tt>.
   1238    * If the offset is not valid (0..length()-1) then U+ffff is returned.
   1239    * @param offset a valid offset into the text
   1240    * that indicates the text offset of any of the code units
   1241    * that will be assembled into a code point (21-bit value) and returned
   1242    * @return the code point of text at <tt>offset</tt>
   1243    *         or 0xffff if the offset is not valid for this string
   1244    * @stable ICU 2.0
   1245    */
   1246   inline UChar32 char32At(int32_t offset) const;
   1247 
   1248   /**
   1249    * Adjust a random-access offset so that
   1250    * it points to the beginning of a Unicode character.
   1251    * The offset that is passed in points to
   1252    * any code unit of a code point,
   1253    * while the returned offset will point to the first code unit
   1254    * of the same code point.
   1255    * In UTF-16, if the input offset points to a second surrogate
   1256    * of a surrogate pair, then the returned offset will point
   1257    * to the first surrogate.
   1258    * @param offset a valid offset into one code point of the text
   1259    * @return offset of the first code unit of the same code point
   1260    * @see U16_SET_CP_START
   1261    * @stable ICU 2.0
   1262    */
   1263   inline int32_t getChar32Start(int32_t offset) const;
   1264 
   1265   /**
   1266    * Adjust a random-access offset so that
   1267    * it points behind a Unicode character.
   1268    * The offset that is passed in points behind
   1269    * any code unit of a code point,
   1270    * while the returned offset will point behind the last code unit
   1271    * of the same code point.
   1272    * In UTF-16, if the input offset points behind the first surrogate
   1273    * (i.e., to the second surrogate)
   1274    * of a surrogate pair, then the returned offset will point
   1275    * behind the second surrogate (i.e., to the code unit after the pair).
   1276    * @param offset a valid offset after any code unit of a code point of the text
   1277    * @return offset of the first code unit after the same code point
   1278    * @see U16_SET_CP_LIMIT
   1279    * @stable ICU 2.0
   1280    */
   1281   inline int32_t getChar32Limit(int32_t offset) const;
   1282 
   1283   /**
   1284    * Move the code unit index along the string by delta code points.
   1285    * Interpret the input index as a code unit-based offset into the string,
   1286    * move the index forward or backward by delta code points, and
   1287    * return the resulting index.
   1288    * The input index should point to the first code unit of a code point,
   1289    * if there is more than one.
   1290    *
   1291    * Both input and output indexes are code unit-based as for all
   1292    * string indexes/offsets in ICU (and other libraries, like MBCS char*).
   1293    * If delta<0 then the index is moved backward (toward the start of the string).
   1294    * If delta>0 then the index is moved forward (toward the end of the string).
   1295    *
   1296    * This behaves like CharacterIterator::move32(delta, kCurrent).
   1297    *
   1298    * Behavior for out-of-bounds indexes:
   1299    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
   1300    * if the input index<0 then it is pinned to 0;
   1301    * if it is index>length() then it is pinned to length().
   1302    * Afterwards, the index is moved by <code>delta</code> code points
   1303    * forward or backward,
   1304    * but no further backward than to 0 and no further forward than to length().
   1305    * The resulting index return value will be in between 0 and length(), inclusively.
   1306    *
   1307    * Examples:
   1308    * <pre>
   1309    * // s has code points 'a' U+10000 'b' U+10ffff U+2029
   1310    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
   1311    *
   1312    * // initial index: position of U+10000
   1313    * int32_t index=1;
   1314    *
   1315    * // the following examples will all result in index==4, position of U+10ffff
   1316    *
   1317    * // skip 2 code points from some position in the string
   1318    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
   1319    *
   1320    * // go to the 3rd code point from the start of s (0-based)
   1321    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
   1322    *
   1323    * // go to the next-to-last code point of s
   1324    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
   1325    * </pre>
   1326    *
   1327    * @param index input code unit index
   1328    * @param delta (signed) code point count to move the index forward or backward
   1329    *        in the string
   1330    * @return the resulting code unit index
   1331    * @stable ICU 2.0
   1332    */
   1333   int32_t moveIndex32(int32_t index, int32_t delta) const;
   1334 
   1335   /* Substring extraction */
   1336 
   1337   /**
   1338    * Copy the characters in the range
   1339    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
   1340    * beginning at <tt>dstStart</tt>.
   1341    * If the string aliases to <code>dst</code> itself as an external buffer,
   1342    * then extract() will not copy the contents.
   1343    *
   1344    * @param start offset of first character which will be copied into the array
   1345    * @param length the number of characters to extract
   1346    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
   1347    * must be at least (<tt>dstStart + length</tt>).
   1348    * @param dstStart the offset in <TT>dst</TT> where the first character
   1349    * will be extracted
   1350    * @stable ICU 2.0
   1351    */
   1352   inline void extract(int32_t start,
   1353            int32_t length,
   1354            UChar *dst,
   1355            int32_t dstStart = 0) const;
   1356 
   1357   /**
   1358    * Copy the contents of the string into dest.
   1359    * This is a convenience function that
   1360    * checks if there is enough space in dest,
   1361    * extracts the entire string if possible,
   1362    * and NUL-terminates dest if possible.
   1363    *
   1364    * If the string fits into dest but cannot be NUL-terminated
   1365    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
   1366    * If the string itself does not fit into dest
   1367    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
   1368    *
   1369    * If the string aliases to <code>dest</code> itself as an external buffer,
   1370    * then extract() will not copy the contents.
   1371    *
   1372    * @param dest Destination string buffer.
   1373    * @param destCapacity Number of UChars available at dest.
   1374    * @param errorCode ICU error code.
   1375    * @return length()
   1376    * @stable ICU 2.0
   1377    */
   1378   int32_t
   1379   extract(UChar *dest, int32_t destCapacity,
   1380           UErrorCode &errorCode) const;
   1381 
   1382   /**
   1383    * Copy the characters in the range
   1384    * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
   1385    * <tt>target</tt>.
   1386    * @param start offset of first character which will be copied
   1387    * @param length the number of characters to extract
   1388    * @param target UnicodeString into which to copy characters;
   1389    *        it receives the result (the function itself returns void).
   1390    * @stable ICU 2.0
   1391    */
   1392   inline void extract(int32_t start,
   1393            int32_t length,
   1394            UnicodeString& target) const;
   1395 
   1396   /**
   1397    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
   1398    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
   1399    * @param start offset of first character which will be copied into the array
   1400    * @param limit offset immediately following the last character to be copied
   1401    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
   1402    * must be at least (<tt>dstStart + (limit - start)</tt>).
   1403    * @param dstStart the offset in <TT>dst</TT> where the first character
   1404    * will be extracted
   1405    * @stable ICU 2.0
   1406    */
   1407   inline void extractBetween(int32_t start,
   1408               int32_t limit,
   1409               UChar *dst,
   1410               int32_t dstStart = 0) const;
   1411 
   1412   /**
   1413    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
   1414    * into the UnicodeString <tt>target</tt>.  Replaceable API.
   1415    * @param start offset of first character which will be copied
   1416    * @param limit offset immediately following the last character to be copied
   1417    * @param target UnicodeString into which to copy characters;
   1418    *        it receives the result (the function itself returns void).
   1419    * @stable ICU 2.0
   1420    */
   1421   virtual void extractBetween(int32_t start,
   1422               int32_t limit,
   1423               UnicodeString& target) const;
   1424 
   1425   /**
   1426    * Copy the characters in the range
   1427    * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
   1428    * All characters must be invariant (see utypes.h).
   1429    * Use US_INV as the last, signature-distinguishing parameter.
   1430    *
   1431    * This function does not write any more than <code>targetCapacity</code>
   1432    * characters but returns the length of the entire output string
   1433    * so that one can allocate a larger buffer and call the function again
   1434    * if necessary.
   1435    * The output string is NUL-terminated if possible.
   1436    *
   1437    * @param start offset of first character which will be copied
   1438    * @param startLength the number of characters to extract
   1439    * @param target the target buffer for extraction, can be NULL
   1440    *               if targetCapacity is 0
   1441    * @param targetCapacity the length of the target buffer
   1442    * @param inv Signature-distinguishing parameter, use US_INV.
   1443    * @return the output string length, not including the terminating NUL
   1444    * @stable ICU 3.2
   1445    */
   1446   int32_t extract(int32_t start,
   1447            int32_t startLength,
   1448            char *target,
   1449            int32_t targetCapacity,
   1450            enum EInvariant inv) const;
   1451 
   1452 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
   1453 
   1454   /**
   1455    * Copy the characters in the range
   1456    * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
   1457    * in the platform's default codepage.
   1458    * This function does not write any more than <code>targetLength</code>
   1459    * characters but returns the length of the entire output string
   1460    * so that one can allocate a larger buffer and call the function again
   1461    * if necessary.
   1462    * The output string is NUL-terminated if possible.
   1463    *
   1464    * @param start offset of first character which will be copied
   1465    * @param startLength the number of characters to extract
   1466    * @param target the target buffer for extraction
   1467    * @param targetLength the length of the target buffer
   1468    * If <TT>target</TT> is NULL, then the number of bytes required for
   1469    * <TT>target</TT> is returned.
   1470    * @return the output string length, not including the terminating NUL
   1471    * @stable ICU 2.0
   1472    */
   1473   int32_t extract(int32_t start,
   1474            int32_t startLength,
   1475            char *target,
   1476            uint32_t targetLength) const;
   1477 
   1478 #endif
   1479 
   1480 #if !UCONFIG_NO_CONVERSION
   1481 
   1482   /**
   1483    * Copy the characters in the range
   1484    * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
   1485    * in a specified codepage.
   1486    * The output string is NUL-terminated.
   1487    *
   1488    * Recommendation: For invariant-character strings use
   1489    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
   1490    * because it avoids object code dependencies of UnicodeString on
   1491    * the conversion code.
   1492    *
   1493    * @param start offset of first character which will be copied
   1494    * @param startLength the number of characters to extract
   1495    * @param target the target buffer for extraction
   1496    * @param codepage the desired codepage for the characters.  0 has
   1497    * the special meaning of the default codepage
   1498    * If <code>codepage</code> is an empty string (<code>""</code>),
   1499    * then a simple conversion is performed on the codepage-invariant
   1500    * subset ("invariant characters") of the platform encoding. See utypes.h.
   1501    * If <TT>target</TT> is NULL, then the number of bytes required for
   1502    * <TT>target</TT> is returned. It is assumed that the target is big enough
   1503    * to fit all of the characters.
   1504    * @return the output string length, not including the terminating NUL
   1505    * @stable ICU 2.0
   1506    */
   1507   inline int32_t extract(int32_t start,
   1508                  int32_t startLength,
   1509                  char *target,
   1510                  const char *codepage = 0) const;
   1511 
   1512   /**
   1513    * Copy the characters in the range
   1514    * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
   1515    * in a specified codepage.
   1516    * This function does not write any more than <code>targetLength</code>
   1517    * characters but returns the length of the entire output string
   1518    * so that one can allocate a larger buffer and call the function again
   1519    * if necessary.
   1520    * The output string is NUL-terminated if possible.
   1521    *
   1522    * Recommendation: For invariant-character strings use
   1523    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
   1524    * because it avoids object code dependencies of UnicodeString on
   1525    * the conversion code.
   1526    *
   1527    * @param start offset of first character which will be copied
   1528    * @param startLength the number of characters to extract
   1529    * @param target the target buffer for extraction
   1530    * @param targetLength the length of the target buffer
   1531    * @param codepage the desired codepage for the characters.  0 has
   1532    * the special meaning of the default codepage
   1533    * If <code>codepage</code> is an empty string (<code>""</code>),
   1534    * then a simple conversion is performed on the codepage-invariant
   1535    * subset ("invariant characters") of the platform encoding. See utypes.h.
   1536    * If <TT>target</TT> is NULL, then the number of bytes required for
   1537    * <TT>target</TT> is returned.
   1538    * @return the output string length, not including the terminating NUL
   1539    * @stable ICU 2.0
   1540    */
   1541   int32_t extract(int32_t start,
   1542            int32_t startLength,
   1543            char *target,
   1544            uint32_t targetLength,
   1545            const char *codepage) const;
   1546 
   1547   /**
   1548    * Convert the UnicodeString into a codepage string using an existing UConverter.
   1549    * The output string is NUL-terminated if possible.
   1550    *
   1551    * This function avoids the overhead of opening and closing a converter if
   1552    * multiple strings are extracted.
   1553    *
   1554    * @param dest destination string buffer, can be NULL if destCapacity==0
   1555    * @param destCapacity the number of chars available at dest
   1556    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
   1557    *        or NULL for the default converter
   1558    * @param errorCode normal ICU error code
   1559    * @return the length of the output string, not counting the terminating NUL;
   1560    *         if the length is greater than destCapacity, then the string will not fit
   1561    *         and a buffer of the indicated length would need to be passed in
   1562    * @stable ICU 2.0
   1563    */
   1564   int32_t extract(char *dest, int32_t destCapacity,
   1565                   UConverter *cnv,
   1566                   UErrorCode &errorCode) const;
   1567 
   1568 #endif
   1569 
   1570   /**
   1571    * Create a temporary substring for the specified range.
   1572    * Unlike the substring constructor and setTo() functions,
   1573    * the object returned here will be a read-only alias (using getBuffer())
   1574    * rather than copying the text.
   1575    * As a result, this substring operation is much faster but requires
   1576    * that the original string not be modified or deleted during the lifetime
   1577    * of the returned substring object.
   1578    * @param start offset of the first character visible in the substring
   1579    * @param length length of the substring
   1580    * @return a read-only alias UnicodeString object for the substring
   1581    * @stable ICU 4.4
   1582    */
   1583   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
   1584 
   1585   /**
   1586    * Create a temporary substring for the specified range.
   1587    * Same as tempSubString(start, length) except that the substring range
   1588    * is specified as a (start, limit) pair (with an exclusive limit index)
   1589    * rather than a (start, length) pair.
   1590    * @param start offset of the first character visible in the substring
   1591    * @param limit offset immediately following the last character visible in the substring
   1592    * @return a read-only alias UnicodeString object for the substring
   1593    * @stable ICU 4.4
   1594    */
   1595   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
   1596 
   1597   /**
   1598    * Convert the UnicodeString to UTF-8 and write the result
   1599    * to a ByteSink. This is called by toUTF8String().
   1600    * Unpaired surrogates are replaced with U+FFFD.
   1601    * Calls u_strToUTF8WithSub().
   1602    *
   1603    * @param sink A ByteSink to which the UTF-8 version of the string is written.
   1604    *             sink.Flush() is called at the end.
   1605    * @stable ICU 4.2
   1606    * @see toUTF8String
   1607    */
   1608   void toUTF8(ByteSink &sink) const;
   1609 
   1610 #if U_HAVE_STD_STRING
   1611 
   1612   /**
   1613    * Convert the UnicodeString to UTF-8 and append the result
   1614    * to a standard string.
   1615    * Unpaired surrogates are replaced with U+FFFD.
   1616    * Calls toUTF8().
   1617    *
   1618    * @param result A standard string (or a compatible object)
   1619    *        to which the UTF-8 version of the string is appended.
   1620    * @return The string object.
   1621    * @stable ICU 4.2
   1622    * @see toUTF8
   1623    */
   1624   template<typename StringClass>
   1625   StringClass &toUTF8String(StringClass &result) const {
   1626     StringByteSink<StringClass> utf8Sink(&result);  // sink writing into result
   1627     toUTF8(utf8Sink);  // emit this string as UTF-8 through the sink
   1628     return result;
   1629   }
   1630 
   1631 #endif
   1632 
   1633   /**
   1634    * Convert the UnicodeString to UTF-32.
   1635    * Unpaired surrogates are replaced with U+FFFD.
   1636    * Calls u_strToUTF32WithSub().
   1637    *
   1638    * @param utf32 destination string buffer, can be NULL if capacity==0
   1639    * @param capacity the number of UChar32s available at utf32
   1640    * @param errorCode Standard ICU error code. Its input value must
   1641    *                  pass the U_SUCCESS() test, or else the function returns
   1642    *                  immediately. Check for U_FAILURE() on output or use with
   1643    *                  function chaining. (See User Guide for details.)
   1644    * @return The length of the UTF-32 string.
   1645    * @see fromUTF32
   1646    * @stable ICU 4.2
   1647    */
   1648   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
   1649 
   1650   /* Length operations */
   1651 
   1652   /**
   1653    * Return the length of the UnicodeString object.
   1654    * The length is the number of UChar code units in the UnicodeString.
   1655    * If you want the number of code points, please use countChar32().
   1656    * @return the length of the UnicodeString object
   1657    * @see countChar32
   1658    * @stable ICU 2.0
   1659    */
   1660   inline int32_t length(void) const;
   1661 
   1662   /**
   1663    * Count Unicode code points in the length UChar code units of the string.
   1664    * A code point may occupy either one or two UChar code units.
   1665    * Counting code points involves reading all code units.
   1666    *
   1667    * This function is basically the inverse of moveIndex32().
   1668    *
   1669    * @param start the index of the first code unit to check
   1670    * @param length the number of UChar code units to check
   1671    * @return the number of code points in the specified code units
   1672    * @see length
   1673    * @stable ICU 2.0
   1674    */
   1675   int32_t
   1676   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
   1677 
   1678   /**
   1679    * Check if the length UChar code units of the string
   1680    * contain more Unicode code points than a certain number.
   1681    * This is more efficient than counting all code points in this part of the string
   1682    * and comparing that number with a threshold.
   1683    * This function may not need to scan the string at all if the length
   1684    * falls within a certain range, and
   1685    * never needs to count more than 'number+1' code points.
   1686    * Logically equivalent to (countChar32(start, length)>number).
   1687    * A Unicode code point may occupy either one or two UChar code units.
   1688    *
   1689    * @param start the index of the first code unit to check (0 for the entire string)
   1690    * @param length the number of UChar code units to check
   1691    *               (use INT32_MAX for the entire string; remember that start/length
   1692    *                values are pinned)
   1693    * @param number The number of code points in the (sub)string is compared against
   1694    *               the 'number' parameter.
   1695    * @return Boolean value for whether the string contains more Unicode code points
   1696    *         than 'number'. Same as (u_countChar32(s, length)>number).
   1697    * @see countChar32
   1698    * @see u_strHasMoreChar32Than
   1699    * @stable ICU 2.4
   1700    */
   1701   UBool
   1702   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
   1703 
   1704   /**
   1705    * Determine if this string is empty.
   1706    * @return TRUE if this string contains 0 characters, FALSE otherwise.
   1707    * @stable ICU 2.0
   1708    */
   1709   inline UBool isEmpty(void) const;
   1710 
   1711   /**
   1712    * Return the capacity of the internal buffer of the UnicodeString object.
   1713    * This is useful together with the getBuffer functions.
   1714    * See there for details.
   1715    *
   1716    * @return the number of UChars available in the internal buffer
   1717    * @see getBuffer
   1718    * @stable ICU 2.0
   1719    */
   1720   inline int32_t getCapacity(void) const;
   1721 
   1722   /* Other operations */
   1723 
   1724   /**
   1725    * Generate a hash code for this object.
   1726    * @return The hash code of this UnicodeString.
   1727    * @stable ICU 2.0
   1728    */
   1729   inline int32_t hashCode(void) const;
   1730 
   1731   /**
   1732    * Determine if this object contains a valid string.
   1733    * A bogus string has no value. It is different from an empty string,
   1734    * although in both cases isEmpty() returns TRUE and length() returns 0.
   1735    * setToBogus() and isBogus() can be used to indicate that no string value is available.
   1736    * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
   1737    * length() returns 0.
   1738    *
   1739    * @return TRUE if the string is bogus (has no valid value), FALSE otherwise
   1740    * @see setToBogus()
   1741    * @stable ICU 2.0
   1742    */
   1743   inline UBool isBogus(void) const;
   1744 
   1745 
   1746   //========================================
   1747   // Write operations
   1748   //========================================
   1749 
   1750   /* Assignment operations */
   1751 
   1752   /**
   1753    * Assignment operator.  Replace the characters in this UnicodeString
   1754    * with the characters from <TT>srcText</TT>.
   1755    * @param srcText The text containing the characters to replace
   1756    * @return a reference to this
   1757    * @stable ICU 2.0
   1758    */
   1759   UnicodeString &operator=(const UnicodeString &srcText);
   1760 
   1761   /**
   1762    * Almost the same as the assignment operator.
   1763    * Replace the characters in this UnicodeString
   1764    * with the characters from <code>srcText</code>.
   1765    *
   1766    * This function works the same for all strings except for ones that
   1767    * are readonly aliases.
   1768    * Starting with ICU 2.4, the assignment operator and the copy constructor
   1769    * allocate a new buffer and copy the buffer contents even for readonly aliases.
   1770    * This function implements the old, more efficient but less safe behavior
   1771    * of making this string also a readonly alias to the same buffer.
   1772    * The fastCopyFrom function must be used only if it is known that the lifetime of
   1773    * this UnicodeString is at least as long as the lifetime of the aliased buffer
   1774    * including its contents, for example for strings from resource bundles
   1775    * or aliases to string contents.
   1776    *
   1777    * @param src The text containing the characters to replace.
   1778    * @return a reference to this
   1779    * @stable ICU 2.4
   1780    */
   1781   UnicodeString &fastCopyFrom(const UnicodeString &src);
   1782 
   1783   /**
   1784    * Assignment operator.  Replace the characters in this UnicodeString
   1785    * with the code unit <TT>ch</TT>.
   1786    * @param ch the code unit to replace
   1787    * @return a reference to this
   1788    * @stable ICU 2.0
   1789    */
   1790   inline UnicodeString& operator= (UChar ch);
   1791 
   1792   /**
   1793    * Assignment operator.  Replace the characters in this UnicodeString
   1794    * with the code point <TT>ch</TT>.
   1795    * @param ch the code point to replace
   1796    * @return a reference to this
   1797    * @stable ICU 2.0
   1798    */
   1799   inline UnicodeString& operator= (UChar32 ch);
   1800 
   1801   /**
   1802    * Set the text in the UnicodeString object to the characters
   1803    * in <TT>srcText</TT> in the range
   1804    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
   1805    * <TT>srcText</TT> is not modified.
   1806    * @param srcText the source for the new characters
   1807    * @param srcStart the offset into <TT>srcText</TT> where new characters
   1808    * will be obtained
   1809    * @return a reference to this
   1810    * @stable ICU 2.2
   1811    */
   1812   inline UnicodeString& setTo(const UnicodeString& srcText,
   1813                int32_t srcStart);
   1814 
   1815   /**
   1816    * Set the text in the UnicodeString object to the characters
   1817    * in <TT>srcText</TT> in the range
   1818    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   1819    * <TT>srcText</TT> is not modified.
   1820    * @param srcText the source for the new characters
   1821    * @param srcStart the offset into <TT>srcText</TT> where new characters
   1822    * will be obtained
   1823    * @param srcLength the number of characters in <TT>srcText</TT> in the
   1824    * replace string.
   1825    * @return a reference to this
   1826    * @stable ICU 2.0
   1827    */
   1828   inline UnicodeString& setTo(const UnicodeString& srcText,
   1829                int32_t srcStart,
   1830                int32_t srcLength);
   1831 
   1832   /**
   1833    * Set the text in the UnicodeString object to the characters in
   1834    * <TT>srcText</TT>.
   1835    * <TT>srcText</TT> is not modified.
   1836    * @param srcText the source for the new characters
   1837    * @return a reference to this
   1838    * @stable ICU 2.0
   1839    */
   1840   inline UnicodeString& setTo(const UnicodeString& srcText);
   1841 
   1842   /**
   1843    * Set the characters in the UnicodeString object to the characters
   1844    * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
   1845    * @param srcChars the source for the new characters
   1846    * @param srcLength the number of Unicode characters in srcChars.
   1847    * @return a reference to this
   1848    * @stable ICU 2.0
   1849    */
   1850   inline UnicodeString& setTo(const UChar *srcChars,
   1851                int32_t srcLength);
   1852 
   1853   /**
   1854    * Set the characters in the UnicodeString object to the code unit
   1855    * <TT>srcChar</TT>.
   1856    * @param srcChar the code unit which becomes the UnicodeString's character
   1857    * content
   1858    * @return a reference to this
   1859    * @stable ICU 2.0
   1860    */
   1861   UnicodeString& setTo(UChar srcChar);
   1862 
   1863   /**
   1864    * Set the characters in the UnicodeString object to the code point
   1865    * <TT>srcChar</TT>.
   1866    * @param srcChar the code point which becomes the UnicodeString's character
   1867    * content
   1868    * @return a reference to this
   1869    * @stable ICU 2.0
   1870    */
   1871   UnicodeString& setTo(UChar32 srcChar);
   1872 
   1873   /**
   1874    * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
   1875    * The text will be used for the UnicodeString object, but
   1876    * it will not be released when the UnicodeString is destroyed.
   1877    * This has copy-on-write semantics:
   1878    * When the string is modified, then the buffer is first copied into
   1879    * newly allocated memory.
   1880    * The aliased buffer is never modified.
   1881    * In an assignment to another UnicodeString, the text will be copied;
   1882    * only fastCopyFrom() makes both strings alias the same readonly-text.
   1883    *
   1884    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
   1885    *                     This must be true if <code>textLength==-1</code>.
   1886    * @param text The characters to alias for the UnicodeString.
   1887    * @param textLength The number of Unicode characters in <code>text</code> to alias.
   1888    *                   If -1, then this function will determine the length
   1889    *                   by calling <code>u_strlen()</code>.
   1890    * @return a reference to this
   1891    * @stable ICU 2.0
   1892    */
   1893   UnicodeString &setTo(UBool isTerminated,
   1894                        const UChar *text,
   1895                        int32_t textLength);
   1896 
   1897   /**
   1898    * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
   1899    * The text will be used for the UnicodeString object, but
   1900    * it will not be released when the UnicodeString is destroyed.
   1901    * This has write-through semantics:
   1902    * For as long as the capacity of the buffer is sufficient, write operations
   1903    * will directly affect the buffer. When more capacity is necessary, then
   1904    * a new buffer will be allocated and the contents copied as with regularly
   1905    * constructed strings.
   1906    * In an assignment to another UnicodeString, the buffer will be copied.
   1907    * The extract(UChar *dst) function detects whether the dst pointer is the same
   1908    * as the string buffer itself and will in this case not copy the contents.
   1909    *
   1910    * @param buffer The characters to alias for the UnicodeString.
   1911    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
   1912    * @param buffCapacity The size of <code>buffer</code> in UChars.
   1913    * @return a reference to this
   1914    * @stable ICU 2.0
   1915    */
   1916   UnicodeString &setTo(UChar *buffer,
   1917                        int32_t buffLength,
   1918                        int32_t buffCapacity);
   1919 
   1920   /**
   1921    * Make this UnicodeString object invalid.
   1922    * The string will test TRUE with isBogus().
   1923    *
   1924    * A bogus string has no value. It is different from an empty string.
   1925    * It can be used to indicate that no string value is available.
   1926    * getBuffer() and getTerminatedBuffer() return NULL, and
   1927    * length() returns 0.
   1928    *
   1929    * This utility function is used throughout the UnicodeString
   1930    * implementation to indicate that a UnicodeString operation failed,
   1931    * and may be used in other functions,
   1932    * especially but not exclusively when such functions do not
   1933    * take a UErrorCode for simplicity.
   1934    *
   1935    * The following methods, and no others, will clear a string object's bogus flag:
   1936    * - remove()
   1937    * - remove(0, INT32_MAX)
   1938    * - truncate(0)
   1939    * - operator=() (assignment operator)
   1940    * - setTo(...)
   1941    *
   1942    * The simplest way to turn a bogus string into an empty one
   1943    * is to use the remove() function.
   1944    * Examples for other functions that are equivalent to "set to empty string":
   1945    * \code
   1946    * if(s.isBogus()) {
   1947    *   s.remove();           // set to an empty string (remove all), or
   1948    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
   1949    *   s.truncate(0);        // set to an empty string (complete truncation), or
   1950    *   s=UnicodeString();    // assign an empty string, or
   1951    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
   1952    *   static const UChar nul=0;
   1953    *   s.setTo(&nul, 0);     // set to an empty C Unicode string
   1954    * }
   1955    * \endcode
   1956    *
   1957    * @see isBogus()
   1958    * @stable ICU 2.0
   1959    */
   1960   void setToBogus();
   1961 
   1962   /**
   1963    * Set the character at the specified offset to the specified character.
   1964    * @param offset A valid offset into the text of the character to set
   1965    * @param ch The new character
   1966    * @return A reference to this
   1967    * @stable ICU 2.0
   1968    */
   1969   UnicodeString& setCharAt(int32_t offset,
   1970                UChar ch);
   1971 
   1972 
   1973   /* Append operations */
   1974 
   1975   /**
   1976    * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
   1977    * object.
   1978    * @param ch the code unit to be appended
   1979    * @return a reference to this
   1980    * @stable ICU 2.0
   1981    */
   1982  inline  UnicodeString& operator+= (UChar ch);
   1983 
   1984   /**
   1985    * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
   1986    * object.
   1987    * @param ch the code point to be appended
   1988    * @return a reference to this
   1989    * @stable ICU 2.0
   1990    */
   1991  inline  UnicodeString& operator+= (UChar32 ch);
   1992 
   1993   /**
   1994    * Append operator. Append the characters in <TT>srcText</TT> to the
   1995    * UnicodeString object. <TT>srcText</TT> is not modified.
   1996    * @param srcText the source for the new characters
   1997    * @return a reference to this
   1998    * @stable ICU 2.0
   1999    */
   2000   inline UnicodeString& operator+= (const UnicodeString& srcText);
   2001 
   2002   /**
   2003    * Append the characters
   2004    * in <TT>srcText</TT> in the range
   2005    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of
   2006    * the UnicodeString object. <TT>srcText</TT>
   2007    * is not modified.
   2008    * @param srcText the source for the new characters
   2009    * @param srcStart the offset into <TT>srcText</TT> where new characters
   2010    * will be obtained
   2011    * @param srcLength the number of characters in <TT>srcText</TT> in
   2012    * the append string
   2013    * @return a reference to this
   2014    * @stable ICU 2.0
   2015    */
   2016   inline UnicodeString& append(const UnicodeString& srcText,
   2017             int32_t srcStart,
   2018             int32_t srcLength);
   2019 
   2020   /**
   2021    * Append the characters in <TT>srcText</TT> to the UnicodeString object.
   2022    * <TT>srcText</TT> is not modified.
   2023    * @param srcText the source for the new characters
   2024    * @return a reference to this
   2025    * @stable ICU 2.0
   2026    */
   2027   inline UnicodeString& append(const UnicodeString& srcText);
   2028 
   2029   /**
   2030    * Append the characters in <TT>srcChars</TT> in the range
   2031    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the
   2032    * UnicodeString object.
   2033    * <TT>srcChars</TT> is not modified.
   2034    * @param srcChars the source for the new characters
   2035    * @param srcStart the offset into <TT>srcChars</TT> where new characters
   2036    * will be obtained
   2037    * @param srcLength the number of characters in <TT>srcChars</TT> in
   2038    *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
   2039    * @return a reference to this
   2040    * @stable ICU 2.0
   2041    */
   2042   inline UnicodeString& append(const UChar *srcChars,
   2043             int32_t srcStart,
   2044             int32_t srcLength);
   2045 
   2046   /**
   2047    * Append the characters in <TT>srcChars</TT> to the end of the
   2048    * UnicodeString object. <TT>srcChars</TT> is not modified.
   2049    * @param srcChars the source for the new characters
   2050    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
   2051    *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
   2052    * @return a reference to this
   2053    * @stable ICU 2.0
   2054    */
   2055   inline UnicodeString& append(const UChar *srcChars,
   2056             int32_t srcLength);
   2057 
   2058   /**
   2059    * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
   2060    * @param srcChar the code unit to append
   2061    * @return a reference to this
   2062    * @stable ICU 2.0
   2063    */
   2064   inline UnicodeString& append(UChar srcChar);
   2065 
   2066   /**
   2067    * Append the code point <TT>srcChar</TT> to the UnicodeString object.
   2068    * @param srcChar the code point to append
   2069    * @return a reference to this
   2070    * @stable ICU 2.0
   2071    */
   2072   inline UnicodeString& append(UChar32 srcChar);
   2073 
   2074 
   2075   /* Insert operations */
   2076 
   2077   /**
   2078    * Insert the characters in <TT>srcText</TT> in the range
   2079    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
   2080    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
   2081    * @param start the offset where the insertion begins
   2082    * @param srcText the source for the new characters
   2083    * @param srcStart the offset into <TT>srcText</TT> where new characters
   2084    * will be obtained
   2085    * @param srcLength the number of characters in <TT>srcText</TT> in
   2086    * the insert string
   2087    * @return a reference to this
   2088    * @stable ICU 2.0
   2089    */
   2090   inline UnicodeString& insert(int32_t start,
   2091             const UnicodeString& srcText,
   2092             int32_t srcStart,
   2093             int32_t srcLength);
   2094 
   2095   /**
   2096    * Insert the characters in <TT>srcText</TT> into the UnicodeString object
   2097    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
   2098    * @param start the offset where the insertion begins
   2099    * @param srcText the source for the new characters
   2100    * @return a reference to this
   2101    * @stable ICU 2.0
   2102    */
   2103   inline UnicodeString& insert(int32_t start,
   2104             const UnicodeString& srcText);
   2105 
   2106   /**
   2107    * Insert the characters in <TT>srcChars</TT> in the range
   2108    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
   2109    *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
   2110    * @param start the offset at which the insertion begins
   2111    * @param srcChars the source for the new characters
   2112    * @param srcStart the offset into <TT>srcChars</TT> where new characters
   2113    * will be obtained
   2114    * @param srcLength the number of characters in <TT>srcChars</TT>
   2115    * in the insert string
   2116    * @return a reference to this
   2117    * @stable ICU 2.0
   2118    */
   2119   inline UnicodeString& insert(int32_t start,
   2120             const UChar *srcChars,
   2121             int32_t srcStart,
   2122             int32_t srcLength);
   2123 
   2124   /**
   2125    * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
   2126    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
   2127    * @param start the offset where the insertion begins
   2128    * @param srcChars the source for the new characters
   2129    * @param srcLength the number of Unicode characters in srcChars.
   2130    * @return a reference to this
   2131    * @stable ICU 2.0
   2132    */
   2133   inline UnicodeString& insert(int32_t start,
   2134             const UChar *srcChars,
   2135             int32_t srcLength);
   2136 
   2137   /**
   2138    * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
   2139    * offset <TT>start</TT>.
   2140    * @param start the offset at which the insertion occurs
   2141    * @param srcChar the code unit to insert
   2142    * @return a reference to this
   2143    * @stable ICU 2.0
   2144    */
   2145   inline UnicodeString& insert(int32_t start,
   2146             UChar srcChar);
   2147 
   2148   /**
   2149    * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
   2150    * offset <TT>start</TT>.
   2151    * @param start the offset at which the insertion occurs
   2152    * @param srcChar the code point to insert
   2153    * @return a reference to this
   2154    * @stable ICU 2.0
   2155    */
   2156   inline UnicodeString& insert(int32_t start,
   2157             UChar32 srcChar);
   2158 
   2159 
   2160   /* Replace operations */
   2161 
   2162   /**
   2163    * Replace the characters in the range
   2164    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
   2165    * <TT>srcText</TT> in the range
   2166    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
   2167    * <TT>srcText</TT> is not modified.
   2168    * @param start the offset at which the replace operation begins
   2169    * @param length the number of characters to replace. The character at
   2170    * <TT>start + length</TT> is not modified.
   2171    * @param srcText the source for the new characters
   2172    * @param srcStart the offset into <TT>srcText</TT> where new characters
   2173    * will be obtained
   2174    * @param srcLength the number of characters in <TT>srcText</TT> in
   2175    * the replace string
   2176    * @return a reference to this
   2177    * @stable ICU 2.0
   2178    */
   2179   UnicodeString& replace(int32_t start,
   2180              int32_t length,
   2181              const UnicodeString& srcText,
   2182              int32_t srcStart,
   2183              int32_t srcLength);
   2184 
   2185   /**
   2186    * Replace the characters in the range
   2187    * [<TT>start</TT>, <TT>start + length</TT>)
   2188    * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
   2189    *  not modified.
   2190    * @param start the offset at which the replace operation begins
   2191    * @param length the number of characters to replace. The character at
   2192    * <TT>start + length</TT> is not modified.
   2193    * @param srcText the source for the new characters
   2194    * @return a reference to this
   2195    * @stable ICU 2.0
   2196    */
   2197   UnicodeString& replace(int32_t start,
   2198              int32_t length,
   2199              const UnicodeString& srcText);
   2200 
   2201   /**
   2202    * Replace the characters in the range
   2203    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
   2204    * <TT>srcChars</TT> in the range
   2205    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
   2206    * is not modified.
   2207    * @param start the offset at which the replace operation begins
   2208    * @param length the number of characters to replace.  The character at
   2209    * <TT>start + length</TT> is not modified.
   2210    * @param srcChars the source for the new characters
   2211    * @param srcStart the offset into <TT>srcChars</TT> where new characters
   2212    * will be obtained
   2213    * @param srcLength the number of characters in <TT>srcChars</TT>
   2214    * in the replace string
   2215    * @return a reference to this
   2216    * @stable ICU 2.0
   2217    */
   2218   UnicodeString& replace(int32_t start,
   2219              int32_t length,
   2220              const UChar *srcChars,
   2221              int32_t srcStart,
   2222              int32_t srcLength);
   2223 
   2224   /**
   2225    * Replace the characters in the range
   2226    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
   2227    * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
   2228    * @param start the offset at which the replace operation begins
   2229    * @param length number of characters to replace.  The character at
   2230    * <TT>start + length</TT> is not modified.
   2231    * @param srcChars the source for the new characters
   2232    * @param srcLength the number of Unicode characters in srcChars
   2233    * @return a reference to this
   2234    * @stable ICU 2.0
   2235    */
   2236   inline UnicodeString& replace(int32_t start,
   2237              int32_t length,
   2238              const UChar *srcChars,
   2239              int32_t srcLength);
   2240 
   2241   /**
   2242    * Replace the characters in the range
   2243    * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
   2244    * <TT>srcChar</TT>.
   2245    * @param start the offset at which the replace operation begins
   2246    * @param length the number of characters to replace.  The character at
   2247    * <TT>start + length</TT> is not modified.
   2248    * @param srcChar the new code unit
   2249    * @return a reference to this
   2250    * @stable ICU 2.0
   2251    */
   2252   inline UnicodeString& replace(int32_t start,
   2253              int32_t length,
   2254              UChar srcChar);
   2255 
   2256   /**
   2257    * Replace the characters in the range
   2258    * [<TT>start</TT>, <TT>start + length</TT>) with the code point
   2259    * <TT>srcChar</TT>.
   2260    * @param start the offset at which the replace operation begins
   2261    * @param length the number of characters to replace.  The character at
   2262    * <TT>start + length</TT> is not modified.
   2263    * @param srcChar the new code point
   2264    * @return a reference to this
   2265    * @stable ICU 2.0
   2266    */
   2267   inline UnicodeString& replace(int32_t start,
   2268              int32_t length,
   2269              UChar32 srcChar);
   2270 
   2271   /**
   2272    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
   2273    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
   2274    * @param start the offset at which the replace operation begins
   2275    * @param limit the offset immediately following the replace range
   2276    * @param srcText the source for the new characters
   2277    * @return a reference to this
   2278    * @stable ICU 2.0
   2279    */
   2280   inline UnicodeString& replaceBetween(int32_t start,
   2281                 int32_t limit,
   2282                 const UnicodeString& srcText);
   2283 
   2284   /**
   2285    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
   2286    * with the characters in <TT>srcText</TT> in the range
   2287    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
   2288    * @param start the offset at which the replace operation begins
   2289    * @param limit the offset immediately following the replace range
   2290    * @param srcText the source for the new characters
   2291    * @param srcStart the offset into <TT>srcText</TT> where new characters
   2292    * will be obtained
   2293    * @param srcLimit the offset immediately following the range to copy
   2294    * in <TT>srcText</TT>
   2295    * @return a reference to this
   2296    * @stable ICU 2.0
   2297    */
   2298   inline UnicodeString& replaceBetween(int32_t start,
   2299                 int32_t limit,
   2300                 const UnicodeString& srcText,
   2301                 int32_t srcStart,
   2302                 int32_t srcLimit);
   2303 
   2304   /**
   2305    * Replace a substring of this object with the given text.
   2306    * @param start the beginning index, inclusive; <code>0 <= start
   2307    * <= limit</code>.
   2308    * @param limit the ending index, exclusive; <code>start <= limit
   2309    * <= length()</code>.
   2310    * @param text the text to replace characters <code>start</code>
   2311    * to <code>limit - 1</code>
   2312    * @stable ICU 2.0
   2313    */
   2314   virtual void handleReplaceBetween(int32_t start,
   2315                                     int32_t limit,
   2316                                     const UnicodeString& text);
   2317 
   2318   /**
   2319    * Replaceable API
   2320    * @return TRUE if it has MetaData
   2321    * @stable ICU 2.4
   2322    */
   2323   virtual UBool hasMetaData() const;
   2324 
   2325   /**
   2326    * Copy a substring of this object, retaining attribute (out-of-band)
   2327    * information.  This method is used to duplicate or reorder substrings.
   2328    * The destination index must not overlap the source range.
   2329    *
   2330    * @param start the beginning index, inclusive; <code>0 <= start <=
   2331    * limit</code>.
   2332    * @param limit the ending index, exclusive; <code>start <= limit <=
   2333    * length()</code>.
   2334    * @param dest the destination index.  The characters from
   2335    * <code>start..limit-1</code> will be copied to <code>dest</code>.
   2336    * Implementations of this method may assume that <code>dest <= start ||
   2337    * dest >= limit</code>.
   2338    * @stable ICU 2.0
   2339    */
   2340   virtual void copy(int32_t start, int32_t limit, int32_t dest);
   2341 
   2342   /* Search and replace operations */
   2343 
   2344   /**
   2345    * Replace all occurrences of characters in oldText with the characters
   2346    * in newText
   2347    * @param oldText the text containing the search text
   2348    * @param newText the text containing the replacement text
   2349    * @return a reference to this
   2350    * @stable ICU 2.0
   2351    */
   2352   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
   2353                 const UnicodeString& newText);
   2354 
   2355   /**
   2356    * Replace all occurrences of characters in oldText with characters
   2357    * in newText
   2358    * in the range [<TT>start</TT>, <TT>start + length</TT>).
   2359    * @param start the start of the range in which replace will be performed
   2360    * @param length the length of the range in which replace will be performed
   2361    * @param oldText the text containing the search text
   2362    * @param newText the text containing the replacement text
   2363    * @return a reference to this
   2364    * @stable ICU 2.0
   2365    */
   2366   inline UnicodeString& findAndReplace(int32_t start,
   2367                 int32_t length,
   2368                 const UnicodeString& oldText,
   2369                 const UnicodeString& newText);
   2370 
   2371   /**
   2372    * Replace all occurrences of characters in oldText in the range
   2373    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
   2374    * in newText in the range
   2375    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
   2376    * in the range [<TT>start</TT>, <TT>start + length</TT>).
   2377    * @param start the start of the range in which replace will be performed
   2378    * @param length the length of the range in which replace will be performed
   2379    * @param oldText the text containing the search text
   2380    * @param oldStart the start of the search range in <TT>oldText</TT>
   2381    * @param oldLength the length of the search range in <TT>oldText</TT>
   2382    * @param newText the text containing the replacement text
   2383    * @param newStart the start of the replacement range in <TT>newText</TT>
   2384    * @param newLength the length of the replacement range in <TT>newText</TT>
   2385    * @return a reference to this
   2386    * @stable ICU 2.0
   2387    */
   2388   UnicodeString& findAndReplace(int32_t start,
   2389                 int32_t length,
   2390                 const UnicodeString& oldText,
   2391                 int32_t oldStart,
   2392                 int32_t oldLength,
   2393                 const UnicodeString& newText,
   2394                 int32_t newStart,
   2395                 int32_t newLength);
   2396 
   2397 
   2398   /* Remove operations */
   2399 
   2400   /**
   2401    * Remove all characters from the UnicodeString object.
   2402    * @return a reference to this
   2403    * @stable ICU 2.0
   2404    */
   2405   inline UnicodeString& remove(void);
   2406 
   2407   /**
   2408    * Remove the characters in the range
   2409    * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
   2410    * @param start the offset of the first character to remove
   2411    * @param length the number of characters to remove
   2412    * @return a reference to this
   2413    * @stable ICU 2.0
   2414    */
   2415   inline UnicodeString& remove(int32_t start,
   2416                                int32_t length = (int32_t)INT32_MAX);
   2417 
   2418   /**
   2419    * Remove the characters in the range
   2420    * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
   2421    * @param start the offset of the first character to remove
   2422    * @param limit the offset immediately following the range to remove
   2423    * @return a reference to this
   2424    * @stable ICU 2.0
   2425    */
   2426   inline UnicodeString& removeBetween(int32_t start,
   2427                                       int32_t limit = (int32_t)INT32_MAX);
   2428 
   2429   /**
   2430    * Retain only the characters in the range
   2431    * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
   2432    * Removes characters before <code>start</code> and at and after <code>limit</code>.
   2433    * @param start the offset of the first character to retain
   2434    * @param limit the offset immediately following the range to retain
   2435    * @return a reference to this
   2436    * @stable ICU 4.4
   2437    */
   2438   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
   2439 
   2440   /* Length operations */
   2441 
   2442   /**
   2443    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
   2444    * If the length of this UnicodeString is less than targetLength,
   2445    * targetLength - length() copies of padChar will be added to the
   2446    * beginning of this UnicodeString.
   2447    * @param targetLength the desired length of the string
   2448    * @param padChar the character to use for padding. Defaults to
   2449    * space (U+0020)
   2450    * @return TRUE if the text was padded, FALSE otherwise.
   2451    * @stable ICU 2.0
   2452    */
   2453   UBool padLeading(int32_t targetLength,
   2454                     UChar padChar = 0x0020);
   2455 
   2456   /**
   2457    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
   2458    * If the length of this UnicodeString is less than targetLength,
   2459    * targetLength - length() copies of padChar will be added to the
   2460    * end of this UnicodeString.
   2461    * @param targetLength the desired length of the string
   2462    * @param padChar the character to use for padding. Defaults to
   2463    * space (U+0020)
   2464    * @return TRUE if the text was padded, FALSE otherwise.
   2465    * @stable ICU 2.0
   2466    */
   2467   UBool padTrailing(int32_t targetLength,
   2468                      UChar padChar = 0x0020);
   2469 
   2470   /**
   2471    * Truncate this UnicodeString to the <TT>targetLength</TT>.
   2472    * @param targetLength the desired length of this UnicodeString.
   2473    * @return TRUE if the text was truncated, FALSE otherwise
   2474    * @stable ICU 2.0
   2475    */
   2476   inline UBool truncate(int32_t targetLength);
   2477 
   2478   /**
   2479    * Trims leading and trailing whitespace from this UnicodeString.
   2480    * @return a reference to this
   2481    * @stable ICU 2.0
   2482    */
   2483   UnicodeString& trim(void);
   2484 
   2485 
   2486   /* Miscellaneous operations */
   2487 
   2488   /**
   2489    * Reverse this UnicodeString in place.
   2490    * @return a reference to this
   2491    * @stable ICU 2.0
   2492    */
   2493   inline UnicodeString& reverse(void);
   2494 
   2495   /**
   2496    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
   2497    * this UnicodeString.
   2498    * @param start the start of the range to reverse
   2499    * @param length the number of characters to reverse
   2500    * @return a reference to this
   2501    * @stable ICU 2.0
   2502    */
   2503   inline UnicodeString& reverse(int32_t start,
   2504              int32_t length);
   2505 
   2506   /**
   2507    * Convert the characters in this to UPPER CASE following the conventions of
   2508    * the default locale.
   2509    * @return A reference to this.
   2510    * @stable ICU 2.0
   2511    */
   2512   UnicodeString& toUpper(void);
   2513 
   2514   /**
   2515    * Convert the characters in this to UPPER CASE following the conventions of
   2516    * a specific locale.
   2517    * @param locale The locale containing the conventions to use.
   2518    * @return A reference to this.
   2519    * @stable ICU 2.0
   2520    */
   2521   UnicodeString& toUpper(const Locale& locale);
   2522 
   2523   /**
   2524    * Convert the characters in this to lower case following the conventions of
   2525    * the default locale.
   2526    * @return A reference to this.
   2527    * @stable ICU 2.0
   2528    */
   2529   UnicodeString& toLower(void);
   2530 
   2531   /**
   2532    * Convert the characters in this to lower case following the conventions of
   2533    * a specific locale.
   2534    * @param locale The locale containing the conventions to use.
   2535    * @return A reference to this.
   2536    * @stable ICU 2.0
   2537    */
   2538   UnicodeString& toLower(const Locale& locale);
   2539 
   2540 #if !UCONFIG_NO_BREAK_ITERATION
   2541 
   2542   /**
   2543    * Titlecase this string, convenience function using the default locale.
   2544    *
   2545    * Casing is locale-dependent and context-sensitive.
   2546    * Titlecasing uses a break iterator to find the first characters of words
   2547    * that are to be titlecased. It titlecases those characters and lowercases
   2548    * all others.
   2549    *
   2550    * The titlecase break iterator can be provided to customize for arbitrary
   2551    * styles, using rules and dictionaries beyond the standard iterators.
   2552    * It may be more efficient to always provide an iterator to avoid
   2553    * opening and closing one for each string.
   2554    * The standard titlecase iterator for the root locale implements the
   2555    * algorithm of Unicode TR 21.
   2556    *
   2557    * This function uses only the setText(), first() and next() methods of the
   2558    * provided break iterator.
   2559    *
   2560    * @param titleIter A break iterator to find the first characters of words
   2561    *                  that are to be titlecased.
   2562    *                  If none is provided (0), then a standard titlecase
   2563    *                  break iterator is opened.
   2564    *                  Otherwise the provided iterator is set to the string's text.
   2565    * @return A reference to this.
   2566    * @stable ICU 2.1
   2567    */
   2568   UnicodeString &toTitle(BreakIterator *titleIter);
   2569 
   2570   /**
   2571    * Titlecase this string.
   2572    *
   2573    * Casing is locale-dependent and context-sensitive.
   2574    * Titlecasing uses a break iterator to find the first characters of words
   2575    * that are to be titlecased. It titlecases those characters and lowercases
   2576    * all others.
   2577    *
   2578    * The titlecase break iterator can be provided to customize for arbitrary
   2579    * styles, using rules and dictionaries beyond the standard iterators.
   2580    * It may be more efficient to always provide an iterator to avoid
   2581    * opening and closing one for each string.
   2582    * The standard titlecase iterator for the root locale implements the
   2583    * algorithm of Unicode TR 21.
   2584    *
   2585    * This function uses only the setText(), first() and next() methods of the
   2586    * provided break iterator.
   2587    *
   2588    * @param titleIter A break iterator to find the first characters of words
   2589    *                  that are to be titlecased.
   2590    *                  If none is provided (0), then a standard titlecase
   2591    *                  break iterator is opened.
   2592    *                  Otherwise the provided iterator is set to the string's text.
   2593    * @param locale    The locale to consider.
   2594    * @return A reference to this.
   2595    * @stable ICU 2.1
   2596    */
   2597   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
   2598 
   2599   /**
   2600    * Titlecase this string, with options.
   2601    *
   2602    * Casing is locale-dependent and context-sensitive.
   2603    * Titlecasing uses a break iterator to find the first characters of words
   2604    * that are to be titlecased. It titlecases those characters and lowercases
   2605    * all others. (This can be modified with options.)
   2606    *
   2607    * The titlecase break iterator can be provided to customize for arbitrary
   2608    * styles, using rules and dictionaries beyond the standard iterators.
   2609    * It may be more efficient to always provide an iterator to avoid
   2610    * opening and closing one for each string.
   2611    * The standard titlecase iterator for the root locale implements the
   2612    * algorithm of Unicode TR 21.
   2613    *
   2614    * This function uses only the setText(), first() and next() methods of the
   2615    * provided break iterator.
   2616    *
   2617    * @param titleIter A break iterator to find the first characters of words
   2618    *                  that are to be titlecased.
   2619    *                  If none is provided (0), then a standard titlecase
   2620    *                  break iterator is opened.
   2621    *                  Otherwise the provided iterator is set to the string's text.
   2622    * @param locale    The locale to consider.
   2623    * @param options Options bit set, see ucasemap_open().
   2624    * @return A reference to this.
   2625    * @see U_TITLECASE_NO_LOWERCASE
   2626    * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
   2627    * @see ucasemap_open
   2628    * @stable ICU 3.8
   2629    */
   2630   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
   2631 
   2632 #endif
   2633 
   2634   /**
   2635    * Case-fold the characters in this string.
   2636    * Case-folding is locale-independent and not context-sensitive,
   2637    * but there is an option for whether to include or exclude mappings for dotted I
   2638    * and dotless i that are marked with 'I' in CaseFolding.txt.
   2639    * The result may be longer or shorter than the original.
   2640    *
   2641    * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
   2642    * @return A reference to this.
   2643    * @stable ICU 2.0
   2644    */
   2645   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
   2646 
   2647   //========================================
   2648   // Access to the internal buffer
   2649   //========================================
   2650 
   2651   /**
   2652    * Get a read/write pointer to the internal buffer.
   2653    * The buffer is guaranteed to be large enough for at least minCapacity UChars,
   2654    * writable, and is still owned by the UnicodeString object.
   2655    * Calls to getBuffer(minCapacity) must not be nested, and
   2656    * must be matched with calls to releaseBuffer(newLength).
   2657    * If the string buffer was read-only or shared,
   2658    * then it will be reallocated and copied.
   2659    *
   2660    * An attempted nested call will return 0, and will not further modify the
   2661    * state of the UnicodeString object.
   2662    * It also returns 0 if the string is bogus.
   2663    *
   2664    * The actual capacity of the string buffer may be larger than minCapacity.
   2665    * getCapacity() returns the actual capacity.
   2666    * For many operations, the full capacity should be used to avoid reallocations.
   2667    *
   2668    * While the buffer is "open" between getBuffer(minCapacity)
   2669    * and releaseBuffer(newLength), the following applies:
   2670    * - The string length is set to 0.
   2671    * - Any read API call on the UnicodeString object will behave like on a 0-length string.
   2672    * - Any write API call on the UnicodeString object is disallowed and will have no effect.
   2673    * - You can read from and write to the returned buffer.
   2674    * - The previous string contents will still be in the buffer;
   2675    *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
   2676    *   If the length() was greater than minCapacity, then any contents after minCapacity
   2677    *   may be lost.
   2678    *   The buffer contents is not NUL-terminated by getBuffer().
   2679    *   If length()<getCapacity() then you can terminate it by writing a NUL
   2680    *   at index length().
   2681    * - You must call releaseBuffer(newLength) in order to
   2682    *   return to normal UnicodeString operation.
   2683    *
   2684    * @param minCapacity the minimum number of UChars that are to be available
   2685    *        in the buffer, starting at the returned pointer;
   2686    *        defaults to the current string capacity if minCapacity==-1
   2687    * @return a writable pointer to the internal string buffer,
   2688    *         or 0 if an error occurs (nested calls, out of memory)
   2689    *
   2690    * @see releaseBuffer
   2691    * @see getTerminatedBuffer()
   2692    * @stable ICU 2.0
   2693    */
   2694   UChar *getBuffer(int32_t minCapacity);
   2695 
   2696   /**
   2697    * Release a read/write buffer on a UnicodeString object with an
   2698    * "open" getBuffer(minCapacity).
   2699    * This function must be called in a matched pair with getBuffer(minCapacity).
   2700    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
   2701    *
   2702    * It will set the string length to newLength, at most to the current capacity.
   2703    * If newLength==-1 then it will set the length according to the
   2704    * first NUL in the buffer, or to the capacity if there is no NUL.
   2705    *
   2706    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
   2707    *
   2708    * @param newLength the new length of the UnicodeString object;
   2709    *        defaults to the current capacity if newLength is greater than that;
   2710    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
   2711    *        the current capacity of the string
   2712    *
   2713    * @see getBuffer(int32_t minCapacity)
   2714    * @stable ICU 2.0
   2715    */
   2716   void releaseBuffer(int32_t newLength=-1);
   2717 
   2718   /**
   2719    * Get a read-only pointer to the internal buffer.
   2720    * This can be called at any time on a valid UnicodeString.
   2721    *
   2722    * It returns 0 if the string is bogus, or
   2723    * during an "open" getBuffer(minCapacity).
   2724    *
   2725    * It can be called as many times as desired.
   2726    * The pointer that it returns will remain valid until the UnicodeString object is modified,
   2727    * at which time the pointer is semantically invalidated and must not be used any more.
   2728    *
   2729    * The capacity of the buffer can be determined with getCapacity().
   2730    * The part after length() may or may not be initialized and valid,
   2731    * depending on the history of the UnicodeString object.
   2732    *
   2733    * The buffer contents is (probably) not NUL-terminated.
   2734    * You can check if it is with
   2735    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
   2736    * (See getTerminatedBuffer().)
   2737    *
   2738    * The buffer may reside in read-only memory. Its contents must not
   2739    * be modified.
   2740    *
   2741    * @return a read-only pointer to the internal string buffer,
   2742    *         or 0 if the string is bogus or has an "open" getBuffer(minCapacity)
   2743    *
   2744    * @see getBuffer(int32_t minCapacity)
   2745    * @see getTerminatedBuffer()
   2746    * @stable ICU 2.0
   2747    */
   2748   inline const UChar *getBuffer() const;
   2749 
   2750   /**
   2751    * Get a read-only pointer to the internal buffer,
   2752    * making sure that it is NUL-terminated.
   2753    * This can be called at any time on a valid UnicodeString.
   2754    *
   2755    * It returns 0 if the string is bogus, or
   2756    * during an "open" getBuffer(minCapacity), or if the buffer cannot
   2757    * be NUL-terminated (because memory allocation failed).
   2758    *
   2759    * It can be called as many times as desired.
   2760    * The pointer that it returns will remain valid until the UnicodeString object is modified,
   2761    * at which time the pointer is semantically invalidated and must not be used any more.
   2762    *
   2763    * The capacity of the buffer can be determined with getCapacity().
   2764    * The part after length()+1 may or may not be initialized and valid,
   2765    * depending on the history of the UnicodeString object.
   2766    *
   2767    * The buffer contents is guaranteed to be NUL-terminated.
   2768    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
   2769    * is written.
   2770    * For this reason, this function is not const, unlike getBuffer().
   2771    * Note that a UnicodeString may also contain NUL characters as part of its contents.
   2772    *
   2773    * The buffer may reside in read-only memory. Its contents must not
   2774    * be modified.
   2775    *
   2776    * @return a read-only pointer to the internal string buffer, or 0 if the string is bogus,
   2777    *         has an "open" getBuffer(minCapacity), or cannot be NUL-terminated
   2778    *
   2779    * @see getBuffer(int32_t minCapacity)
   2780    * @see getBuffer()
   2781    * @stable ICU 2.2
   2782    */
   2783   inline const UChar *getTerminatedBuffer();
   2784 
   2785   //========================================
   2786   // Constructors
   2787   //========================================
   2788 
   2789   /** Construct an empty UnicodeString.
   2790    * @stable ICU 2.0
   2791    */
   2792   UnicodeString();
   2793 
   2794   /**
   2795    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
   2796    * @param capacity the number of UChars this UnicodeString should hold
   2797    * before a resize is necessary; if count is greater than 0 and count
   2798    * code points c take up more space than capacity, then capacity is adjusted
   2799    * accordingly.
   2800    * @param c is used to initially fill the string
   2801    * @param count specifies how many code points c are to be written in the
   2802    *              string
   2803    * @stable ICU 2.0
   2804    */
   2805   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
   2806 
   2807   /**
   2808    * Single UChar (code unit) constructor.
   2809    * @param ch the character to place in the UnicodeString
   2810    * @stable ICU 2.0
   2811    */
   2812   UnicodeString(UChar ch);
   2813 
   2814   /**
   2815    * Single UChar32 (code point) constructor.
   2816    * @param ch the character to place in the UnicodeString
   2817    * @stable ICU 2.0
   2818    */
   2819   UnicodeString(UChar32 ch);
   2820 
   2821   /**
   2822    * UChar* constructor.
   2823    * @param text The characters to place in the UnicodeString.  <TT>text</TT>
   2824    * must be NUL (U+0000) terminated.
   2825    * @stable ICU 2.0
   2826    */
   2827   UnicodeString(const UChar *text);
   2828 
   2829   /**
   2830    * UChar* constructor.
   2831    * @param text The characters to place in the UnicodeString.
   2832    * @param textLength The number of Unicode characters in <TT>text</TT>
   2833    * to copy.
   2834    * @stable ICU 2.0
   2835    */
   2836   UnicodeString(const UChar *text,
   2837         int32_t textLength);
   2838 
   2839   /**
   2840    * Readonly-aliasing UChar* constructor.
   2841    * The text will be used for the UnicodeString object, but
   2842    * it will not be released when the UnicodeString is destroyed.
   2843    * This has copy-on-write semantics:
   2844    * When the string is modified, then the buffer is first copied into
   2845    * newly allocated memory.
   2846    * The aliased buffer is never modified.
   2847    * In an assignment to another UnicodeString, the text will be aliased again,
   2848    * so that both strings then alias the same readonly-text.
   2849    *
   2850    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
   2851    *                     This must be true if <code>textLength==-1</code>.
   2852    * @param text The characters to alias for the UnicodeString.
   2853    * @param textLength The number of Unicode characters in <code>text</code> to alias.
   2854    *                   If -1, then this constructor will determine the length
   2855    *                   by calling <code>u_strlen()</code>.
   2856    * @stable ICU 2.0
   2857    */
   2858   UnicodeString(UBool isTerminated,
   2859                 const UChar *text,
   2860                 int32_t textLength);
   2861 
   2862   /**
   2863    * Writable-aliasing UChar* constructor.
   2864    * The text will be used for the UnicodeString object, but
   2865    * it will not be released when the UnicodeString is destroyed.
   2866    * This has write-through semantics:
   2867    * For as long as the capacity of the buffer is sufficient, write operations
   2868    * will directly affect the buffer. When more capacity is necessary, then
   2869    * a new buffer will be allocated and the contents copied as with regularly
   2870    * constructed strings.
   2871    * In an assignment to another UnicodeString, the buffer will be copied.
   2872    * The extract(UChar *dst) function detects whether the dst pointer is the same
   2873    * as the string buffer itself and will in this case not copy the contents.
   2874    *
   2875    * @param buffer The characters to alias for the UnicodeString.
   2876    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
   2877    * @param buffCapacity The size of <code>buffer</code> in UChars.
   2878    * @stable ICU 2.0
   2879    */
   2880   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
   2881 
   2882 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
   2883 
   2884   /**
   2885    * char* constructor.
   2886    * @param codepageData an array of bytes, null-terminated,
   2887    *                     in the platform's default codepage.
   2888    * @stable ICU 2.0
   2889    */
   2890   UnicodeString(const char *codepageData);
   2891 
   2892   /**
   2893    * char* constructor.
   2894    * @param codepageData an array of bytes in the platform's default codepage.
   2895    * @param dataLength The number of bytes in <TT>codepageData</TT>.
   2896    * @stable ICU 2.0
   2897    */
   2898   UnicodeString(const char *codepageData, int32_t dataLength);
   2899 
   2900 #endif
   2901 
   2902 #if !UCONFIG_NO_CONVERSION
   2903 
   2904   /**
   2905    * char* constructor.
   2906    * @param codepageData an array of bytes, null-terminated
   2907    * @param codepage the encoding of <TT>codepageData</TT>.  The special
   2908    * value 0 for <TT>codepage</TT> indicates that the text is in the
   2909    * platform's default codepage.
   2910    *
   2911    * If <code>codepage</code> is an empty string (<code>""</code>),
   2912    * then a simple conversion is performed on the codepage-invariant
   2913    * subset ("invariant characters") of the platform encoding. See utypes.h.
   2914    * Recommendation: For invariant-character strings use the constructor
   2915    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
   2916    * because it avoids object code dependencies of UnicodeString on
   2917    * the conversion code.
   2918    *
   2919    * @stable ICU 2.0
   2920    */
   2921   UnicodeString(const char *codepageData, const char *codepage);
   2922 
   2923   /**
   2924    * char* constructor.
   2925    * @param codepageData an array of bytes.
   2926    * @param dataLength The number of bytes in <TT>codepageData</TT>.
   2927    * @param codepage the encoding of <TT>codepageData</TT>.  The special
   2928    * value 0 for <TT>codepage</TT> indicates that the text is in the
   2929    * platform's default codepage.
   2930    * If <code>codepage</code> is an empty string (<code>""</code>),
   2931    * then a simple conversion is performed on the codepage-invariant
   2932    * subset ("invariant characters") of the platform encoding. See utypes.h.
   2933    * Recommendation: For invariant-character strings use the constructor
   2934    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
   2935    * because it avoids object code dependencies of UnicodeString on
   2936    * the conversion code.
   2937    *
   2938    * @stable ICU 2.0
   2939    */
   2940   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
   2941 
   2942   /**
   2943    * char * / UConverter constructor.
   2944    * This constructor uses an existing UConverter object to
   2945    * convert the codepage string to Unicode and construct a UnicodeString
   2946    * from that.
   2947    *
   2948    * The converter is reset first.
   2949    * If the error code indicates a failure before this constructor is called,
   2950    * or if an error occurs during conversion or construction,
   2951    * then the string will be bogus.
   2952    *
   2953    * This function avoids the overhead of opening and closing a converter if
   2954    * multiple strings are constructed.
   2955    *
   2956    * @param src input codepage string
   2957    * @param srcLength length of the input string, can be -1 for NUL-terminated strings
   2958    * @param cnv converter object (ucnv_resetToUnicode() will be called),
   2959    *        can be NULL for the default converter
   2960    * @param errorCode normal ICU error code
   2961    * @stable ICU 2.0
   2962    */
   2963   UnicodeString(
   2964         const char *src, int32_t srcLength,
   2965         UConverter *cnv,
   2966         UErrorCode &errorCode);
   2967 
   2968 #endif
   2969 
   2970   /**
   2971    * Constructs a Unicode string from an invariant-character char * string.
   2972    * About invariant characters see utypes.h.
   2973    * This constructor has no runtime dependency on conversion code and is
   2974    * therefore recommended over ones taking a charset name string
   2975    * (where the empty string "" indicates invariant-character conversion).
   2976    *
   2977    * Use the macro US_INV as the third, signature-distinguishing parameter.
   2978    *
   2979    * For example:
   2980    * \code
   2981    * void fn(const char *s) {
   2982    *   UnicodeString ustr(s, -1, US_INV);
   2983    *   // use ustr ...
   2984    * }
   2985    * \endcode
   2986    *
   2987    * @param src String using only invariant characters.
   2988    * @param length Length of src, or -1 if NUL-terminated.
   2989    * @param inv Signature-distinguishing parameter, use US_INV.
   2990    *
   2991    * @see US_INV
   2992    * @stable ICU 3.2
   2993    */
   2994   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
   2995 
   2996 
   2997   /**
   2998    * Copy constructor.
   2999    * @param that The UnicodeString object to copy.
   3000    * @stable ICU 2.0
   3001    */
   3002   UnicodeString(const UnicodeString& that);
   3003 
   3004   /**
   3005    * 'Substring' constructor from tail of source string.
   3006    * @param src The UnicodeString object to copy.
   3007    * @param srcStart The offset into <tt>src</tt> at which to start copying.
   3008    * @stable ICU 2.2
   3009    */
   3010   UnicodeString(const UnicodeString& src, int32_t srcStart);
   3011 
   3012   /**
   3013    * 'Substring' constructor from subrange of source string.
   3014    * @param src The UnicodeString object to copy.
   3015    * @param srcStart The offset into <tt>src</tt> at which to start copying.
   3016    * @param srcLength The number of characters from <tt>src</tt> to copy.
   3017    * @stable ICU 2.2
   3018    */
   3019   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
   3020 
   3021   /**
   3022    * Clone this object, an instance of a subclass of Replaceable.
   3023    * Clones can be used concurrently in multiple threads.
   3024    * If a subclass does not implement clone(), or if an error occurs,
   3025    * then NULL is returned.
   3026    * The clone functions in all subclasses return a pointer to a Replaceable
   3027    * because some compilers do not support covariant (same-as-this)
   3028    * return types; cast to the appropriate subclass if necessary.
   3029    * The caller must delete the clone.
   3030    *
   3031    * @return a clone of this object
   3032    *
   3033    * @see Replaceable::clone
   3034    * @see getDynamicClassID
   3035    * @stable ICU 2.6
   3036    */
   3037   virtual Replaceable *clone() const;
   3038 
   3039   /** Destructor.
   3040    * @stable ICU 2.0
   3041    */
   3042   virtual ~UnicodeString();
   3043 
   3044   /**
   3045    * Create a UnicodeString from a UTF-8 string.
   3046    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   3047    * Calls u_strFromUTF8WithSub().
   3048    *
   3049    * @param utf8 UTF-8 input string.
   3050    *             Note that a StringPiece can be implicitly constructed
   3051    *             from a std::string or a NUL-terminated const char * string.
   3052    * @return A UnicodeString with equivalent UTF-16 contents.
   3053    * @see toUTF8
   3054    * @see toUTF8String
   3055    * @stable ICU 4.2
   3056    */
   3057   static UnicodeString fromUTF8(const StringPiece &utf8);
   3058 
   3059   /**
   3060    * Create a UnicodeString from a UTF-32 string.
   3061    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
   3062    * Calls u_strFromUTF32WithSub().
   3063    *
   3064    * @param utf32 UTF-32 input string. Must not be NULL.
   3065    * @param length Length of the input string, or -1 if NUL-terminated.
   3066    * @return A UnicodeString with equivalent UTF-16 contents.
   3067    * @see toUTF32
   3068    * @stable ICU 4.2
   3069    */
   3070   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
   3071 
   3072   /* Miscellaneous operations */
   3073 
   3074   /**
   3075    * Unescape a string of characters and return a string containing
   3076    * the result.  The following escape sequences are recognized:
   3077    *
   3078    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
   3079    * \\Uhhhhhhhh   8 hex digits
   3080    * \\xhh         1-2 hex digits
   3081    * \\ooo         1-3 octal digits; o in [0-7]
   3082    * \\cX          control-X; X is masked with 0x1F
   3083    *
   3084    * as well as the standard ANSI C escapes:
   3085    *
   3086    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
   3087    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
   3088    * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
   3089    *
   3090    * Anything else following a backslash is generically escaped.  For
   3091    * example, "[a\\-z]" returns "[a-z]".
   3092    *
   3093    * If an escape sequence is ill-formed, this method returns an empty
   3094    * string.  An example of an ill-formed sequence is "\\u" followed by
   3095    * fewer than 4 hex digits.
   3096    *
   3097    * This function is similar to u_unescape() but not identical to it.
   3098    * The latter takes a source char*, so it does escape recognition
   3099    * and also invariant conversion.
   3100    *
   3101    * @return a string with backslash escapes interpreted, or an
   3102    * empty string on error.
   3103    * @see UnicodeString#unescapeAt()
   3104    * @see u_unescape()
   3105    * @see u_unescapeAt()
   3106    * @stable ICU 2.0
   3107    */
   3108   UnicodeString unescape() const;
   3109 
   3110   /**
   3111    * Unescape a single escape sequence and return the represented
   3112    * character.  See unescape() for a listing of the recognized escape
   3113    * sequences.  The character at offset-1 is assumed (without
   3114    * checking) to be a backslash.  If the escape sequence is
   3115    * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
   3116    * returned.
   3117    *
   3118    * @param offset an input output parameter.  On input, it is the
   3119    * offset into this string where the escape sequence is located,
   3120    * after the initial backslash.  On output, it is advanced after the
   3121    * last character parsed.  On error, it is not advanced at all.
   3122    * @return the character represented by the escape sequence at
   3123    * offset, or (UChar32)0xFFFFFFFF on error.
   3124    * @see UnicodeString#unescape()
   3125    * @see u_unescape()
   3126    * @see u_unescapeAt()
   3127    * @stable ICU 2.0
   3128    */
   3129   UChar32 unescapeAt(int32_t &offset) const;
   3130 
   3131   /**
   3132    * ICU "poor man's RTTI", returns a UClassID for this class.
   3133    *
   3134    * @stable ICU 2.2
   3135    */
   3136   static UClassID U_EXPORT2 getStaticClassID();
   3137 
   3138   /**
   3139    * ICU "poor man's RTTI", returns a UClassID for the actual class.
   3140    *
   3141    * @stable ICU 2.2
   3142    */
   3143   virtual UClassID getDynamicClassID() const;
   3144 
   3145   //========================================
   3146   // Implementation methods
   3147   //========================================
   3148 
   3149 protected:
   3150   /**
   3151    * Implement Replaceable::getLength() (see jitterbug 1027).
   3152    * @stable ICU 2.4
   3153    */
   3154   virtual int32_t getLength() const;
   3155 
   3156   /**
   3157    * The change in Replaceable to use virtual getCharAt() allows
   3158    * UnicodeString::charAt() to be inline again (see jitterbug 709).
   3159    * @stable ICU 2.4
   3160    */
   3161   virtual UChar getCharAt(int32_t offset) const;
   3162 
   3163   /**
   3164    * The change in Replaceable to use virtual getChar32At() allows
   3165    * UnicodeString::char32At() to be inline again (see jitterbug 709).
   3166    * @stable ICU 2.4
   3167    */
   3168   virtual UChar32 getChar32At(int32_t offset) const;
   3169 
   3170 private:
   3171   // For char* constructors. Could be made public.
   3172   UnicodeString &setToUTF8(const StringPiece &utf8);
   3173   // For extract(char*).
   3174   // We could make a toUTF8(target, capacity, errorCode) public but not
   3175   // this version: New API will be cleaner if we make callers create substrings
   3176   // rather than having start+length on every method,
   3177   // and it should take a UErrorCode&.
   3178   int32_t
   3179   toUTF8(int32_t start, int32_t len,
   3180          char *target, int32_t capacity) const;
   3181 
   3182 
   3183   inline int8_t
   3184   doCompare(int32_t start,
   3185            int32_t length,
   3186            const UnicodeString& srcText,
   3187            int32_t srcStart,
   3188            int32_t srcLength) const;
   3189 
   3190   int8_t doCompare(int32_t start,
   3191            int32_t length,
   3192            const UChar *srcChars,
   3193            int32_t srcStart,
   3194            int32_t srcLength) const;
   3195 
   3196   inline int8_t
   3197   doCompareCodePointOrder(int32_t start,
   3198                           int32_t length,
   3199                           const UnicodeString& srcText,
   3200                           int32_t srcStart,
   3201                           int32_t srcLength) const;
   3202 
   3203   int8_t doCompareCodePointOrder(int32_t start,
   3204                                  int32_t length,
   3205                                  const UChar *srcChars,
   3206                                  int32_t srcStart,
   3207                                  int32_t srcLength) const;
   3208 
   3209   inline int8_t
   3210   doCaseCompare(int32_t start,
   3211                 int32_t length,
   3212                 const UnicodeString &srcText,
   3213                 int32_t srcStart,
   3214                 int32_t srcLength,
   3215                 uint32_t options) const;
   3216 
   3217   int8_t
   3218   doCaseCompare(int32_t start,
   3219                 int32_t length,
   3220                 const UChar *srcChars,
   3221                 int32_t srcStart,
   3222                 int32_t srcLength,
   3223                 uint32_t options) const;
   3224 
   3225   int32_t doIndexOf(UChar c,
   3226             int32_t start,
   3227             int32_t length) const;
   3228 
   3229   int32_t doIndexOf(UChar32 c,
   3230                         int32_t start,
   3231                         int32_t length) const;
   3232 
   3233   int32_t doLastIndexOf(UChar c,
   3234                 int32_t start,
   3235                 int32_t length) const;
   3236 
   3237   int32_t doLastIndexOf(UChar32 c,
   3238                             int32_t start,
   3239                             int32_t length) const;
   3240 
   3241   void doExtract(int32_t start,
   3242          int32_t length,
   3243          UChar *dst,
   3244          int32_t dstStart) const;
   3245 
   3246   inline void doExtract(int32_t start,
   3247          int32_t length,
   3248          UnicodeString& target) const;
   3249 
   3250   inline UChar doCharAt(int32_t offset)  const;
   3251 
   3252   UnicodeString& doReplace(int32_t start,
   3253                int32_t length,
   3254                const UnicodeString& srcText,
   3255                int32_t srcStart,
   3256                int32_t srcLength);
   3257 
   3258   UnicodeString& doReplace(int32_t start,
   3259                int32_t length,
   3260                const UChar *srcChars,
   3261                int32_t srcStart,
   3262                int32_t srcLength);
   3263 
   3264   UnicodeString& doReverse(int32_t start,
   3265                int32_t length);
   3266 
   3267   // calculate hash code
   3268   int32_t doHashCode(void) const;
   3269 
   3270   // get pointer to start of array
   3271   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
   3272   inline UChar* getArrayStart(void);
   3273   inline const UChar* getArrayStart(void) const;
   3274 
   3275   // A UnicodeString object (not necessarily its current buffer)
   3276   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
   3277   inline UBool isWritable() const;
   3278 
   3279   // Is the current buffer writable?
   3280   inline UBool isBufferWritable() const;
   3281 
   3282   // None of the following does releaseArray().
   3283   inline void setLength(int32_t len);        // sets only fShortLength and fLength
   3284   inline void setToEmpty();                  // sets fFlags=kShortString
   3285   inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
   3286 
   3287   // allocate the array; result may be fStackBuffer
   3288   // sets refCount to 1 if appropriate
   3289   // sets fArray, fCapacity, and fFlags
   3290   // returns boolean for success or failure
   3291   UBool allocate(int32_t capacity);
   3292 
   3293   // release the array if owned
   3294   void releaseArray(void);
   3295 
   3296   // turn a bogus string into an empty one
   3297   void unBogus();
   3298 
   3299   // implements assignment operator, copy constructor, and fastCopyFrom()
   3300   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
   3301 
   3302   // Pin start and length to acceptable values.
   3303   inline void pinIndex(int32_t& start) const;
   3304   inline void pinIndices(int32_t& start,
   3305                          int32_t& length) const;
   3306 
   3307 #if !UCONFIG_NO_CONVERSION
   3308 
   3309   /* Internal extract() using UConverter. */
   3310   int32_t doExtract(int32_t start, int32_t length,
   3311                     char *dest, int32_t destCapacity,
   3312                     UConverter *cnv,
   3313                     UErrorCode &errorCode) const;
   3314 
   3315   /*
   3316    * Real constructor for converting from codepage data.
   3317    * It assumes that it is called with !fRefCounted.
   3318    *
   3319    * If <code>codepage==0</code>, then the default converter
   3320    * is used for the platform encoding.
   3321    * If <code>codepage</code> is an empty string (<code>""</code>),
   3322    * then a simple conversion is performed on the codepage-invariant
   3323    * subset ("invariant characters") of the platform encoding. See utypes.h.
   3324    */
   3325   void doCodepageCreate(const char *codepageData,
   3326                         int32_t dataLength,
   3327                         const char *codepage);
   3328 
   3329   /*
   3330    * Worker function for creating a UnicodeString from
   3331    * a codepage string using a UConverter.
   3332    */
   3333   void
   3334   doCodepageCreate(const char *codepageData,
   3335                    int32_t dataLength,
   3336                    UConverter *converter,
   3337                    UErrorCode &status);
   3338 
   3339 #endif
   3340 
   3341   /*
   3342    * This function is called when write access to the array
   3343    * is necessary.
   3344    *
   3345    * We need to make a copy of the array if
   3346    * the buffer is read-only, or
   3347    * the buffer is refCounted (shared), and refCount>1, or
   3348    * the buffer is too small.
   3349    *
   3350    * Return FALSE if memory could not be allocated.
   3351    */
   3352   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
   3353                             int32_t growCapacity = -1,
   3354                             UBool doCopyArray = TRUE,
   3355                             int32_t **pBufferToDelete = 0,
   3356                             UBool forceClone = FALSE);
   3357 
   3358   // common function for case mappings
   3359   UnicodeString &
   3360   caseMap(BreakIterator *titleIter,
   3361           const char *locale,
   3362           uint32_t options,
   3363           int32_t toWhichCase);
   3364 
   3365   // ref counting
   3366   void addRef(void);
   3367   int32_t removeRef(void);
   3368   int32_t refCount(void) const;
   3369 
   3370   // constants
   3371   enum {
   3372     // Set the stack buffer size so that sizeof(UnicodeString) is,
   3373     // naturally (without padding), a multiple of sizeof(pointer).
   3374     US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings (13 if pointers are 4 bytes, else 15)
   3375     kInvalidUChar=0xffff, // invalid UChar index
   3376     kGrowSize=128, // grow size for this buffer
   3377     kInvalidHashCode=0, // invalid hash code
   3378     kEmptyHashCode=1, // hash code for empty string
   3379 
   3380     // bit flag values for fFlags (each is a distinct power of two)
   3381     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
   3382     kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
   3383     kRefCounted=4,      // there is a refCount field before the characters in fArray
   3384     kBufferIsReadonly=8,// do not write to this buffer
   3385     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
   3386                         // and releaseBuffer(newLength) must be called
   3387 
   3388     // combined values for convenience
   3389     kShortString=kUsingStackBuffer,   // contents are in fUnion.fStackBuffer
   3390     kLongString=kRefCounted,          // array with a refCount field before the characters
   3391     kReadonlyAlias=kBufferIsReadonly, // buffer must not be written to
   3392     kWritableAlias=0                  // no flag bits set
   3393   };
   3394 
   3395   friend class StringThreadTest;
   3396   friend class UnicodeStringAppendable;
   3397 
   3398   union StackBufferOrFields;        // forward declaration necessary before friend declaration
   3399   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
   3400 
   3401   /*
   3402    * The following are all the class fields that are stored
   3403    * in each UnicodeString object.
   3404    * Note that UnicodeString has virtual functions,
   3405    * therefore there is an implicit vtable pointer
   3406    * as the first real field.
   3407    * The fields should be aligned such that no padding is necessary.
   3408    * On 32-bit machines, the size should be 32 bytes,
   3409    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
   3410    *
   3411    * We use a hack to achieve this.
   3412    *
   3413    * With at least some compilers, each of the following is forced to
   3414    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
   3415    * rounded up with additional padding if the fields do not already fit that requirement:
   3416    * - sizeof(class UnicodeString)
   3417    * - offsetof(UnicodeString, fUnion)
   3418    * - sizeof(fUnion)
   3419    * - sizeof(fFields)
   3420    *
   3421    * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
   3422    * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
   3423    * (Padding at the end of fFields is ok:
   3424    * As long as there is no padding after fStackBuffer, it is not wasted space.)
   3425    *
   3426    * We further assume that the compiler does not reorder the fields,
   3427    * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
   3428    * with at most some padding (but no other field) in between.
   3429    * (Padding there would be wasted space, but functionally harmless.)
   3430    *
   3431    * We use a few more sizeof(pointer)'s chunks of space with
   3432    * fRestOfStackBuffer, fShortLength and fFlags,
   3433    * to get up exactly to the intended sizeof(UnicodeString).
   3434    */
   3435   // (implicit) *vtable;
          // Character storage: either the first 8 UChars of the in-object buffer or
          // the pointer/capacity/length triple of an external array.
          // getArrayStart() and getCapacity() select between the two via fFlags.
   3436   union StackBufferOrFields {
   3437     // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
   3438     // else fFields is used
   3439     UChar fStackBuffer[8];  // buffer for short strings, together with fRestOfStackBuffer
   3440     struct {
   3441       UChar   *fArray;    // the Unicode data
   3442       int32_t fCapacity;  // capacity of fArray (in UChars)
   3443       int32_t fLength;    // number of characters in fArray if >127; else undefined (setLength() writes it only for len > 127)
   3444     } fFields;
   3445   } fUnion;
   3446   UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; // the UChars of the stack buffer beyond the 8 in fUnion.fStackBuffer
   3447   int8_t fShortLength;  // 0..127: length  <0: real length is in fUnion.fFields.fLength
   3448   uint8_t fFlags;       // bit flags: see constants above
   3449 };
   3450 
   3451 /**
   3452  * Create a new UnicodeString with the concatenation of two others.
   3453  *
   3454  * @param s1 The first string to be copied to the new one.
   3455  * @param s2 The second string to be copied to the new one, after s1.
   3456  * @return UnicodeString(s1).append(s2)
   3457  * @stable ICU 2.8
   3458  */
   3459 U_COMMON_API UnicodeString U_EXPORT2
   3460 operator+ (const UnicodeString &s1, const UnicodeString &s2);
   3461 
   3462 //========================================
   3463 // Inline members
   3464 //========================================
   3465 
   3466 //========================================
   3467 // Privates
   3468 //========================================
   3469 
   3470 inline void
   3471 UnicodeString::pinIndex(int32_t& start) const
   3472 {
   3473   // pin index
   3474   if(start < 0) {
   3475     start = 0;
   3476   } else if(start > length()) {
   3477     start = length();
   3478   }
   3479 }
   3480 
   3481 inline void
   3482 UnicodeString::pinIndices(int32_t& start,
   3483                           int32_t& _length) const
   3484 {
   3485   // pin indices
   3486   int32_t len = length();
   3487   if(start < 0) {
   3488     start = 0;
   3489   } else if(start > len) {
   3490     start = len;
   3491   }
   3492   if(_length < 0) {
   3493     _length = 0;
   3494   } else if(_length > (len - start)) {
   3495     _length = (len - start);
   3496   }
   3497 }
   3498 
   3499 inline UChar*
   3500 UnicodeString::getArrayStart()
   3501 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
   3502 
   3503 inline const UChar*
   3504 UnicodeString::getArrayStart() const
   3505 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
   3506 
   3507 //========================================
   3508 // Read-only implementation methods
   3509 //========================================
   3510 inline int32_t
   3511 UnicodeString::length() const
   3512 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
   3513 
   3514 inline int32_t
   3515 UnicodeString::getCapacity() const
   3516 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
   3517 
   3518 inline int32_t
   3519 UnicodeString::hashCode() const
   3520 { return doHashCode(); }
   3521 
   3522 inline UBool
   3523 UnicodeString::isBogus() const
   3524 { return (UBool)(fFlags & kIsBogus); }
   3525 
   3526 inline UBool
   3527 UnicodeString::isWritable() const
   3528 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
   3529 
   3530 inline UBool
   3531 UnicodeString::isBufferWritable() const
   3532 {
   3533   return (UBool)(
   3534       !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
   3535       (!(fFlags&kRefCounted) || refCount()==1));
   3536 }
   3537 
   3538 inline const UChar *
   3539 UnicodeString::getBuffer() const {
   3540   if(fFlags&(kIsBogus|kOpenGetBuffer)) {
   3541     return 0;
   3542   } else if(fFlags&kUsingStackBuffer) {
   3543     return fUnion.fStackBuffer;
   3544   } else {
   3545     return fUnion.fFields.fArray;
   3546   }
   3547 }
   3548 
   3549 //========================================
   3550 // Read-only alias methods
   3551 //========================================
   3552 inline int8_t
   3553 UnicodeString::doCompare(int32_t start,
   3554               int32_t thisLength,
   3555               const UnicodeString& srcText,
   3556               int32_t srcStart,
   3557               int32_t srcLength) const
   3558 {
   3559   if(srcText.isBogus()) {
   3560     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
   3561   } else {
   3562     srcText.pinIndices(srcStart, srcLength);
   3563     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
   3564   }
   3565 }
   3566 
   3567 inline UBool
   3568 UnicodeString::operator== (const UnicodeString& text) const
   3569 {
   3570   if(isBogus()) {
   3571     return text.isBogus();
   3572   } else {
   3573     int32_t len = length(), textLength = text.length();
   3574     return
   3575       !text.isBogus() &&
   3576       len == textLength &&
   3577       doCompare(0, len, text, 0, textLength) == 0;
   3578   }
   3579 }
   3580 
   3581 inline UBool
   3582 UnicodeString::operator!= (const UnicodeString& text) const
   3583 { return (! operator==(text)); }
   3584 
   3585 inline UBool
   3586 UnicodeString::operator> (const UnicodeString& text) const
   3587 { return doCompare(0, length(), text, 0, text.length()) == 1; }
   3588 
   3589 inline UBool
   3590 UnicodeString::operator< (const UnicodeString& text) const
   3591 { return doCompare(0, length(), text, 0, text.length()) == -1; }
   3592 
   3593 inline UBool
   3594 UnicodeString::operator>= (const UnicodeString& text) const
   3595 { return doCompare(0, length(), text, 0, text.length()) != -1; }
   3596 
   3597 inline UBool
   3598 UnicodeString::operator<= (const UnicodeString& text) const
   3599 { return doCompare(0, length(), text, 0, text.length()) != 1; }
   3600 
   3601 inline int8_t
   3602 UnicodeString::compare(const UnicodeString& text) const
   3603 { return doCompare(0, length(), text, 0, text.length()); }
   3604 
   3605 inline int8_t
   3606 UnicodeString::compare(int32_t start,
   3607                int32_t _length,
   3608                const UnicodeString& srcText) const
   3609 { return doCompare(start, _length, srcText, 0, srcText.length()); }
   3610 
   3611 inline int8_t
   3612 UnicodeString::compare(const UChar *srcChars,
   3613                int32_t srcLength) const
   3614 { return doCompare(0, length(), srcChars, 0, srcLength); }
   3615 
   3616 inline int8_t
   3617 UnicodeString::compare(int32_t start,
   3618                int32_t _length,
   3619                const UnicodeString& srcText,
   3620                int32_t srcStart,
   3621                int32_t srcLength) const
   3622 { return doCompare(start, _length, srcText, srcStart, srcLength); }
   3623 
   3624 inline int8_t
   3625 UnicodeString::compare(int32_t start,
   3626                int32_t _length,
   3627                const UChar *srcChars) const
   3628 { return doCompare(start, _length, srcChars, 0, _length); }
   3629 
   3630 inline int8_t
   3631 UnicodeString::compare(int32_t start,
   3632                int32_t _length,
   3633                const UChar *srcChars,
   3634                int32_t srcStart,
   3635                int32_t srcLength) const
   3636 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
   3637 
   3638 inline int8_t
   3639 UnicodeString::compareBetween(int32_t start,
   3640                   int32_t limit,
   3641                   const UnicodeString& srcText,
   3642                   int32_t srcStart,
   3643                   int32_t srcLimit) const
   3644 { return doCompare(start, limit - start,
   3645            srcText, srcStart, srcLimit - srcStart); }
   3646 
   3647 inline int8_t
   3648 UnicodeString::doCompareCodePointOrder(int32_t start,
   3649                                        int32_t thisLength,
   3650                                        const UnicodeString& srcText,
   3651                                        int32_t srcStart,
   3652                                        int32_t srcLength) const
   3653 {
   3654   if(srcText.isBogus()) {
   3655     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
   3656   } else {
   3657     srcText.pinIndices(srcStart, srcLength);
   3658     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
   3659   }
   3660 }
   3661 
   3662 inline int8_t
   3663 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
   3664 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
   3665 
   3666 inline int8_t
   3667 UnicodeString::compareCodePointOrder(int32_t start,
   3668                                      int32_t _length,
   3669                                      const UnicodeString& srcText) const
   3670 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
   3671 
   3672 inline int8_t
   3673 UnicodeString::compareCodePointOrder(const UChar *srcChars,
   3674                                      int32_t srcLength) const
   3675 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
   3676 
   3677 inline int8_t
   3678 UnicodeString::compareCodePointOrder(int32_t start,
   3679                                      int32_t _length,
   3680                                      const UnicodeString& srcText,
   3681                                      int32_t srcStart,
   3682                                      int32_t srcLength) const
   3683 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
   3684 
   3685 inline int8_t
   3686 UnicodeString::compareCodePointOrder(int32_t start,
   3687                                      int32_t _length,
   3688                                      const UChar *srcChars) const
   3689 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
   3690 
   3691 inline int8_t
   3692 UnicodeString::compareCodePointOrder(int32_t start,
   3693                                      int32_t _length,
   3694                                      const UChar *srcChars,
   3695                                      int32_t srcStart,
   3696                                      int32_t srcLength) const
   3697 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
   3698 
   3699 inline int8_t
   3700 UnicodeString::compareCodePointOrderBetween(int32_t start,
   3701                                             int32_t limit,
   3702                                             const UnicodeString& srcText,
   3703                                             int32_t srcStart,
   3704                                             int32_t srcLimit) const
   3705 { return doCompareCodePointOrder(start, limit - start,
   3706            srcText, srcStart, srcLimit - srcStart); }
   3707 
   3708 inline int8_t
   3709 UnicodeString::doCaseCompare(int32_t start,
   3710                              int32_t thisLength,
   3711                              const UnicodeString &srcText,
   3712                              int32_t srcStart,
   3713                              int32_t srcLength,
   3714                              uint32_t options) const
   3715 {
   3716   if(srcText.isBogus()) {
   3717     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
   3718   } else {
   3719     srcText.pinIndices(srcStart, srcLength);
   3720     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
   3721   }
   3722 }
   3723 
   3724 inline int8_t
   3725 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
   3726   return doCaseCompare(0, length(), text, 0, text.length(), options);
   3727 }
   3728 
   3729 inline int8_t
   3730 UnicodeString::caseCompare(int32_t start,
   3731                            int32_t _length,
   3732                            const UnicodeString &srcText,
   3733                            uint32_t options) const {
   3734   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
   3735 }
   3736 
   3737 inline int8_t
   3738 UnicodeString::caseCompare(const UChar *srcChars,
   3739                            int32_t srcLength,
   3740                            uint32_t options) const {
   3741   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
   3742 }
   3743 
   3744 inline int8_t
   3745 UnicodeString::caseCompare(int32_t start,
   3746                            int32_t _length,
   3747                            const UnicodeString &srcText,
   3748                            int32_t srcStart,
   3749                            int32_t srcLength,
   3750                            uint32_t options) const {
   3751   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
   3752 }
   3753 
   3754 inline int8_t
   3755 UnicodeString::caseCompare(int32_t start,
   3756                            int32_t _length,
   3757                            const UChar *srcChars,
   3758                            uint32_t options) const {
   3759   return doCaseCompare(start, _length, srcChars, 0, _length, options);
   3760 }
   3761 
   3762 inline int8_t
   3763 UnicodeString::caseCompare(int32_t start,
   3764                            int32_t _length,
   3765                            const UChar *srcChars,
   3766                            int32_t srcStart,
   3767                            int32_t srcLength,
   3768                            uint32_t options) const {
   3769   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
   3770 }
   3771 
   3772 inline int8_t
   3773 UnicodeString::caseCompareBetween(int32_t start,
   3774                                   int32_t limit,
   3775                                   const UnicodeString &srcText,
   3776                                   int32_t srcStart,
   3777                                   int32_t srcLimit,
   3778                                   uint32_t options) const {
   3779   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
   3780 }
   3781 
   3782 inline int32_t
   3783 UnicodeString::indexOf(const UnicodeString& srcText,
   3784                int32_t srcStart,
   3785                int32_t srcLength,
   3786                int32_t start,
   3787                int32_t _length) const
   3788 {
   3789   if(!srcText.isBogus()) {
   3790     srcText.pinIndices(srcStart, srcLength);
   3791     if(srcLength > 0) {
   3792       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
   3793     }
   3794   }
   3795   return -1;
   3796 }
   3797 
   3798 inline int32_t
   3799 UnicodeString::indexOf(const UnicodeString& text) const
   3800 { return indexOf(text, 0, text.length(), 0, length()); }
   3801 
   3802 inline int32_t
   3803 UnicodeString::indexOf(const UnicodeString& text,
   3804                int32_t start) const {
   3805   pinIndex(start);
   3806   return indexOf(text, 0, text.length(), start, length() - start);
   3807 }
   3808 
   3809 inline int32_t
   3810 UnicodeString::indexOf(const UnicodeString& text,
   3811                int32_t start,
   3812                int32_t _length) const
   3813 { return indexOf(text, 0, text.length(), start, _length); }
   3814 
   3815 inline int32_t
   3816 UnicodeString::indexOf(const UChar *srcChars,
   3817                int32_t srcLength,
   3818                int32_t start) const {
   3819   pinIndex(start);
   3820   return indexOf(srcChars, 0, srcLength, start, length() - start);
   3821 }
   3822 
   3823 inline int32_t
   3824 UnicodeString::indexOf(const UChar *srcChars,
   3825                int32_t srcLength,
   3826                int32_t start,
   3827                int32_t _length) const
   3828 { return indexOf(srcChars, 0, srcLength, start, _length); }
   3829 
   3830 inline int32_t
   3831 UnicodeString::indexOf(UChar c,
   3832                int32_t start,
   3833                int32_t _length) const
   3834 { return doIndexOf(c, start, _length); }
   3835 
   3836 inline int32_t
   3837 UnicodeString::indexOf(UChar32 c,
   3838                int32_t start,
   3839                int32_t _length) const
   3840 { return doIndexOf(c, start, _length); }
   3841 
   3842 inline int32_t
   3843 UnicodeString::indexOf(UChar c) const
   3844 { return doIndexOf(c, 0, length()); }
   3845 
   3846 inline int32_t
   3847 UnicodeString::indexOf(UChar32 c) const
   3848 { return indexOf(c, 0, length()); }
   3849 
   3850 inline int32_t
   3851 UnicodeString::indexOf(UChar c,
   3852                int32_t start) const {
   3853   pinIndex(start);
   3854   return doIndexOf(c, start, length() - start);
   3855 }
   3856 
   3857 inline int32_t
   3858 UnicodeString::indexOf(UChar32 c,
   3859                int32_t start) const {
   3860   pinIndex(start);
   3861   return indexOf(c, start, length() - start);
   3862 }
   3863 
   3864 inline int32_t
   3865 UnicodeString::lastIndexOf(const UChar *srcChars,
   3866                int32_t srcLength,
   3867                int32_t start,
   3868                int32_t _length) const
   3869 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
   3870 
   3871 inline int32_t
   3872 UnicodeString::lastIndexOf(const UChar *srcChars,
   3873                int32_t srcLength,
   3874                int32_t start) const {
   3875   pinIndex(start);
   3876   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
   3877 }
   3878 
   3879 inline int32_t
   3880 UnicodeString::lastIndexOf(const UnicodeString& srcText,
   3881                int32_t srcStart,
   3882                int32_t srcLength,
   3883                int32_t start,
   3884                int32_t _length) const
   3885 {
   3886   if(!srcText.isBogus()) {
   3887     srcText.pinIndices(srcStart, srcLength);
   3888     if(srcLength > 0) {
   3889       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
   3890     }
   3891   }
   3892   return -1;
   3893 }
   3894 
   3895 inline int32_t
   3896 UnicodeString::lastIndexOf(const UnicodeString& text,
   3897                int32_t start,
   3898                int32_t _length) const
   3899 { return lastIndexOf(text, 0, text.length(), start, _length); }
   3900 
   3901 inline int32_t
   3902 UnicodeString::lastIndexOf(const UnicodeString& text,
   3903                int32_t start) const {
   3904   pinIndex(start);
   3905   return lastIndexOf(text, 0, text.length(), start, length() - start);
   3906 }
   3907 
   3908 inline int32_t
   3909 UnicodeString::lastIndexOf(const UnicodeString& text) const
   3910 { return lastIndexOf(text, 0, text.length(), 0, length()); }
   3911 
   3912 inline int32_t
   3913 UnicodeString::lastIndexOf(UChar c,
   3914                int32_t start,
   3915                int32_t _length) const
   3916 { return doLastIndexOf(c, start, _length); }
   3917 
   3918 inline int32_t
   3919 UnicodeString::lastIndexOf(UChar32 c,
   3920                int32_t start,
   3921                int32_t _length) const {
   3922   return doLastIndexOf(c, start, _length);
   3923 }
   3924 
   3925 inline int32_t
   3926 UnicodeString::lastIndexOf(UChar c) const
   3927 { return doLastIndexOf(c, 0, length()); }
   3928 
   3929 inline int32_t
   3930 UnicodeString::lastIndexOf(UChar32 c) const {
   3931   return lastIndexOf(c, 0, length());
   3932 }
   3933 
   3934 inline int32_t
   3935 UnicodeString::lastIndexOf(UChar c,
   3936                int32_t start) const {
   3937   pinIndex(start);
   3938   return doLastIndexOf(c, start, length() - start);
   3939 }
   3940 
   3941 inline int32_t
   3942 UnicodeString::lastIndexOf(UChar32 c,
   3943                int32_t start) const {
   3944   pinIndex(start);
   3945   return lastIndexOf(c, start, length() - start);
   3946 }
   3947 
   3948 inline UBool
   3949 UnicodeString::startsWith(const UnicodeString& text) const
   3950 { return compare(0, text.length(), text, 0, text.length()) == 0; }
   3951 
   3952 inline UBool
   3953 UnicodeString::startsWith(const UnicodeString& srcText,
   3954               int32_t srcStart,
   3955               int32_t srcLength) const
   3956 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
   3957 
   3958 inline UBool
   3959 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
   3960   if(srcLength < 0) {
   3961     srcLength = u_strlen(srcChars);
   3962   }
   3963   return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
   3964 }
   3965 
   3966 inline UBool
   3967 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
   3968   if(srcLength < 0) {
   3969     srcLength = u_strlen(srcChars);
   3970   }
   3971   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
   3972 }
   3973 
   3974 inline UBool
   3975 UnicodeString::endsWith(const UnicodeString& text) const
   3976 { return doCompare(length() - text.length(), text.length(),
   3977            text, 0, text.length()) == 0; }
   3978 
   3979 inline UBool
   3980 UnicodeString::endsWith(const UnicodeString& srcText,
   3981             int32_t srcStart,
   3982             int32_t srcLength) const {
   3983   srcText.pinIndices(srcStart, srcLength);
   3984   return doCompare(length() - srcLength, srcLength,
   3985                    srcText, srcStart, srcLength) == 0;
   3986 }
   3987 
   3988 inline UBool
   3989 UnicodeString::endsWith(const UChar *srcChars,
   3990             int32_t srcLength) const {
   3991   if(srcLength < 0) {
   3992     srcLength = u_strlen(srcChars);
   3993   }
   3994   return doCompare(length() - srcLength, srcLength,
   3995                    srcChars, 0, srcLength) == 0;
   3996 }
   3997 
   3998 inline UBool
   3999 UnicodeString::endsWith(const UChar *srcChars,
   4000             int32_t srcStart,
   4001             int32_t srcLength) const {
   4002   if(srcLength < 0) {
   4003     srcLength = u_strlen(srcChars + srcStart);
   4004   }
   4005   return doCompare(length() - srcLength, srcLength,
   4006                    srcChars, srcStart, srcLength) == 0;
   4007 }
   4008 
   4009 //========================================
   4010 // replace
   4011 //========================================
   4012 inline UnicodeString&
   4013 UnicodeString::replace(int32_t start,
   4014                int32_t _length,
   4015                const UnicodeString& srcText)
   4016 { return doReplace(start, _length, srcText, 0, srcText.length()); }
   4017 
   4018 inline UnicodeString&
   4019 UnicodeString::replace(int32_t start,
   4020                int32_t _length,
   4021                const UnicodeString& srcText,
   4022                int32_t srcStart,
   4023                int32_t srcLength)
   4024 { return doReplace(start, _length, srcText, srcStart, srcLength); }
   4025 
   4026 inline UnicodeString&
   4027 UnicodeString::replace(int32_t start,
   4028                int32_t _length,
   4029                const UChar *srcChars,
   4030                int32_t srcLength)
   4031 { return doReplace(start, _length, srcChars, 0, srcLength); }
   4032 
   4033 inline UnicodeString&
   4034 UnicodeString::replace(int32_t start,
   4035                int32_t _length,
   4036                const UChar *srcChars,
   4037                int32_t srcStart,
   4038                int32_t srcLength)
   4039 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
   4040 
   4041 inline UnicodeString&
   4042 UnicodeString::replace(int32_t start,
   4043                int32_t _length,
   4044                UChar srcChar)
   4045 { return doReplace(start, _length, &srcChar, 0, 1); }
   4046 
   4047 inline UnicodeString&
   4048 UnicodeString::replace(int32_t start,
   4049                int32_t _length,
   4050                UChar32 srcChar) {
   4051   UChar buffer[U16_MAX_LENGTH];
   4052   int32_t count = 0;
   4053   UBool isError = FALSE;
   4054   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
   4055   return doReplace(start, _length, buffer, 0, count);
   4056 }
   4057 
   4058 inline UnicodeString&
   4059 UnicodeString::replaceBetween(int32_t start,
   4060                   int32_t limit,
   4061                   const UnicodeString& srcText)
   4062 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
   4063 
   4064 inline UnicodeString&
   4065 UnicodeString::replaceBetween(int32_t start,
   4066                   int32_t limit,
   4067                   const UnicodeString& srcText,
   4068                   int32_t srcStart,
   4069                   int32_t srcLimit)
   4070 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
   4071 
   4072 inline UnicodeString&
   4073 UnicodeString::findAndReplace(const UnicodeString& oldText,
   4074                   const UnicodeString& newText)
   4075 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
   4076             newText, 0, newText.length()); }
   4077 
   4078 inline UnicodeString&
   4079 UnicodeString::findAndReplace(int32_t start,
   4080                   int32_t _length,
   4081                   const UnicodeString& oldText,
   4082                   const UnicodeString& newText)
   4083 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
   4084             newText, 0, newText.length()); }
   4085 
   4086 // ============================
   4087 // extract
   4088 // ============================
   4089 inline void
   4090 UnicodeString::doExtract(int32_t start,
   4091              int32_t _length,
   4092              UnicodeString& target) const
   4093 { target.replace(0, target.length(), *this, start, _length); }
   4094 
   4095 inline void
   4096 UnicodeString::extract(int32_t start,
   4097                int32_t _length,
   4098                UChar *target,
   4099                int32_t targetStart) const
   4100 { doExtract(start, _length, target, targetStart); }
   4101 
   4102 inline void
   4103 UnicodeString::extract(int32_t start,
   4104                int32_t _length,
   4105                UnicodeString& target) const
   4106 { doExtract(start, _length, target); }
   4107 
   4108 #if !UCONFIG_NO_CONVERSION
   4109 
   4110 inline int32_t
   4111 UnicodeString::extract(int32_t start,
   4112                int32_t _length,
   4113                char *dst,
   4114                const char *codepage) const
   4115 
   4116 {
   4117   // This dstSize value will be checked explicitly
   4118   return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
   4119 }
   4120 
   4121 #endif
   4122 
   4123 inline void
   4124 UnicodeString::extractBetween(int32_t start,
   4125                   int32_t limit,
   4126                   UChar *dst,
   4127                   int32_t dstStart) const {
   4128   pinIndex(start);
   4129   pinIndex(limit);
   4130   doExtract(start, limit - start, dst, dstStart);
   4131 }
   4132 
   4133 inline UnicodeString
   4134 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
   4135     return tempSubString(start, limit - start);
   4136 }
   4137 
   4138 inline UChar
   4139 UnicodeString::doCharAt(int32_t offset) const
   4140 {
   4141   if((uint32_t)offset < (uint32_t)length()) {
   4142     return getArrayStart()[offset];
   4143   } else {
   4144     return kInvalidUChar;
   4145   }
   4146 }
   4147 
   4148 inline UChar
   4149 UnicodeString::charAt(int32_t offset) const
   4150 { return doCharAt(offset); }
   4151 
   4152 inline UChar
   4153 UnicodeString::operator[] (int32_t offset) const
   4154 { return doCharAt(offset); }
   4155 
   4156 inline UChar32
   4157 UnicodeString::char32At(int32_t offset) const
   4158 {
   4159   int32_t len = length();
   4160   if((uint32_t)offset < (uint32_t)len) {
   4161     const UChar *array = getArrayStart();
   4162     UChar32 c;
   4163     U16_GET(array, 0, offset, len, c);
   4164     return c;
   4165   } else {
   4166     return kInvalidUChar;
   4167   }
   4168 }
   4169 
   4170 inline int32_t
   4171 UnicodeString::getChar32Start(int32_t offset) const {
   4172   if((uint32_t)offset < (uint32_t)length()) {
   4173     const UChar *array = getArrayStart();
   4174     U16_SET_CP_START(array, 0, offset);
   4175     return offset;
   4176   } else {
   4177     return 0;
   4178   }
   4179 }
   4180 
   4181 inline int32_t
   4182 UnicodeString::getChar32Limit(int32_t offset) const {
   4183   int32_t len = length();
   4184   if((uint32_t)offset < (uint32_t)len) {
   4185     const UChar *array = getArrayStart();
   4186     U16_SET_CP_LIMIT(array, 0, offset, len);
   4187     return offset;
   4188   } else {
   4189     return len;
   4190   }
   4191 }
   4192 
   4193 inline UBool
   4194 UnicodeString::isEmpty() const {
   4195   return fShortLength == 0;
   4196 }
   4197 
   4198 //========================================
   4199 // Write implementation methods
   4200 //========================================
   4201 inline void
   4202 UnicodeString::setLength(int32_t len) {
   4203   if(len <= 127) {
   4204     fShortLength = (int8_t)len;
   4205   } else {
   4206     fShortLength = (int8_t)-1;
   4207     fUnion.fFields.fLength = len;
   4208   }
   4209 }
   4210 
   4211 inline void
   4212 UnicodeString::setToEmpty() {
   4213   fShortLength = 0;
   4214   fFlags = kShortString;
   4215 }
   4216 
   4217 inline void
   4218 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
   4219   setLength(len);
   4220   fUnion.fFields.fArray = array;
   4221   fUnion.fFields.fCapacity = capacity;
   4222 }
   4223 
   4224 inline const UChar *
   4225 UnicodeString::getTerminatedBuffer() {
   4226   if(!isWritable()) {
   4227     return 0;
   4228   } else {
   4229     UChar *array = getArrayStart();
   4230     int32_t len = length();
   4231     if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
   4232       /*
   4233        * kRefCounted: Do not write the NUL if the buffer is shared.
   4234        * That is mostly safe, except when the length of one copy was modified
   4235        * without copy-on-write, e.g., via truncate(newLength) or remove(void).
   4236        * Then the NUL would be written into the middle of another copy's string.
   4237        */
   4238       if(!(fFlags&kBufferIsReadonly)) {
   4239         /*
   4240          * We must not write to a readonly buffer, but it is known to be
   4241          * NUL-terminated if len<capacity.
   4242          * A shared, allocated buffer (refCount()>1) must not have its contents
   4243          * modified, but the NUL at [len] is beyond the string contents,
   4244          * and multiple string objects and threads writing the same NUL into the
   4245          * same location is harmless.
   4246          * In all other cases, the buffer is fully writable and it is anyway safe
   4247          * to write the NUL.
   4248          *
   4249          * Note: An earlier version of this code tested whether there is a NUL
   4250          * at [len] already, but, while safe, it generated lots of warnings from
   4251          * tools like valgrind and Purify.
   4252          */
   4253         array[len] = 0;
   4254       }
   4255       return array;
   4256     } else if(cloneArrayIfNeeded(len+1)) {
   4257       array = getArrayStart();
   4258       array[len] = 0;
   4259       return array;
   4260     } else {
   4261       return 0;
   4262     }
   4263   }
   4264 }
   4265 
   4266 inline UnicodeString&
   4267 UnicodeString::operator= (UChar ch)
   4268 { return doReplace(0, length(), &ch, 0, 1); }
   4269 
   4270 inline UnicodeString&
   4271 UnicodeString::operator= (UChar32 ch)
   4272 { return replace(0, length(), ch); }
   4273 
   4274 inline UnicodeString&
   4275 UnicodeString::setTo(const UnicodeString& srcText,
   4276              int32_t srcStart,
   4277              int32_t srcLength)
   4278 {
   4279   unBogus();
   4280   return doReplace(0, length(), srcText, srcStart, srcLength);
   4281 }
   4282 
   4283 inline UnicodeString&
   4284 UnicodeString::setTo(const UnicodeString& srcText,
   4285              int32_t srcStart)
   4286 {
   4287   unBogus();
   4288   srcText.pinIndex(srcStart);
   4289   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
   4290 }
   4291 
   4292 inline UnicodeString&
   4293 UnicodeString::setTo(const UnicodeString& srcText)
   4294 {
   4295   return copyFrom(srcText);
   4296 }
   4297 
   4298 inline UnicodeString&
   4299 UnicodeString::setTo(const UChar *srcChars,
   4300              int32_t srcLength)
   4301 {
   4302   unBogus();
   4303   return doReplace(0, length(), srcChars, 0, srcLength);
   4304 }
   4305 
   4306 inline UnicodeString&
   4307 UnicodeString::setTo(UChar srcChar)
   4308 {
   4309   unBogus();
   4310   return doReplace(0, length(), &srcChar, 0, 1);
   4311 }
   4312 
   4313 inline UnicodeString&
   4314 UnicodeString::setTo(UChar32 srcChar)
   4315 {
   4316   unBogus();
   4317   return replace(0, length(), srcChar);
   4318 }
   4319 
   4320 inline UnicodeString&
   4321 UnicodeString::append(const UnicodeString& srcText,
   4322               int32_t srcStart,
   4323               int32_t srcLength)
   4324 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
   4325 
   4326 inline UnicodeString&
   4327 UnicodeString::append(const UnicodeString& srcText)
   4328 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
   4329 
   4330 inline UnicodeString&
   4331 UnicodeString::append(const UChar *srcChars,
   4332               int32_t srcStart,
   4333               int32_t srcLength)
   4334 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
   4335 
   4336 inline UnicodeString&
   4337 UnicodeString::append(const UChar *srcChars,
   4338               int32_t srcLength)
   4339 { return doReplace(length(), 0, srcChars, 0, srcLength); }
   4340 
   4341 inline UnicodeString&
   4342 UnicodeString::append(UChar srcChar)
   4343 { return doReplace(length(), 0, &srcChar, 0, 1); }
   4344 
   4345 inline UnicodeString&
   4346 UnicodeString::append(UChar32 srcChar) {
   4347   UChar buffer[U16_MAX_LENGTH];
   4348   int32_t _length = 0;
   4349   UBool isError = FALSE;
   4350   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
   4351   return doReplace(length(), 0, buffer, 0, _length);
   4352 }
   4353 
   4354 inline UnicodeString&
   4355 UnicodeString::operator+= (UChar ch)
   4356 { return doReplace(length(), 0, &ch, 0, 1); }
   4357 
   4358 inline UnicodeString&
   4359 UnicodeString::operator+= (UChar32 ch) {
   4360   return append(ch);
   4361 }
   4362 
   4363 inline UnicodeString&
   4364 UnicodeString::operator+= (const UnicodeString& srcText)
   4365 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
   4366 
   4367 inline UnicodeString&
   4368 UnicodeString::insert(int32_t start,
   4369               const UnicodeString& srcText,
   4370               int32_t srcStart,
   4371               int32_t srcLength)
   4372 { return doReplace(start, 0, srcText, srcStart, srcLength); }
   4373 
   4374 inline UnicodeString&
   4375 UnicodeString::insert(int32_t start,
   4376               const UnicodeString& srcText)
   4377 { return doReplace(start, 0, srcText, 0, srcText.length()); }
   4378 
   4379 inline UnicodeString&
   4380 UnicodeString::insert(int32_t start,
   4381               const UChar *srcChars,
   4382               int32_t srcStart,
   4383               int32_t srcLength)
   4384 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
   4385 
   4386 inline UnicodeString&
   4387 UnicodeString::insert(int32_t start,
   4388               const UChar *srcChars,
   4389               int32_t srcLength)
   4390 { return doReplace(start, 0, srcChars, 0, srcLength); }
   4391 
   4392 inline UnicodeString&
   4393 UnicodeString::insert(int32_t start,
   4394               UChar srcChar)
   4395 { return doReplace(start, 0, &srcChar, 0, 1); }
   4396 
   4397 inline UnicodeString&
   4398 UnicodeString::insert(int32_t start,
   4399               UChar32 srcChar)
   4400 { return replace(start, 0, srcChar); }
   4401 
   4402 
   4403 inline UnicodeString&
   4404 UnicodeString::remove()
   4405 {
   4406   // remove() of a bogus string makes the string empty and non-bogus
   4407   // we also un-alias a read-only alias to deal with NUL-termination
   4408   // issues with getTerminatedBuffer()
   4409   if(fFlags & (kIsBogus|kBufferIsReadonly)) {
   4410     setToEmpty();
   4411   } else {
   4412     fShortLength = 0;
   4413   }
   4414   return *this;
   4415 }
   4416 
   4417 inline UnicodeString&
   4418 UnicodeString::remove(int32_t start,
   4419              int32_t _length)
   4420 {
   4421     if(start <= 0 && _length == INT32_MAX) {
   4422         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
   4423         return remove();
   4424     }
   4425     return doReplace(start, _length, NULL, 0, 0);
   4426 }
   4427 
   4428 inline UnicodeString&
   4429 UnicodeString::removeBetween(int32_t start,
   4430                 int32_t limit)
   4431 { return doReplace(start, limit - start, NULL, 0, 0); }
   4432 
   4433 inline UnicodeString &
   4434 UnicodeString::retainBetween(int32_t start, int32_t limit) {
   4435   truncate(limit);
   4436   return doReplace(0, start, NULL, 0, 0);
   4437 }
   4438 
   4439 inline UBool
   4440 UnicodeString::truncate(int32_t targetLength)
   4441 {
   4442   if(isBogus() && targetLength == 0) {
   4443     // truncate(0) of a bogus string makes the string empty and non-bogus
   4444     unBogus();
   4445     return FALSE;
   4446   } else if((uint32_t)targetLength < (uint32_t)length()) {
   4447     setLength(targetLength);
   4448     if(fFlags&kBufferIsReadonly) {
   4449       fUnion.fFields.fCapacity = targetLength;  // not NUL-terminated any more
   4450     }
   4451     return TRUE;
   4452   } else {
   4453     return FALSE;
   4454   }
   4455 }
   4456 
   4457 inline UnicodeString&
   4458 UnicodeString::reverse()
   4459 { return doReverse(0, length()); }
   4460 
   4461 inline UnicodeString&
   4462 UnicodeString::reverse(int32_t start,
   4463                int32_t _length)
   4464 { return doReverse(start, _length); }
   4465 
   4466 U_NAMESPACE_END
   4467 
   4468 #endif
   4469