Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1999-2009, International Business Machines Corporation and   *
      4 * others. All Rights Reserved.                                               *
      5 ******************************************************************************
      6 *
      7 * File unistr.cpp
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   09/25/98    stephen     Creation.
     13 *   04/20/99    stephen     Overhauled per 4/16 code review.
     14 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
     15 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
     16 *                           Replaceable.
     17 *   06/25/01    grhoten     Removed the dependency on iostream
     18 ******************************************************************************
     19 */
     20 
     21 #include "unicode/utypes.h"
     22 #include "unicode/putil.h"
     23 #include "cstring.h"
     24 #include "cmemory.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/unistr.h"
     27 #include "uhash.h"
     28 #include "ustr_imp.h"
     29 #include "umutex.h"
     30 
     31 #if 0
     32 
     33 #if U_IOSTREAM_SOURCE >= 199711
     34 #include <iostream>
     35 using namespace std;
     36 #elif U_IOSTREAM_SOURCE >= 198506
     37 #include <iostream.h>
     38 #endif
     39 
     40 //DEBUGGING
     41 void
     42 print(const UnicodeString& s,
     43       const char *name)
     44 {
     45   UChar c;
     46   cout << name << ":|";
     47   for(int i = 0; i < s.length(); ++i) {
     48     c = s[i];
     49     if(c>= 0x007E || c < 0x0020)
     50       cout << "[0x" << hex << s[i] << "]";
     51     else
     52       cout << (char) s[i];
     53   }
     54   cout << '|' << endl;
     55 }
     56 
     57 void
     58 print(const UChar *s,
     59       int32_t len,
     60       const char *name)
     61 {
     62   UChar c;
     63   cout << name << ":|";
     64   for(int i = 0; i < len; ++i) {
     65     c = s[i];
     66     if(c>= 0x007E || c < 0x0020)
     67       cout << "[0x" << hex << s[i] << "]";
     68     else
     69       cout << (char) s[i];
     70   }
     71   cout << '|' << endl;
     72 }
     73 // END DEBUGGING
     74 #endif
     75 
     76 // Local function definitions for now
     77 
     78 // need to copy areas that may overlap
     79 static
     80 inline void
     81 us_arrayCopy(const UChar *src, int32_t srcStart,
     82          UChar *dst, int32_t dstStart, int32_t count)
     83 {
     84   if(count>0) {
     85     uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
     86   }
     87 }
     88 
     89 // u_unescapeAt() callback to get a UChar from a UnicodeString
     90 U_CDECL_BEGIN
     91 static UChar U_CALLCONV
     92 UnicodeString_charAt(int32_t offset, void *context) {
     93     return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
     94 }
     95 U_CDECL_END
     96 
     97 U_NAMESPACE_BEGIN
     98 
     99 /* The Replaceable virtual destructor can't be defined in the header
    100    due to how AIX works with multiple definitions of virtual functions.
    101 */
    102 Replaceable::~Replaceable() {}
    103 Replaceable::Replaceable() {}
    104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
    105 
    106 UnicodeString U_EXPORT2
    107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
    108     return
    109         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
    110             append(s1).
    111                 append(s2);
    112 }
    113 
    114 //========================================
    115 // Reference Counting functions, put at top of file so that optimizing compilers
    116 //                               have a chance to automatically inline.
    117 //========================================
    118 
    119 void
    120 UnicodeString::addRef()
    121 {  umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
    122 
    123 int32_t
    124 UnicodeString::removeRef()
    125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
    126 
    127 int32_t
    128 UnicodeString::refCount() const
    129 {
    130     umtx_lock(NULL);
    131     // Note: without the lock to force a memory barrier, we might see a very
    132     //       stale value on some multi-processor systems.
    133     int32_t  count = *((int32_t *)fUnion.fFields.fArray - 1);
    134     umtx_unlock(NULL);
    135     return count;
    136  }
    137 
    138 void
    139 UnicodeString::releaseArray() {
    140   if((fFlags & kRefCounted) && removeRef() == 0) {
    141     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
    142   }
    143 }
    144 
    145 
    146 
    147 //========================================
    148 // Constructors
    149 //========================================
    150 UnicodeString::UnicodeString()
    151   : fShortLength(0),
    152     fFlags(kShortString)
    153 {}
    154 
    155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
    156   : fShortLength(0),
    157     fFlags(0)
    158 {
    159   if(count <= 0 || (uint32_t)c > 0x10ffff) {
    160     // just allocate and do not do anything else
    161     allocate(capacity);
    162   } else {
    163     // count > 0, allocate and fill the new string with count c's
    164     int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
    165     if(capacity < length) {
    166       capacity = length;
    167     }
    168     if(allocate(capacity)) {
    169       UChar *array = getArrayStart();
    170       int32_t i = 0;
    171 
    172       // fill the new string with c
    173       if(unitCount == 1) {
    174         // fill with length UChars
    175         while(i < length) {
    176           array[i++] = (UChar)c;
    177         }
    178       } else {
    179         // get the code units for c
    180         UChar units[UTF_MAX_CHAR_LENGTH];
    181         UTF_APPEND_CHAR_UNSAFE(units, i, c);
    182 
    183         // now it must be i==unitCount
    184         i = 0;
    185 
    186         // for Unicode, unitCount can only be 1, 2, 3, or 4
    187         // 1 is handled above
    188         while(i < length) {
    189           int32_t unitIdx = 0;
    190           while(unitIdx < unitCount) {
    191             array[i++]=units[unitIdx++];
    192           }
    193         }
    194       }
    195     }
    196     setLength(length);
    197   }
    198 }
    199 
    200 UnicodeString::UnicodeString(UChar ch)
    201   : fShortLength(1),
    202     fFlags(kShortString)
    203 {
    204   fUnion.fStackBuffer[0] = ch;
    205 }
    206 
    207 UnicodeString::UnicodeString(UChar32 ch)
    208   : fShortLength(0),
    209     fFlags(kShortString)
    210 {
    211   int32_t i = 0;
    212   UBool isError = FALSE;
    213   U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
    214   fShortLength = (int8_t)i;
    215 }
    216 
    217 UnicodeString::UnicodeString(const UChar *text)
    218   : fShortLength(0),
    219     fFlags(kShortString)
    220 {
    221   doReplace(0, 0, text, 0, -1);
    222 }
    223 
    224 UnicodeString::UnicodeString(const UChar *text,
    225                              int32_t textLength)
    226   : fShortLength(0),
    227     fFlags(kShortString)
    228 {
    229   doReplace(0, 0, text, 0, textLength);
    230 }
    231 
    232 UnicodeString::UnicodeString(UBool isTerminated,
    233                              const UChar *text,
    234                              int32_t textLength)
    235   : fShortLength(0),
    236     fFlags(kReadonlyAlias)
    237 {
    238   if(text == NULL) {
    239     // treat as an empty string, do not alias
    240     setToEmpty();
    241   } else if(textLength < -1 ||
    242             (textLength == -1 && !isTerminated) ||
    243             (textLength >= 0 && isTerminated && text[textLength] != 0)
    244   ) {
    245     setToBogus();
    246   } else {
    247     if(textLength == -1) {
    248       // text is terminated, or else it would have failed the above test
    249       textLength = u_strlen(text);
    250     }
    251     setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
    252   }
    253 }
    254 
    255 UnicodeString::UnicodeString(UChar *buff,
    256                              int32_t buffLength,
    257                              int32_t buffCapacity)
    258   : fShortLength(0),
    259     fFlags(kWritableAlias)
    260 {
    261   if(buff == NULL) {
    262     // treat as an empty string, do not alias
    263     setToEmpty();
    264   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    265     setToBogus();
    266   } else {
    267     if(buffLength == -1) {
    268       // fLength = u_strlen(buff); but do not look beyond buffCapacity
    269       const UChar *p = buff, *limit = buff + buffCapacity;
    270       while(p != limit && *p != 0) {
    271         ++p;
    272       }
    273       buffLength = (int32_t)(p - buff);
    274     }
    275     setArray(buff, buffLength, buffCapacity);
    276   }
    277 }
    278 
    279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
    280   : fShortLength(0),
    281     fFlags(kShortString)
    282 {
    283   if(src==NULL) {
    284     // treat as an empty string
    285   } else {
    286     if(length<0) {
    287       length=(int32_t)uprv_strlen(src);
    288     }
    289     if(cloneArrayIfNeeded(length, length, FALSE)) {
    290       u_charsToUChars(src, getArrayStart(), length);
    291       setLength(length);
    292     } else {
    293       setToBogus();
    294     }
    295   }
    296 }
    297 
    298 #if U_CHARSET_IS_UTF8
    299 
    300 UnicodeString::UnicodeString(const char *codepageData)
    301   : fShortLength(0),
    302     fFlags(kShortString) {
    303   if(codepageData != 0) {
    304     setToUTF8(codepageData);
    305   }
    306 }
    307 
    308 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
    309   : fShortLength(0),
    310     fFlags(kShortString) {
    311   // if there's nothing to convert, do nothing
    312   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
    313     return;
    314   }
    315   if(dataLength == -1) {
    316     dataLength = (int32_t)uprv_strlen(codepageData);
    317   }
    318   setToUTF8(StringPiece(codepageData, dataLength));
    319 }
    320 
    321 // else see unistr_cnv.cpp
    322 #endif
    323 
    324 UnicodeString::UnicodeString(const UnicodeString& that)
    325   : Replaceable(),
    326     fShortLength(0),
    327     fFlags(kShortString)
    328 {
    329   copyFrom(that);
    330 }
    331 
    332 UnicodeString::UnicodeString(const UnicodeString& that,
    333                              int32_t srcStart)
    334   : Replaceable(),
    335     fShortLength(0),
    336     fFlags(kShortString)
    337 {
    338   setTo(that, srcStart);
    339 }
    340 
    341 UnicodeString::UnicodeString(const UnicodeString& that,
    342                              int32_t srcStart,
    343                              int32_t srcLength)
    344   : Replaceable(),
    345     fShortLength(0),
    346     fFlags(kShortString)
    347 {
    348   setTo(that, srcStart, srcLength);
    349 }
    350 
    351 // Replaceable base class clone() default implementation, does not clone
    352 Replaceable *
    353 Replaceable::clone() const {
    354   return NULL;
    355 }
    356 
    357 // UnicodeString overrides clone() with a real implementation
    358 Replaceable *
    359 UnicodeString::clone() const {
    360   return new UnicodeString(*this);
    361 }
    362 
    363 //========================================
    364 // array allocation
    365 //========================================
    366 
    367 UBool
    368 UnicodeString::allocate(int32_t capacity) {
    369   if(capacity <= US_STACKBUF_SIZE) {
    370     fFlags = kShortString;
    371   } else {
    372     // count bytes for the refCounter and the string capacity, and
    373     // round up to a multiple of 16; then divide by 4 and allocate int32_t's
    374     // to be safely aligned for the refCount
    375     int32_t words = (int32_t)(((sizeof(int32_t) + capacity * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
    376     int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
    377     if(array != 0) {
    378       // set initial refCount and point behind the refCount
    379       *array++ = 1;
    380 
    381       // have fArray point to the first UChar
    382       fUnion.fFields.fArray = (UChar *)array;
    383       fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
    384       fFlags = kLongString;
    385     } else {
    386       fShortLength = 0;
    387       fUnion.fFields.fArray = 0;
    388       fUnion.fFields.fCapacity = 0;
    389       fFlags = kIsBogus;
    390       return FALSE;
    391     }
    392   }
    393   return TRUE;
    394 }
    395 
    396 //========================================
    397 // Destructor
    398 //========================================
    399 UnicodeString::~UnicodeString()
    400 {
    401   releaseArray();
    402 }
    403 
    404 //========================================
    405 // Factory methods
    406 //========================================
    407 
    408 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
    409   UnicodeString result;
    410   result.setToUTF8(utf8);
    411   return result;
    412 }
    413 
    414 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
    415   UnicodeString result;
    416   int32_t capacity;
    417   // Most UTF-32 strings will be BMP-only and result in a same-length
    418   // UTF-16 string. We overestimate the capacity just slightly,
    419   // just in case there are a few supplementary characters.
    420   if(length <= US_STACKBUF_SIZE) {
    421     capacity = US_STACKBUF_SIZE;
    422   } else {
    423     capacity = length + (length >> 4) + 4;
    424   }
    425   do {
    426     UChar *utf16 = result.getBuffer(capacity);
    427     int32_t length16;
    428     UErrorCode errorCode = U_ZERO_ERROR;
    429     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
    430         utf32, length,
    431         0xfffd,  // Substitution character.
    432         NULL,    // Don't care about number of substitutions.
    433         &errorCode);
    434     result.releaseBuffer(length16);
    435     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
    436       capacity = length16 + 1;  // +1 for the terminating NUL.
    437       continue;
    438     } else if(U_FAILURE(errorCode)) {
    439       result.setToBogus();
    440     }
    441     break;
    442   } while(TRUE);
    443   return result;
    444 }
    445 
    446 //========================================
    447 // Assignment
    448 //========================================
    449 
    450 UnicodeString &
    451 UnicodeString::operator=(const UnicodeString &src) {
    452   return copyFrom(src);
    453 }
    454 
    455 UnicodeString &
    456 UnicodeString::fastCopyFrom(const UnicodeString &src) {
    457   return copyFrom(src, TRUE);
    458 }
    459 
    460 UnicodeString &
    461 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
    462   // if assigning to ourselves, do nothing
    463   if(this == 0 || this == &src) {
    464     return *this;
    465   }
    466 
    467   // is the right side bogus?
    468   if(&src == 0 || src.isBogus()) {
    469     setToBogus();
    470     return *this;
    471   }
    472 
    473   // delete the current contents
    474   releaseArray();
    475 
    476   if(src.isEmpty()) {
    477     // empty string - use the stack buffer
    478     setToEmpty();
    479     return *this;
    480   }
    481 
    482   // we always copy the length
    483   int32_t srcLength = src.length();
    484   setLength(srcLength);
    485 
    486   // fLength>0 and not an "open" src.getBuffer(minCapacity)
    487   switch(src.fFlags) {
    488   case kShortString:
    489     // short string using the stack buffer, do the same
    490     fFlags = kShortString;
    491     uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, fShortLength * U_SIZEOF_UCHAR);
    492     break;
    493   case kLongString:
    494     // src uses a refCounted string buffer, use that buffer with refCount
    495     // src is const, use a cast - we don't really change it
    496     ((UnicodeString &)src).addRef();
    497     // copy all fields, share the reference-counted buffer
    498     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    499     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    500     fFlags = src.fFlags;
    501     break;
    502   case kReadonlyAlias:
    503     if(fastCopy) {
    504       // src is a readonly alias, do the same
    505       // -> maintain the readonly alias as such
    506       fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    507       fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    508       fFlags = src.fFlags;
    509       break;
    510     }
    511     // else if(!fastCopy) fall through to case kWritableAlias
    512     // -> allocate a new buffer and copy the contents
    513   case kWritableAlias:
    514     // src is a writable alias; we make a copy of that instead
    515     if(allocate(srcLength)) {
    516       uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
    517       break;
    518     }
    519     // if there is not enough memory, then fall through to setting to bogus
    520   default:
    521     // if src is bogus, set ourselves to bogus
    522     // do not call setToBogus() here because fArray and fFlags are not consistent here
    523     fShortLength = 0;
    524     fUnion.fFields.fArray = 0;
    525     fUnion.fFields.fCapacity = 0;
    526     fFlags = kIsBogus;
    527     break;
    528   }
    529 
    530   return *this;
    531 }
    532 
    533 //========================================
    534 // Miscellaneous operations
    535 //========================================
    536 
    537 UnicodeString UnicodeString::unescape() const {
    538     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
    539     const UChar *array = getBuffer();
    540     int32_t len = length();
    541     int32_t prev = 0;
    542     for (int32_t i=0;;) {
    543         if (i == len) {
    544             result.append(array, prev, len - prev);
    545             break;
    546         }
    547         if (array[i++] == 0x5C /*'\\'*/) {
    548             result.append(array, prev, (i - 1) - prev);
    549             UChar32 c = unescapeAt(i); // advances i
    550             if (c < 0) {
    551                 result.remove(); // return empty string
    552                 break; // invalid escape sequence
    553             }
    554             result.append(c);
    555             prev = i;
    556         }
    557     }
    558     return result;
    559 }
    560 
    561 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
    562     return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
    563 }
    564 
    565 //========================================
    566 // Read-only implementation
    567 //========================================
    568 int8_t
    569 UnicodeString::doCompare( int32_t start,
    570               int32_t length,
    571               const UChar *srcChars,
    572               int32_t srcStart,
    573               int32_t srcLength) const
    574 {
    575   // compare illegal string values
    576   // treat const UChar *srcChars==NULL as an empty string
    577   if(isBogus()) {
    578     return -1;
    579   }
    580 
    581   // pin indices to legal values
    582   pinIndices(start, length);
    583 
    584   if(srcChars == NULL) {
    585     srcStart = srcLength = 0;
    586   }
    587 
    588   // get the correct pointer
    589   const UChar *chars = getArrayStart();
    590 
    591   chars += start;
    592   srcChars += srcStart;
    593 
    594   int32_t minLength;
    595   int8_t lengthResult;
    596 
    597   // get the srcLength if necessary
    598   if(srcLength < 0) {
    599     srcLength = u_strlen(srcChars + srcStart);
    600   }
    601 
    602   // are we comparing different lengths?
    603   if(length != srcLength) {
    604     if(length < srcLength) {
    605       minLength = length;
    606       lengthResult = -1;
    607     } else {
    608       minLength = srcLength;
    609       lengthResult = 1;
    610     }
    611   } else {
    612     minLength = length;
    613     lengthResult = 0;
    614   }
    615 
    616   /*
    617    * note that uprv_memcmp() returns an int but we return an int8_t;
    618    * we need to take care not to truncate the result -
    619    * one way to do this is to right-shift the value to
    620    * move the sign bit into the lower 8 bits and making sure that this
    621    * does not become 0 itself
    622    */
    623 
    624   if(minLength > 0 && chars != srcChars) {
    625     int32_t result;
    626 
    627 #   if U_IS_BIG_ENDIAN
    628       // big-endian: byte comparison works
    629       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
    630       if(result != 0) {
    631         return (int8_t)(result >> 15 | 1);
    632       }
    633 #   else
    634       // little-endian: compare UChar units
    635       do {
    636         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
    637         if(result != 0) {
    638           return (int8_t)(result >> 15 | 1);
    639         }
    640       } while(--minLength > 0);
    641 #   endif
    642   }
    643   return lengthResult;
    644 }
    645 
    646 /* String compare in code point order - doCompare() compares in code unit order. */
    647 int8_t
    648 UnicodeString::doCompareCodePointOrder(int32_t start,
    649                                        int32_t length,
    650                                        const UChar *srcChars,
    651                                        int32_t srcStart,
    652                                        int32_t srcLength) const
    653 {
    654   // compare illegal string values
    655   // treat const UChar *srcChars==NULL as an empty string
    656   if(isBogus()) {
    657     return -1;
    658   }
    659 
    660   // pin indices to legal values
    661   pinIndices(start, length);
    662 
    663   if(srcChars == NULL) {
    664     srcStart = srcLength = 0;
    665   }
    666 
    667   int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
    668   /* translate the 32-bit result into an 8-bit one */
    669   if(diff!=0) {
    670     return (int8_t)(diff >> 15 | 1);
    671   } else {
    672     return 0;
    673   }
    674 }
    675 
    676 int32_t
    677 UnicodeString::getLength() const {
    678     return length();
    679 }
    680 
    681 UChar
    682 UnicodeString::getCharAt(int32_t offset) const {
    683   return charAt(offset);
    684 }
    685 
    686 UChar32
    687 UnicodeString::getChar32At(int32_t offset) const {
    688   return char32At(offset);
    689 }
    690 
    691 int32_t
    692 UnicodeString::countChar32(int32_t start, int32_t length) const {
    693   pinIndices(start, length);
    694   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
    695   return u_countChar32(getArrayStart()+start, length);
    696 }
    697 
    698 UBool
    699 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
    700   pinIndices(start, length);
    701   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
    702   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
    703 }
    704 
    705 int32_t
    706 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
    707   // pin index
    708   int32_t len = length();
    709   if(index<0) {
    710     index=0;
    711   } else if(index>len) {
    712     index=len;
    713   }
    714 
    715   const UChar *array = getArrayStart();
    716   if(delta>0) {
    717     UTF_FWD_N(array, index, len, delta);
    718   } else {
    719     UTF_BACK_N(array, 0, index, -delta);
    720   }
    721 
    722   return index;
    723 }
    724 
    725 void
    726 UnicodeString::doExtract(int32_t start,
    727              int32_t length,
    728              UChar *dst,
    729              int32_t dstStart) const
    730 {
    731   // pin indices to legal values
    732   pinIndices(start, length);
    733 
    734   // do not copy anything if we alias dst itself
    735   const UChar *array = getArrayStart();
    736   if(array + start != dst + dstStart) {
    737     us_arrayCopy(array, start, dst, dstStart, length);
    738   }
    739 }
    740 
    741 int32_t
    742 UnicodeString::extract(UChar *dest, int32_t destCapacity,
    743                        UErrorCode &errorCode) const {
    744   int32_t len = length();
    745   if(U_SUCCESS(errorCode)) {
    746     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
    747       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    748     } else {
    749       const UChar *array = getArrayStart();
    750       if(len>0 && len<=destCapacity && array!=dest) {
    751         uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
    752       }
    753       return u_terminateUChars(dest, destCapacity, len, &errorCode);
    754     }
    755   }
    756 
    757   return len;
    758 }
    759 
    760 int32_t
    761 UnicodeString::extract(int32_t start,
    762                        int32_t length,
    763                        char *target,
    764                        int32_t targetCapacity,
    765                        enum EInvariant) const
    766 {
    767   // if the arguments are illegal, then do nothing
    768   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
    769     return 0;
    770   }
    771 
    772   // pin the indices to legal values
    773   pinIndices(start, length);
    774 
    775   if(length <= targetCapacity) {
    776     u_UCharsToChars(getArrayStart() + start, target, length);
    777   }
    778   UErrorCode status = U_ZERO_ERROR;
    779   return u_terminateChars(target, targetCapacity, length, &status);
    780 }
    781 
    782 int32_t
    783 UnicodeString::toUTF8(int32_t start, int32_t len,
    784                       char *target, int32_t capacity) const {
    785   pinIndices(start, len);
    786   int32_t length8;
    787   UErrorCode errorCode = U_ZERO_ERROR;
    788   u_strToUTF8WithSub(target, capacity, &length8,
    789                      getBuffer() + start, len,
    790                      0xFFFD,  // Standard substitution character.
    791                      NULL,    // Don't care about number of substitutions.
    792                      &errorCode);
    793   return length8;
    794 }
    795 
    796 #if U_CHARSET_IS_UTF8
    797 
    798 int32_t
    799 UnicodeString::extract(int32_t start, int32_t len,
    800                        char *target, uint32_t dstSize) const {
    801   // if the arguments are illegal, then do nothing
    802   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
    803     return 0;
    804   }
    805   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
    806 }
    807 
    808 // else see unistr_cnv.cpp
    809 #endif
    810 
    811 void
    812 UnicodeString::extractBetween(int32_t start,
    813                   int32_t limit,
    814                   UnicodeString& target) const {
    815   pinIndex(start);
    816   pinIndex(limit);
    817   doExtract(start, limit - start, target);
    818 }
    819 
    820 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
    821 // as many bytes as the source has UChars.
    822 // The "worst cases" are writing systems like Indic, Thai and CJK with
    823 // 3:1 bytes:UChars.
    824 void
    825 UnicodeString::toUTF8(ByteSink &sink) const {
    826   int32_t length16 = length();
    827   if(length16 != 0) {
    828     char stackBuffer[1024];
    829     int32_t capacity = (int32_t)sizeof(stackBuffer);
    830     UBool utf8IsOwned = FALSE;
    831     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
    832                                       3*length16,
    833                                       stackBuffer, capacity,
    834                                       &capacity);
    835     int32_t length8 = 0;
    836     UErrorCode errorCode = U_ZERO_ERROR;
    837     u_strToUTF8WithSub(utf8, capacity, &length8,
    838                        getBuffer(), length16,
    839                        0xFFFD,  // Standard substitution character.
    840                        NULL,    // Don't care about number of substitutions.
    841                        &errorCode);
    842     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
    843       utf8 = (char *)uprv_malloc(length8);
    844       if(utf8 != NULL) {
    845         utf8IsOwned = TRUE;
    846         errorCode = U_ZERO_ERROR;
    847         u_strToUTF8WithSub(utf8, length8, &length8,
    848                            getBuffer(), length16,
    849                            0xFFFD,  // Standard substitution character.
    850                            NULL,    // Don't care about number of substitutions.
    851                            &errorCode);
    852       } else {
    853         errorCode = U_MEMORY_ALLOCATION_ERROR;
    854       }
    855     }
    856     if(U_SUCCESS(errorCode)) {
    857       sink.Append(utf8, length8);
    858     }
    859     if(utf8IsOwned) {
    860       uprv_free(utf8);
    861     }
    862   }
    863 }
    864 
    865 int32_t
    866 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
    867   int32_t length32=0;
    868   if(U_SUCCESS(errorCode)) {
    869     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
    870     u_strToUTF32WithSub(utf32, capacity, &length32,
    871         getBuffer(), length(),
    872         0xfffd,  // Substitution character.
    873         NULL,    // Don't care about number of substitutions.
    874         &errorCode);
    875   }
    876   return length32;
    877 }
    878 
    879 int32_t
    880 UnicodeString::indexOf(const UChar *srcChars,
    881                int32_t srcStart,
    882                int32_t srcLength,
    883                int32_t start,
    884                int32_t length) const
    885 {
    886   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    887     return -1;
    888   }
    889 
    890   // UnicodeString does not find empty substrings
    891   if(srcLength < 0 && srcChars[srcStart] == 0) {
    892     return -1;
    893   }
    894 
    895   // get the indices within bounds
    896   pinIndices(start, length);
    897 
    898   // find the first occurrence of the substring
    899   const UChar *array = getArrayStart();
    900   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
    901   if(match == NULL) {
    902     return -1;
    903   } else {
    904     return (int32_t)(match - array);
    905   }
    906 }
    907 
    908 int32_t
    909 UnicodeString::doIndexOf(UChar c,
    910              int32_t start,
    911              int32_t length) const
    912 {
    913   // pin indices
    914   pinIndices(start, length);
    915 
    916   // find the first occurrence of c
    917   const UChar *array = getArrayStart();
    918   const UChar *match = u_memchr(array + start, c, length);
    919   if(match == NULL) {
    920     return -1;
    921   } else {
    922     return (int32_t)(match - array);
    923   }
    924 }
    925 
    926 int32_t
    927 UnicodeString::doIndexOf(UChar32 c,
    928                          int32_t start,
    929                          int32_t length) const {
    930   // pin indices
    931   pinIndices(start, length);
    932 
    933   // find the first occurrence of c
    934   const UChar *array = getArrayStart();
    935   const UChar *match = u_memchr32(array + start, c, length);
    936   if(match == NULL) {
    937     return -1;
    938   } else {
    939     return (int32_t)(match - array);
    940   }
    941 }
    942 
    943 int32_t
    944 UnicodeString::lastIndexOf(const UChar *srcChars,
    945                int32_t srcStart,
    946                int32_t srcLength,
    947                int32_t start,
    948                int32_t length) const
    949 {
    950   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    951     return -1;
    952   }
    953 
    954   // UnicodeString does not find empty substrings
    955   if(srcLength < 0 && srcChars[srcStart] == 0) {
    956     return -1;
    957   }
    958 
    959   // get the indices within bounds
    960   pinIndices(start, length);
    961 
    962   // find the last occurrence of the substring
    963   const UChar *array = getArrayStart();
    964   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
    965   if(match == NULL) {
    966     return -1;
    967   } else {
    968     return (int32_t)(match - array);
    969   }
    970 }
    971 
    972 int32_t
    973 UnicodeString::doLastIndexOf(UChar c,
    974                  int32_t start,
    975                  int32_t length) const
    976 {
    977   if(isBogus()) {
    978     return -1;
    979   }
    980 
    981   // pin indices
    982   pinIndices(start, length);
    983 
    984   // find the last occurrence of c
    985   const UChar *array = getArrayStart();
    986   const UChar *match = u_memrchr(array + start, c, length);
    987   if(match == NULL) {
    988     return -1;
    989   } else {
    990     return (int32_t)(match - array);
    991   }
    992 }
    993 
    994 int32_t
    995 UnicodeString::doLastIndexOf(UChar32 c,
    996                              int32_t start,
    997                              int32_t length) const {
    998   // pin indices
    999   pinIndices(start, length);
   1000 
   1001   // find the last occurrence of c
   1002   const UChar *array = getArrayStart();
   1003   const UChar *match = u_memrchr32(array + start, c, length);
   1004   if(match == NULL) {
   1005     return -1;
   1006   } else {
   1007     return (int32_t)(match - array);
   1008   }
   1009 }
   1010 
   1011 //========================================
   1012 // Write implementation
   1013 //========================================
   1014 
   1015 UnicodeString&
   1016 UnicodeString::findAndReplace(int32_t start,
   1017                   int32_t length,
   1018                   const UnicodeString& oldText,
   1019                   int32_t oldStart,
   1020                   int32_t oldLength,
   1021                   const UnicodeString& newText,
   1022                   int32_t newStart,
   1023                   int32_t newLength)
   1024 {
   1025   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
   1026     return *this;
   1027   }
   1028 
   1029   pinIndices(start, length);
   1030   oldText.pinIndices(oldStart, oldLength);
   1031   newText.pinIndices(newStart, newLength);
   1032 
   1033   if(oldLength == 0) {
   1034     return *this;
   1035   }
   1036 
   1037   while(length > 0 && length >= oldLength) {
   1038     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
   1039     if(pos < 0) {
   1040       // no more oldText's here: done
   1041       break;
   1042     } else {
   1043       // we found oldText, replace it by newText and go beyond it
   1044       replace(pos, oldLength, newText, newStart, newLength);
   1045       length -= pos + oldLength - start;
   1046       start = pos + newLength;
   1047     }
   1048   }
   1049 
   1050   return *this;
   1051 }
   1052 
   1053 
   1054 void
   1055 UnicodeString::setToBogus()
   1056 {
   1057   releaseArray();
   1058 
   1059   fShortLength = 0;
   1060   fUnion.fFields.fArray = 0;
   1061   fUnion.fFields.fCapacity = 0;
   1062   fFlags = kIsBogus;
   1063 }
   1064 
   1065 // turn a bogus string into an empty one
   1066 void
   1067 UnicodeString::unBogus() {
   1068   if(fFlags & kIsBogus) {
   1069     setToEmpty();
   1070   }
   1071 }
   1072 
   1073 // setTo() analogous to the readonly-aliasing constructor with the same signature
   1074 UnicodeString &
   1075 UnicodeString::setTo(UBool isTerminated,
   1076                      const UChar *text,
   1077                      int32_t textLength)
   1078 {
   1079   if(fFlags & kOpenGetBuffer) {
   1080     // do not modify a string that has an "open" getBuffer(minCapacity)
   1081     return *this;
   1082   }
   1083 
   1084   if(text == NULL) {
   1085     // treat as an empty string, do not alias
   1086     releaseArray();
   1087     setToEmpty();
   1088     return *this;
   1089   }
   1090 
   1091   if( textLength < -1 ||
   1092       (textLength == -1 && !isTerminated) ||
   1093       (textLength >= 0 && isTerminated && text[textLength] != 0)
   1094   ) {
   1095     setToBogus();
   1096     return *this;
   1097   }
   1098 
   1099   releaseArray();
   1100 
   1101   if(textLength == -1) {
   1102     // text is terminated, or else it would have failed the above test
   1103     textLength = u_strlen(text);
   1104   }
   1105   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
   1106 
   1107   fFlags = kReadonlyAlias;
   1108   return *this;
   1109 }
   1110 
   1111 // setTo() analogous to the writable-aliasing constructor with the same signature
   1112 UnicodeString &
   1113 UnicodeString::setTo(UChar *buffer,
   1114                      int32_t buffLength,
   1115                      int32_t buffCapacity) {
   1116   if(fFlags & kOpenGetBuffer) {
   1117     // do not modify a string that has an "open" getBuffer(minCapacity)
   1118     return *this;
   1119   }
   1120 
   1121   if(buffer == NULL) {
   1122     // treat as an empty string, do not alias
   1123     releaseArray();
   1124     setToEmpty();
   1125     return *this;
   1126   }
   1127 
   1128   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
   1129     setToBogus();
   1130     return *this;
   1131   } else if(buffLength == -1) {
   1132     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
   1133     const UChar *p = buffer, *limit = buffer + buffCapacity;
   1134     while(p != limit && *p != 0) {
   1135       ++p;
   1136     }
   1137     buffLength = (int32_t)(p - buffer);
   1138   }
   1139 
   1140   releaseArray();
   1141 
   1142   setArray(buffer, buffLength, buffCapacity);
   1143   fFlags = kWritableAlias;
   1144   return *this;
   1145 }
   1146 
   1147 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
   1148   unBogus();
   1149   int32_t length = utf8.length();
   1150   int32_t capacity;
   1151   // The UTF-16 string will be at most as long as the UTF-8 string.
   1152   if(length <= US_STACKBUF_SIZE) {
   1153     capacity = US_STACKBUF_SIZE;
   1154   } else {
   1155     capacity = length + 1;  // +1 for the terminating NUL.
   1156   }
   1157   UChar *utf16 = getBuffer(capacity);
   1158   int32_t length16;
   1159   UErrorCode errorCode = U_ZERO_ERROR;
   1160   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
   1161       utf8.data(), length,
   1162       0xfffd,  // Substitution character.
   1163       NULL,    // Don't care about number of substitutions.
   1164       &errorCode);
   1165   releaseBuffer(length16);
   1166   if(U_FAILURE(errorCode)) {
   1167     setToBogus();
   1168   }
   1169   return *this;
   1170 }
   1171 
   1172 UnicodeString&
   1173 UnicodeString::setCharAt(int32_t offset,
   1174              UChar c)
   1175 {
   1176   int32_t len = length();
   1177   if(cloneArrayIfNeeded() && len > 0) {
   1178     if(offset < 0) {
   1179       offset = 0;
   1180     } else if(offset >= len) {
   1181       offset = len - 1;
   1182     }
   1183 
   1184     getArrayStart()[offset] = c;
   1185   }
   1186   return *this;
   1187 }
   1188 
   1189 UnicodeString&
   1190 UnicodeString::doReplace( int32_t start,
   1191               int32_t length,
   1192               const UnicodeString& src,
   1193               int32_t srcStart,
   1194               int32_t srcLength)
   1195 {
   1196   if(!src.isBogus()) {
   1197     // pin the indices to legal values
   1198     src.pinIndices(srcStart, srcLength);
   1199 
   1200     // get the characters from src
   1201     // and replace the range in ourselves with them
   1202     return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
   1203   } else {
   1204     // remove the range
   1205     return doReplace(start, length, 0, 0, 0);
   1206   }
   1207 }
   1208 
   1209 UnicodeString&
   1210 UnicodeString::doReplace(int32_t start,
   1211              int32_t length,
   1212              const UChar *srcChars,
   1213              int32_t srcStart,
   1214              int32_t srcLength)
   1215 {
   1216   if(!isWritable()) {
   1217     return *this;
   1218   }
   1219 
   1220   if(srcChars == 0) {
   1221     srcStart = srcLength = 0;
   1222   } else if(srcLength < 0) {
   1223     // get the srcLength if necessary
   1224     srcLength = u_strlen(srcChars + srcStart);
   1225   }
   1226 
   1227   int32_t oldLength = this->length();
   1228 
   1229   // calculate the size of the string after the replace
   1230   int32_t newSize;
   1231 
   1232   // optimize append() onto a large-enough, owned string
   1233   if(start >= oldLength) {
   1234     newSize = oldLength + srcLength;
   1235     if(newSize <= getCapacity() && isBufferWritable()) {
   1236       us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength);
   1237       setLength(newSize);
   1238       return *this;
   1239     } else {
   1240       // pin the indices to legal values
   1241       start = oldLength;
   1242       length = 0;
   1243     }
   1244   } else {
   1245     // pin the indices to legal values
   1246     pinIndices(start, length);
   1247 
   1248     newSize = oldLength - length + srcLength;
   1249   }
   1250 
   1251   // the following may change fArray but will not copy the current contents;
   1252   // therefore we need to keep the current fArray
   1253   UChar oldStackBuffer[US_STACKBUF_SIZE];
   1254   UChar *oldArray;
   1255   if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) {
   1256     // copy the stack buffer contents because it will be overwritten with
   1257     // fUnion.fFields values
   1258     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
   1259     oldArray = oldStackBuffer;
   1260   } else {
   1261     oldArray = getArrayStart();
   1262   }
   1263 
   1264   // clone our array and allocate a bigger array if needed
   1265   int32_t *bufferToDelete = 0;
   1266   if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
   1267                          FALSE, &bufferToDelete)
   1268   ) {
   1269     return *this;
   1270   }
   1271 
   1272   // now do the replace
   1273 
   1274   UChar *newArray = getArrayStart();
   1275   if(newArray != oldArray) {
   1276     // if fArray changed, then we need to copy everything except what will change
   1277     us_arrayCopy(oldArray, 0, newArray, 0, start);
   1278     us_arrayCopy(oldArray, start + length,
   1279                  newArray, start + srcLength,
   1280                  oldLength - (start + length));
   1281   } else if(length != srcLength) {
   1282     // fArray did not change; copy only the portion that isn't changing, leaving a hole
   1283     us_arrayCopy(oldArray, start + length,
   1284                  newArray, start + srcLength,
   1285                  oldLength - (start + length));
   1286   }
   1287 
   1288   // now fill in the hole with the new string
   1289   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
   1290 
   1291   setLength(newSize);
   1292 
   1293   // delayed delete in case srcChars == fArray when we started, and
   1294   // to keep oldArray alive for the above operations
   1295   if (bufferToDelete) {
   1296     uprv_free(bufferToDelete);
   1297   }
   1298 
   1299   return *this;
   1300 }
   1301 
   1302 /**
   1303  * Replaceable API
   1304  */
   1305 void
   1306 UnicodeString::handleReplaceBetween(int32_t start,
   1307                                     int32_t limit,
   1308                                     const UnicodeString& text) {
   1309     replaceBetween(start, limit, text);
   1310 }
   1311 
   1312 /**
   1313  * Replaceable API
   1314  */
   1315 void
   1316 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
   1317     if (limit <= start) {
   1318         return; // Nothing to do; avoid bogus malloc call
   1319     }
   1320     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
   1321     // Check to make sure text is not null.
   1322     if (text != NULL) {
   1323 	    extractBetween(start, limit, text, 0);
   1324 	    insert(dest, text, 0, limit - start);
   1325 	    uprv_free(text);
   1326     }
   1327 }
   1328 
   1329 /**
   1330  * Replaceable API
   1331  *
   1332  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
   1333  * so we implement this function here.
   1334  */
   1335 UBool Replaceable::hasMetaData() const {
   1336     return TRUE;
   1337 }
   1338 
   1339 /**
   1340  * Replaceable API
   1341  */
   1342 UBool UnicodeString::hasMetaData() const {
   1343     return FALSE;
   1344 }
   1345 
   1346 UnicodeString&
   1347 UnicodeString::doReverse(int32_t start,
   1348              int32_t length)
   1349 {
   1350   if(this->length() <= 1 || !cloneArrayIfNeeded()) {
   1351     return *this;
   1352   }
   1353 
   1354   // pin the indices to legal values
   1355   pinIndices(start, length);
   1356 
   1357   UChar *left = getArrayStart() + start;
   1358   UChar *right = left + length;
   1359   UChar swap;
   1360   UBool hasSupplementary = FALSE;
   1361 
   1362   while(left < --right) {
   1363     hasSupplementary |= (UBool)UTF_IS_LEAD(swap = *left);
   1364     hasSupplementary |= (UBool)UTF_IS_LEAD(*left++ = *right);
   1365     *right = swap;
   1366   }
   1367 
   1368   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
   1369   if(hasSupplementary) {
   1370     UChar swap2;
   1371 
   1372     left = getArrayStart() + start;
   1373     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
   1374     while(left < right) {
   1375       if(UTF_IS_TRAIL(swap = *left) && UTF_IS_LEAD(swap2 = *(left + 1))) {
   1376         *left++ = swap2;
   1377         *left++ = swap;
   1378       } else {
   1379         ++left;
   1380       }
   1381     }
   1382   }
   1383 
   1384   return *this;
   1385 }
   1386 
   1387 UBool
   1388 UnicodeString::padLeading(int32_t targetLength,
   1389                           UChar padChar)
   1390 {
   1391   int32_t oldLength = length();
   1392   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1393     return FALSE;
   1394   } else {
   1395     // move contents up by padding width
   1396     UChar *array = getArrayStart();
   1397     int32_t start = targetLength - oldLength;
   1398     us_arrayCopy(array, 0, array, start, oldLength);
   1399 
   1400     // fill in padding character
   1401     while(--start >= 0) {
   1402       array[start] = padChar;
   1403     }
   1404     setLength(targetLength);
   1405     return TRUE;
   1406   }
   1407 }
   1408 
   1409 UBool
   1410 UnicodeString::padTrailing(int32_t targetLength,
   1411                            UChar padChar)
   1412 {
   1413   int32_t oldLength = length();
   1414   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1415     return FALSE;
   1416   } else {
   1417     // fill in padding character
   1418     UChar *array = getArrayStart();
   1419     int32_t length = targetLength;
   1420     while(--length >= oldLength) {
   1421       array[length] = padChar;
   1422     }
   1423     setLength(targetLength);
   1424     return TRUE;
   1425   }
   1426 }
   1427 
   1428 //========================================
   1429 // Hashing
   1430 //========================================
   1431 int32_t
   1432 UnicodeString::doHashCode() const
   1433 {
   1434     /* Delegate hash computation to uhash.  This makes UnicodeString
   1435      * hashing consistent with UChar* hashing.  */
   1436     int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
   1437     if (hashCode == kInvalidHashCode) {
   1438         hashCode = kEmptyHashCode;
   1439     }
   1440     return hashCode;
   1441 }
   1442 
   1443 //========================================
   1444 // External Buffer
   1445 //========================================
   1446 
   1447 UChar *
   1448 UnicodeString::getBuffer(int32_t minCapacity) {
   1449   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
   1450     fFlags|=kOpenGetBuffer;
   1451     fShortLength=0;
   1452     return getArrayStart();
   1453   } else {
   1454     return 0;
   1455   }
   1456 }
   1457 
   1458 void
   1459 UnicodeString::releaseBuffer(int32_t newLength) {
   1460   if(fFlags&kOpenGetBuffer && newLength>=-1) {
   1461     // set the new fLength
   1462     int32_t capacity=getCapacity();
   1463     if(newLength==-1) {
   1464       // the new length is the string length, capped by fCapacity
   1465       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
   1466       while(p<limit && *p!=0) {
   1467         ++p;
   1468       }
   1469       newLength=(int32_t)(p-array);
   1470     } else if(newLength>capacity) {
   1471       newLength=capacity;
   1472     }
   1473     setLength(newLength);
   1474     fFlags&=~kOpenGetBuffer;
   1475   }
   1476 }
   1477 
   1478 //========================================
   1479 // Miscellaneous
   1480 //========================================
   1481 UBool
   1482 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
   1483                                   int32_t growCapacity,
   1484                                   UBool doCopyArray,
   1485                                   int32_t **pBufferToDelete,
   1486                                   UBool forceClone) {
   1487   // default parameters need to be static, therefore
   1488   // the defaults are -1 to have convenience defaults
   1489   if(newCapacity == -1) {
   1490     newCapacity = getCapacity();
   1491   }
   1492 
   1493   // while a getBuffer(minCapacity) is "open",
   1494   // prevent any modifications of the string by returning FALSE here
   1495   // if the string is bogus, then only an assignment or similar can revive it
   1496   if(!isWritable()) {
   1497     return FALSE;
   1498   }
   1499 
   1500   /*
   1501    * We need to make a copy of the array if
   1502    * the buffer is read-only, or
   1503    * the buffer is refCounted (shared), and refCount>1, or
   1504    * the buffer is too small.
   1505    * Return FALSE if memory could not be allocated.
   1506    */
   1507   if(forceClone ||
   1508      fFlags & kBufferIsReadonly ||
   1509      fFlags & kRefCounted && refCount() > 1 ||
   1510      newCapacity > getCapacity()
   1511   ) {
   1512     // check growCapacity for default value and use of the stack buffer
   1513     if(growCapacity == -1) {
   1514       growCapacity = newCapacity;
   1515     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
   1516       growCapacity = US_STACKBUF_SIZE;
   1517     }
   1518 
   1519     // save old values
   1520     UChar oldStackBuffer[US_STACKBUF_SIZE];
   1521     UChar *oldArray;
   1522     uint8_t flags = fFlags;
   1523 
   1524     if(flags&kUsingStackBuffer) {
   1525       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
   1526         // copy the stack buffer contents because it will be overwritten with
   1527         // fUnion.fFields values
   1528         us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
   1529         oldArray = oldStackBuffer;
   1530       } else {
   1531         oldArray = 0; // no need to copy from stack buffer to itself
   1532       }
   1533     } else {
   1534       oldArray = fUnion.fFields.fArray;
   1535     }
   1536 
   1537     // allocate a new array
   1538     if(allocate(growCapacity) ||
   1539        newCapacity < growCapacity && allocate(newCapacity)
   1540     ) {
   1541       if(doCopyArray && oldArray != 0) {
   1542         // copy the contents
   1543         // do not copy more than what fits - it may be smaller than before
   1544         int32_t minLength = length();
   1545         newCapacity = getCapacity();
   1546         if(newCapacity < minLength) {
   1547           minLength = newCapacity;
   1548           setLength(minLength);
   1549         }
   1550         us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
   1551       } else {
   1552         fShortLength = 0;
   1553       }
   1554 
   1555       // release the old array
   1556       if(flags & kRefCounted) {
   1557         // the array is refCounted; decrement and release if 0
   1558         int32_t *pRefCount = ((int32_t *)oldArray - 1);
   1559         if(umtx_atomic_dec(pRefCount) == 0) {
   1560           if(pBufferToDelete == 0) {
   1561             uprv_free(pRefCount);
   1562           } else {
   1563             // the caller requested to delete it himself
   1564             *pBufferToDelete = pRefCount;
   1565           }
   1566         }
   1567       }
   1568     } else {
   1569       // not enough memory for growCapacity and not even for the smaller newCapacity
   1570       // reset the old values for setToBogus() to release the array
   1571       if(!(flags&kUsingStackBuffer)) {
   1572         fUnion.fFields.fArray = oldArray;
   1573       }
   1574       fFlags = flags;
   1575       setToBogus();
   1576       return FALSE;
   1577     }
   1578   }
   1579   return TRUE;
   1580 }
   1581 U_NAMESPACE_END
   1582 
   1583 #ifdef U_STATIC_IMPLEMENTATION
   1584 /*
   1585 This should never be called. It is defined here to make sure that the
   1586 virtual vector deleting destructor is defined within unistr.cpp.
   1587 The vector deleting destructor is already a part of UObject,
   1588 but defining it here makes sure that it is included with this object file.
   1589 This makes sure that static library dependencies are kept to a minimum.
   1590 */
   1591 static void uprv_UnicodeStringDummy(void) {
   1592     U_NAMESPACE_USE
   1593     delete [] (new UnicodeString[2]);
   1594 }
   1595 #endif
   1596 
   1597