Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1999-2011, International Business Machines Corporation and   *
      4 * others. All Rights Reserved.                                               *
      5 ******************************************************************************
      6 *
      7 * File unistr.cpp
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   09/25/98    stephen     Creation.
     13 *   04/20/99    stephen     Overhauled per 4/16 code review.
     14 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
     15 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
     16 *                           Replaceable.
     17 *   06/25/01    grhoten     Removed the dependency on iostream
     18 ******************************************************************************
     19 */
     20 
     21 #include "unicode/utypes.h"
     22 #include "unicode/appendable.h"
     23 #include "unicode/putil.h"
     24 #include "cstring.h"
     25 #include "cmemory.h"
     26 #include "unicode/ustring.h"
     27 #include "unicode/unistr.h"
     28 #include "uhash.h"
     29 #include "ustr_imp.h"
     30 #include "umutex.h"
     31 
     32 #if 0
     33 
     34 #if U_IOSTREAM_SOURCE >= 199711
     35 #include <iostream>
     36 using namespace std;
     37 #elif U_IOSTREAM_SOURCE >= 198506
     38 #include <iostream.h>
     39 #endif
     40 
     41 //DEBUGGING
     42 void
     43 print(const UnicodeString& s,
     44       const char *name)
     45 {
     46   UChar c;
     47   cout << name << ":|";
     48   for(int i = 0; i < s.length(); ++i) {
     49     c = s[i];
     50     if(c>= 0x007E || c < 0x0020)
     51       cout << "[0x" << hex << s[i] << "]";
     52     else
     53       cout << (char) s[i];
     54   }
     55   cout << '|' << endl;
     56 }
     57 
     58 void
     59 print(const UChar *s,
     60       int32_t len,
     61       const char *name)
     62 {
     63   UChar c;
     64   cout << name << ":|";
     65   for(int i = 0; i < len; ++i) {
     66     c = s[i];
     67     if(c>= 0x007E || c < 0x0020)
     68       cout << "[0x" << hex << s[i] << "]";
     69     else
     70       cout << (char) s[i];
     71   }
     72   cout << '|' << endl;
     73 }
     74 // END DEBUGGING
     75 #endif
     76 
     77 // Local function definitions for now
     78 
     79 // need to copy areas that may overlap
     80 static
     81 inline void
     82 us_arrayCopy(const UChar *src, int32_t srcStart,
     83          UChar *dst, int32_t dstStart, int32_t count)
     84 {
     85   if(count>0) {
     86     uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
     87   }
     88 }
     89 
     90 // u_unescapeAt() callback to get a UChar from a UnicodeString
     91 U_CDECL_BEGIN
     92 static UChar U_CALLCONV
     93 UnicodeString_charAt(int32_t offset, void *context) {
     94     return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
     95 }
     96 U_CDECL_END
     97 
     98 U_NAMESPACE_BEGIN
     99 
    100 /* The Replaceable virtual destructor can't be defined in the header
    101    due to how AIX works with multiple definitions of virtual functions.
    102 */
// Out-of-line, empty destructor for the Replaceable base class.
Replaceable::~Replaceable() {}
// Out-of-line, empty default constructor for the Replaceable base class.
Replaceable::Replaceable() {}
// Expands the RTTI implementation macro for UnicodeString.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
    106 
    107 UnicodeString U_EXPORT2
    108 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
    109     return
    110         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
    111             append(s1).
    112                 append(s2);
    113 }
    114 
    115 //========================================
    116 // Reference Counting functions, put at top of file so that optimizing compilers
    117 //                               have a chance to automatically inline.
    118 //========================================
    119 
    120 void
    121 UnicodeString::addRef()
    122 {  umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
    123 
    124 int32_t
    125 UnicodeString::removeRef()
    126 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
    127 
    128 int32_t
    129 UnicodeString::refCount() const
    130 {
    131     umtx_lock(NULL);
    132     // Note: without the lock to force a memory barrier, we might see a very
    133     //       stale value on some multi-processor systems.
    134     int32_t  count = *((int32_t *)fUnion.fFields.fArray - 1);
    135     umtx_unlock(NULL);
    136     return count;
    137  }
    138 
    139 void
    140 UnicodeString::releaseArray() {
    141   if((fFlags & kRefCounted) && removeRef() == 0) {
    142     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
    143   }
    144 }
    145 
    146 
    147 
    148 //========================================
    149 // Constructors
    150 //========================================
// Default constructor: an empty string (length 0) flagged as using the
// internal stack buffer.
UnicodeString::UnicodeString()
  : fShortLength(0),
    fFlags(kShortString)
{}
    155 
    156 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
    157   : fShortLength(0),
    158     fFlags(0)
    159 {
    160   if(count <= 0 || (uint32_t)c > 0x10ffff) {
    161     // just allocate and do not do anything else
    162     allocate(capacity);
    163   } else {
    164     // count > 0, allocate and fill the new string with count c's
    165     int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
    166     if(capacity < length) {
    167       capacity = length;
    168     }
    169     if(allocate(capacity)) {
    170       UChar *array = getArrayStart();
    171       int32_t i = 0;
    172 
    173       // fill the new string with c
    174       if(unitCount == 1) {
    175         // fill with length UChars
    176         while(i < length) {
    177           array[i++] = (UChar)c;
    178         }
    179       } else {
    180         // get the code units for c
    181         UChar units[UTF_MAX_CHAR_LENGTH];
    182         UTF_APPEND_CHAR_UNSAFE(units, i, c);
    183 
    184         // now it must be i==unitCount
    185         i = 0;
    186 
    187         // for Unicode, unitCount can only be 1, 2, 3, or 4
    188         // 1 is handled above
    189         while(i < length) {
    190           int32_t unitIdx = 0;
    191           while(unitIdx < unitCount) {
    192             array[i++]=units[unitIdx++];
    193           }
    194         }
    195       }
    196     }
    197     setLength(length);
    198   }
    199 }
    200 
// Constructs a one-unit string holding ch in the internal stack buffer.
UnicodeString::UnicodeString(UChar ch)
  : fShortLength(1),
    fFlags(kShortString)
{
  fUnion.fStackBuffer[0] = ch;
}
    207 
    208 UnicodeString::UnicodeString(UChar32 ch)
    209   : fShortLength(0),
    210     fFlags(kShortString)
    211 {
    212   int32_t i = 0;
    213   UBool isError = FALSE;
    214   U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
    215   fShortLength = (int8_t)i;
    216 }
    217 
// Constructs a string from text by starting empty and calling doReplace()
// with a source length of -1.
// NOTE(review): -1 presumably means "NUL-terminated" - confirm in doReplace().
UnicodeString::UnicodeString(const UChar *text)
  : fShortLength(0),
    fFlags(kShortString)
{
  doReplace(0, 0, text, 0, -1);
}
    224 
// Constructs a string from the first textLength units of text by starting
// empty and inserting them at position 0 via doReplace().
UnicodeString::UnicodeString(const UChar *text,
                             int32_t textLength)
  : fShortLength(0),
    fFlags(kShortString)
{
  doReplace(0, 0, text, 0, textLength);
}
    232 
    233 UnicodeString::UnicodeString(UBool isTerminated,
    234                              const UChar *text,
    235                              int32_t textLength)
    236   : fShortLength(0),
    237     fFlags(kReadonlyAlias)
    238 {
    239   if(text == NULL) {
    240     // treat as an empty string, do not alias
    241     setToEmpty();
    242   } else if(textLength < -1 ||
    243             (textLength == -1 && !isTerminated) ||
    244             (textLength >= 0 && isTerminated && text[textLength] != 0)
    245   ) {
    246     setToBogus();
    247   } else {
    248     if(textLength == -1) {
    249       // text is terminated, or else it would have failed the above test
    250       textLength = u_strlen(text);
    251     }
    252     setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
    253   }
    254 }
    255 
    256 UnicodeString::UnicodeString(UChar *buff,
    257                              int32_t buffLength,
    258                              int32_t buffCapacity)
    259   : fShortLength(0),
    260     fFlags(kWritableAlias)
    261 {
    262   if(buff == NULL) {
    263     // treat as an empty string, do not alias
    264     setToEmpty();
    265   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    266     setToBogus();
    267   } else {
    268     if(buffLength == -1) {
    269       // fLength = u_strlen(buff); but do not look beyond buffCapacity
    270       const UChar *p = buff, *limit = buff + buffCapacity;
    271       while(p != limit && *p != 0) {
    272         ++p;
    273       }
    274       buffLength = (int32_t)(p - buff);
    275     }
    276     setArray(buff, buffLength, buffCapacity);
    277   }
    278 }
    279 
    280 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
    281   : fShortLength(0),
    282     fFlags(kShortString)
    283 {
    284   if(src==NULL) {
    285     // treat as an empty string
    286   } else {
    287     if(length<0) {
    288       length=(int32_t)uprv_strlen(src);
    289     }
    290     if(cloneArrayIfNeeded(length, length, FALSE)) {
    291       u_charsToUChars(src, getArrayStart(), length);
    292       setLength(length);
    293     } else {
    294       setToBogus();
    295     }
    296   }
    297 }
    298 
    299 #if U_CHARSET_IS_UTF8
    300 
    301 UnicodeString::UnicodeString(const char *codepageData)
    302   : fShortLength(0),
    303     fFlags(kShortString) {
    304   if(codepageData != 0) {
    305     setToUTF8(codepageData);
    306   }
    307 }
    308 
    309 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
    310   : fShortLength(0),
    311     fFlags(kShortString) {
    312   // if there's nothing to convert, do nothing
    313   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
    314     return;
    315   }
    316   if(dataLength == -1) {
    317     dataLength = (int32_t)uprv_strlen(codepageData);
    318   }
    319   setToUTF8(StringPiece(codepageData, dataLength));
    320 }
    321 
    322 // else see unistr_cnv.cpp
    323 #endif
    324 
// Copy constructor: starts as an empty stack-buffer string, then delegates
// to copyFrom(that).
UnicodeString::UnicodeString(const UnicodeString& that)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  copyFrom(that);
}
    332 
// Substring constructor: starts empty, then delegates to
// setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart);
}
    341 
// Substring constructor: starts empty, then delegates to
// setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart, srcLength);
}
    351 
    352 // Replaceable base class clone() default implementation, does not clone
// Default clone() for the Replaceable base class: performs no cloning and
// always returns NULL.
Replaceable *
Replaceable::clone() const {
  return NULL;
}
    357 
    358 // UnicodeString overrides clone() with a real implementation
// Returns a new heap-allocated UnicodeString copy-constructed from *this.
Replaceable *
UnicodeString::clone() const {
  return new UnicodeString(*this);
}
    363 
    364 //========================================
    365 // array allocation
    366 //========================================
    367 
    368 UBool
    369 UnicodeString::allocate(int32_t capacity) {
    370   if(capacity <= US_STACKBUF_SIZE) {
    371     fFlags = kShortString;
    372   } else {
    373     // count bytes for the refCounter and the string capacity, and
    374     // round up to a multiple of 16; then divide by 4 and allocate int32_t's
    375     // to be safely aligned for the refCount
    376     // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
    377     int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
    378     int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
    379     if(array != 0) {
    380       // set initial refCount and point behind the refCount
    381       *array++ = 1;
    382 
    383       // have fArray point to the first UChar
    384       fUnion.fFields.fArray = (UChar *)array;
    385       fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
    386       fFlags = kLongString;
    387     } else {
    388       fShortLength = 0;
    389       fUnion.fFields.fArray = 0;
    390       fUnion.fFields.fCapacity = 0;
    391       fFlags = kIsBogus;
    392       return FALSE;
    393     }
    394   }
    395   return TRUE;
    396 }
    397 
    398 //========================================
    399 // Destructor
    400 //========================================
// Destructor: gives up this string's hold on its buffer via releaseArray().
UnicodeString::~UnicodeString()
{
  releaseArray();
}
    405 
    406 //========================================
    407 // Factory methods
    408 //========================================
    409 
// Factory: returns a new string that has been filled by calling
// setToUTF8(utf8) on an initially empty string.
UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
  UnicodeString result;
  result.setToUTF8(utf8);
  return result;
}
    415 
    416 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
    417   UnicodeString result;
    418   int32_t capacity;
    419   // Most UTF-32 strings will be BMP-only and result in a same-length
    420   // UTF-16 string. We overestimate the capacity just slightly,
    421   // just in case there are a few supplementary characters.
    422   if(length <= US_STACKBUF_SIZE) {
    423     capacity = US_STACKBUF_SIZE;
    424   } else {
    425     capacity = length + (length >> 4) + 4;
    426   }
    427   do {
    428     UChar *utf16 = result.getBuffer(capacity);
    429     int32_t length16;
    430     UErrorCode errorCode = U_ZERO_ERROR;
    431     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
    432         utf32, length,
    433         0xfffd,  // Substitution character.
    434         NULL,    // Don't care about number of substitutions.
    435         &errorCode);
    436     result.releaseBuffer(length16);
    437     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
    438       capacity = length16 + 1;  // +1 for the terminating NUL.
    439       continue;
    440     } else if(U_FAILURE(errorCode)) {
    441       result.setToBogus();
    442     }
    443     break;
    444   } while(TRUE);
    445   return result;
    446 }
    447 
    448 //========================================
    449 // Assignment
    450 //========================================
    451 
// Assignment operator: delegates to copyFrom(src) and returns its result.
UnicodeString &
UnicodeString::operator=(const UnicodeString &src) {
  return copyFrom(src);
}
    456 
// Same as assignment, but calls copyFrom() with fastCopy=TRUE, which lets a
// read-only alias source be copied as another read-only alias instead of
// being duplicated into a new buffer.
UnicodeString &
UnicodeString::fastCopyFrom(const UnicodeString &src) {
  return copyFrom(src, TRUE);
}
    461 
// Makes *this a copy of src and returns *this.
//
// src      - the string to copy; a bogus src makes *this bogus
// fastCopy - when TRUE, a read-only alias src is copied as a read-only
//            alias of the same buffer rather than duplicated
//
// How the contents are taken over depends on src.fFlags:
//   kShortString   - the stack buffer contents are copied
//   kLongString    - the reference-counted buffer is shared (addRef)
//   kReadonlyAlias - aliased again if fastCopy, otherwise duplicated
//   kWritableAlias - duplicated into a newly allocated buffer
// Any other flag value, or a failed allocation, leaves *this bogus.
UnicodeString &
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
  // if assigning to ourselves, do nothing
  // NOTE(review): comparing this (and &src below) against 0 relies on
  // behavior the language does not define; compilers may drop these checks.
  if(this == 0 || this == &src) {
    return *this;
  }

  // is the right side bogus?
  if(&src == 0 || src.isBogus()) {
    setToBogus();
    return *this;
  }

  // delete the current contents
  releaseArray();

  if(src.isEmpty()) {
    // empty string - use the stack buffer
    setToEmpty();
    return *this;
  }

  // we always copy the length
  int32_t srcLength = src.length();
  setLength(srcLength);

  // fLength>0 and not an "open" src.getBuffer(minCapacity)
  switch(src.fFlags) {
  case kShortString:
    // short string using the stack buffer, do the same
    fFlags = kShortString;
    uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
    break;
  case kLongString:
    // src uses a refCounted string buffer, use that buffer with refCount
    // src is const, use a cast - we don't really change it
    ((UnicodeString &)src).addRef();
    // copy all fields, share the reference-counted buffer
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    fFlags = src.fFlags;
    break;
  case kReadonlyAlias:
    if(fastCopy) {
      // src is a readonly alias, do the same
      // -> maintain the readonly alias as such
      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
      fFlags = src.fFlags;
      break;
    }
    // else if(!fastCopy) fall through to case kWritableAlias
    // -> allocate a new buffer and copy the contents
  case kWritableAlias:
    // src is a writable alias; we make a copy of that instead
    if(allocate(srcLength)) {
      uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
      break;
    }
    // if there is not enough memory, then fall through to setting to bogus
  default:
    // if src is bogus, set ourselves to bogus
    // do not call setToBogus() here because fArray and fFlags are not consistent here
    fShortLength = 0;
    fUnion.fFields.fArray = 0;
    fUnion.fFields.fCapacity = 0;
    fFlags = kIsBogus;
    break;
  }

  return *this;
}
    534 
    535 //========================================
    536 // Miscellaneous operations
    537 //========================================
    538 
    539 UnicodeString UnicodeString::unescape() const {
    540     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
    541     const UChar *array = getBuffer();
    542     int32_t len = length();
    543     int32_t prev = 0;
    544     for (int32_t i=0;;) {
    545         if (i == len) {
    546             result.append(array, prev, len - prev);
    547             break;
    548         }
    549         if (array[i++] == 0x5C /*'\\'*/) {
    550             result.append(array, prev, (i - 1) - prev);
    551             UChar32 c = unescapeAt(i); // advances i
    552             if (c < 0) {
    553                 result.remove(); // return empty string
    554                 break; // invalid escape sequence
    555             }
    556             result.append(c);
    557             prev = i;
    558         }
    559     }
    560     return result;
    561 }
    562 
// Decodes one escape sequence by calling u_unescapeAt() with this string as
// the character source (through the UnicodeString_charAt callback).
// offset is passed by address, so u_unescapeAt() can update it.
// Returns whatever u_unescapeAt() returns.
UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
    return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
}
    566 
    567 //========================================
    568 // Read-only implementation
    569 //========================================
    570 int8_t
    571 UnicodeString::doCompare( int32_t start,
    572               int32_t length,
    573               const UChar *srcChars,
    574               int32_t srcStart,
    575               int32_t srcLength) const
    576 {
    577   // compare illegal string values
    578   // treat const UChar *srcChars==NULL as an empty string
    579   if(isBogus()) {
    580     return -1;
    581   }
    582 
    583   // pin indices to legal values
    584   pinIndices(start, length);
    585 
    586   if(srcChars == NULL) {
    587     srcStart = srcLength = 0;
    588   }
    589 
    590   // get the correct pointer
    591   const UChar *chars = getArrayStart();
    592 
    593   chars += start;
    594   srcChars += srcStart;
    595 
    596   int32_t minLength;
    597   int8_t lengthResult;
    598 
    599   // get the srcLength if necessary
    600   if(srcLength < 0) {
    601     srcLength = u_strlen(srcChars + srcStart);
    602   }
    603 
    604   // are we comparing different lengths?
    605   if(length != srcLength) {
    606     if(length < srcLength) {
    607       minLength = length;
    608       lengthResult = -1;
    609     } else {
    610       minLength = srcLength;
    611       lengthResult = 1;
    612     }
    613   } else {
    614     minLength = length;
    615     lengthResult = 0;
    616   }
    617 
    618   /*
    619    * note that uprv_memcmp() returns an int but we return an int8_t;
    620    * we need to take care not to truncate the result -
    621    * one way to do this is to right-shift the value to
    622    * move the sign bit into the lower 8 bits and making sure that this
    623    * does not become 0 itself
    624    */
    625 
    626   if(minLength > 0 && chars != srcChars) {
    627     int32_t result;
    628 
    629 #   if U_IS_BIG_ENDIAN
    630       // big-endian: byte comparison works
    631       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
    632       if(result != 0) {
    633         return (int8_t)(result >> 15 | 1);
    634       }
    635 #   else
    636       // little-endian: compare UChar units
    637       do {
    638         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
    639         if(result != 0) {
    640           return (int8_t)(result >> 15 | 1);
    641         }
    642       } while(--minLength > 0);
    643 #   endif
    644   }
    645   return lengthResult;
    646 }
    647 
    648 /* String compare in code point order - doCompare() compares in code unit order. */
    649 int8_t
    650 UnicodeString::doCompareCodePointOrder(int32_t start,
    651                                        int32_t length,
    652                                        const UChar *srcChars,
    653                                        int32_t srcStart,
    654                                        int32_t srcLength) const
    655 {
    656   // compare illegal string values
    657   // treat const UChar *srcChars==NULL as an empty string
    658   if(isBogus()) {
    659     return -1;
    660   }
    661 
    662   // pin indices to legal values
    663   pinIndices(start, length);
    664 
    665   if(srcChars == NULL) {
    666     srcStart = srcLength = 0;
    667   }
    668 
    669   int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
    670   /* translate the 32-bit result into an 8-bit one */
    671   if(diff!=0) {
    672     return (int8_t)(diff >> 15 | 1);
    673   } else {
    674     return 0;
    675   }
    676 }
    677 
// Returns length(); a named wrapper around the length() accessor.
int32_t
UnicodeString::getLength() const {
    return length();
}
    682 
// Returns charAt(offset); a named wrapper around the charAt() accessor.
UChar
UnicodeString::getCharAt(int32_t offset) const {
  return charAt(offset);
}
    687 
// Returns char32At(offset); a named wrapper around the char32At() accessor.
UChar32
UnicodeString::getChar32At(int32_t offset) const {
  return char32At(offset);
}
    692 
    693 int32_t
    694 UnicodeString::countChar32(int32_t start, int32_t length) const {
    695   pinIndices(start, length);
    696   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
    697   return u_countChar32(getArrayStart()+start, length);
    698 }
    699 
    700 UBool
    701 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
    702   pinIndices(start, length);
    703   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
    704   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
    705 }
    706 
    707 int32_t
    708 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
    709   // pin index
    710   int32_t len = length();
    711   if(index<0) {
    712     index=0;
    713   } else if(index>len) {
    714     index=len;
    715   }
    716 
    717   const UChar *array = getArrayStart();
    718   if(delta>0) {
    719     UTF_FWD_N(array, index, len, delta);
    720   } else {
    721     UTF_BACK_N(array, 0, index, -delta);
    722   }
    723 
    724   return index;
    725 }
    726 
    727 void
    728 UnicodeString::doExtract(int32_t start,
    729              int32_t length,
    730              UChar *dst,
    731              int32_t dstStart) const
    732 {
    733   // pin indices to legal values
    734   pinIndices(start, length);
    735 
    736   // do not copy anything if we alias dst itself
    737   const UChar *array = getArrayStart();
    738   if(array + start != dst + dstStart) {
    739     us_arrayCopy(array, start, dst, dstStart, length);
    740   }
    741 }
    742 
    743 int32_t
    744 UnicodeString::extract(UChar *dest, int32_t destCapacity,
    745                        UErrorCode &errorCode) const {
    746   int32_t len = length();
    747   if(U_SUCCESS(errorCode)) {
    748     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
    749       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    750     } else {
    751       const UChar *array = getArrayStart();
    752       if(len>0 && len<=destCapacity && array!=dest) {
    753         uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
    754       }
    755       return u_terminateUChars(dest, destCapacity, len, &errorCode);
    756     }
    757   }
    758 
    759   return len;
    760 }
    761 
    762 int32_t
    763 UnicodeString::extract(int32_t start,
    764                        int32_t length,
    765                        char *target,
    766                        int32_t targetCapacity,
    767                        enum EInvariant) const
    768 {
    769   // if the arguments are illegal, then do nothing
    770   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
    771     return 0;
    772   }
    773 
    774   // pin the indices to legal values
    775   pinIndices(start, length);
    776 
    777   if(length <= targetCapacity) {
    778     u_UCharsToChars(getArrayStart() + start, target, length);
    779   }
    780   UErrorCode status = U_ZERO_ERROR;
    781   return u_terminateChars(target, targetCapacity, length, &status);
    782 }
    783 
    784 UnicodeString
    785 UnicodeString::tempSubString(int32_t start, int32_t len) const {
    786   pinIndices(start, len);
    787   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
    788   if(array==NULL) {
    789     array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string
    790     len=-2;  // bogus result string
    791   }
    792   return UnicodeString(FALSE, array + start, len);
    793 }
    794 
    795 int32_t
    796 UnicodeString::toUTF8(int32_t start, int32_t len,
    797                       char *target, int32_t capacity) const {
    798   pinIndices(start, len);
    799   int32_t length8;
    800   UErrorCode errorCode = U_ZERO_ERROR;
    801   u_strToUTF8WithSub(target, capacity, &length8,
    802                      getBuffer() + start, len,
    803                      0xFFFD,  // Standard substitution character.
    804                      NULL,    // Don't care about number of substitutions.
    805                      &errorCode);
    806   return length8;
    807 }
    808 
    809 #if U_CHARSET_IS_UTF8
    810 
    811 int32_t
    812 UnicodeString::extract(int32_t start, int32_t len,
    813                        char *target, uint32_t dstSize) const {
    814   // if the arguments are illegal, then do nothing
    815   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
    816     return 0;
    817   }
    818   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
    819 }
    820 
    821 // else see unistr_cnv.cpp
    822 #endif
    823 
// Extracts the text between start and limit into target.
// Both indices are pinned with pinIndex() first; the range is then handed
// to doExtract() as (start, limit - start).
void
UnicodeString::extractBetween(int32_t start,
                  int32_t limit,
                  UnicodeString& target) const {
  pinIndex(start);
  pinIndex(limit);
  doExtract(start, limit - start, target);
}
    832 
    833 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
    834 // as many bytes as the source has UChars.
    835 // The "worst cases" are writing systems like Indic, Thai and CJK with
    836 // 3:1 bytes:UChars.
    837 void
    838 UnicodeString::toUTF8(ByteSink &sink) const {
    839   int32_t length16 = length();
    840   if(length16 != 0) {
    841     char stackBuffer[1024];
    842     int32_t capacity = (int32_t)sizeof(stackBuffer);
    843     UBool utf8IsOwned = FALSE;
    844     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
    845                                       3*length16,
    846                                       stackBuffer, capacity,
    847                                       &capacity);
    848     int32_t length8 = 0;
    849     UErrorCode errorCode = U_ZERO_ERROR;
    850     u_strToUTF8WithSub(utf8, capacity, &length8,
    851                        getBuffer(), length16,
    852                        0xFFFD,  // Standard substitution character.
    853                        NULL,    // Don't care about number of substitutions.
    854                        &errorCode);
    855     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
    856       utf8 = (char *)uprv_malloc(length8);
    857       if(utf8 != NULL) {
    858         utf8IsOwned = TRUE;
    859         errorCode = U_ZERO_ERROR;
    860         u_strToUTF8WithSub(utf8, length8, &length8,
    861                            getBuffer(), length16,
    862                            0xFFFD,  // Standard substitution character.
    863                            NULL,    // Don't care about number of substitutions.
    864                            &errorCode);
    865       } else {
    866         errorCode = U_MEMORY_ALLOCATION_ERROR;
    867       }
    868     }
    869     if(U_SUCCESS(errorCode)) {
    870       sink.Append(utf8, length8);
    871       sink.Flush();
    872     }
    873     if(utf8IsOwned) {
    874       uprv_free(utf8);
    875     }
    876   }
    877 }
    878 
    879 int32_t
    880 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
    881   int32_t length32=0;
    882   if(U_SUCCESS(errorCode)) {
    883     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
    884     u_strToUTF32WithSub(utf32, capacity, &length32,
    885         getBuffer(), length(),
    886         0xfffd,  // Substitution character.
    887         NULL,    // Don't care about number of substitutions.
    888         &errorCode);
    889   }
    890   return length32;
    891 }
    892 
    893 int32_t
    894 UnicodeString::indexOf(const UChar *srcChars,
    895                int32_t srcStart,
    896                int32_t srcLength,
    897                int32_t start,
    898                int32_t length) const
    899 {
    900   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    901     return -1;
    902   }
    903 
    904   // UnicodeString does not find empty substrings
    905   if(srcLength < 0 && srcChars[srcStart] == 0) {
    906     return -1;
    907   }
    908 
    909   // get the indices within bounds
    910   pinIndices(start, length);
    911 
    912   // find the first occurrence of the substring
    913   const UChar *array = getArrayStart();
    914   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
    915   if(match == NULL) {
    916     return -1;
    917   } else {
    918     return (int32_t)(match - array);
    919   }
    920 }
    921 
    922 int32_t
    923 UnicodeString::doIndexOf(UChar c,
    924              int32_t start,
    925              int32_t length) const
    926 {
    927   // pin indices
    928   pinIndices(start, length);
    929 
    930   // find the first occurrence of c
    931   const UChar *array = getArrayStart();
    932   const UChar *match = u_memchr(array + start, c, length);
    933   if(match == NULL) {
    934     return -1;
    935   } else {
    936     return (int32_t)(match - array);
    937   }
    938 }
    939 
    940 int32_t
    941 UnicodeString::doIndexOf(UChar32 c,
    942                          int32_t start,
    943                          int32_t length) const {
    944   // pin indices
    945   pinIndices(start, length);
    946 
    947   // find the first occurrence of c
    948   const UChar *array = getArrayStart();
    949   const UChar *match = u_memchr32(array + start, c, length);
    950   if(match == NULL) {
    951     return -1;
    952   } else {
    953     return (int32_t)(match - array);
    954   }
    955 }
    956 
    957 int32_t
    958 UnicodeString::lastIndexOf(const UChar *srcChars,
    959                int32_t srcStart,
    960                int32_t srcLength,
    961                int32_t start,
    962                int32_t length) const
    963 {
    964   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    965     return -1;
    966   }
    967 
    968   // UnicodeString does not find empty substrings
    969   if(srcLength < 0 && srcChars[srcStart] == 0) {
    970     return -1;
    971   }
    972 
    973   // get the indices within bounds
    974   pinIndices(start, length);
    975 
    976   // find the last occurrence of the substring
    977   const UChar *array = getArrayStart();
    978   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
    979   if(match == NULL) {
    980     return -1;
    981   } else {
    982     return (int32_t)(match - array);
    983   }
    984 }
    985 
    986 int32_t
    987 UnicodeString::doLastIndexOf(UChar c,
    988                  int32_t start,
    989                  int32_t length) const
    990 {
    991   if(isBogus()) {
    992     return -1;
    993   }
    994 
    995   // pin indices
    996   pinIndices(start, length);
    997 
    998   // find the last occurrence of c
    999   const UChar *array = getArrayStart();
   1000   const UChar *match = u_memrchr(array + start, c, length);
   1001   if(match == NULL) {
   1002     return -1;
   1003   } else {
   1004     return (int32_t)(match - array);
   1005   }
   1006 }
   1007 
   1008 int32_t
   1009 UnicodeString::doLastIndexOf(UChar32 c,
   1010                              int32_t start,
   1011                              int32_t length) const {
   1012   // pin indices
   1013   pinIndices(start, length);
   1014 
   1015   // find the last occurrence of c
   1016   const UChar *array = getArrayStart();
   1017   const UChar *match = u_memrchr32(array + start, c, length);
   1018   if(match == NULL) {
   1019     return -1;
   1020   } else {
   1021     return (int32_t)(match - array);
   1022   }
   1023 }
   1024 
   1025 //========================================
   1026 // Write implementation
   1027 //========================================
   1028 
   1029 UnicodeString&
   1030 UnicodeString::findAndReplace(int32_t start,
   1031                   int32_t length,
   1032                   const UnicodeString& oldText,
   1033                   int32_t oldStart,
   1034                   int32_t oldLength,
   1035                   const UnicodeString& newText,
   1036                   int32_t newStart,
   1037                   int32_t newLength)
   1038 {
   1039   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
   1040     return *this;
   1041   }
   1042 
   1043   pinIndices(start, length);
   1044   oldText.pinIndices(oldStart, oldLength);
   1045   newText.pinIndices(newStart, newLength);
   1046 
   1047   if(oldLength == 0) {
   1048     return *this;
   1049   }
   1050 
   1051   while(length > 0 && length >= oldLength) {
   1052     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
   1053     if(pos < 0) {
   1054       // no more oldText's here: done
   1055       break;
   1056     } else {
   1057       // we found oldText, replace it by newText and go beyond it
   1058       replace(pos, oldLength, newText, newStart, newLength);
   1059       length -= pos + oldLength - start;
   1060       start = pos + newLength;
   1061     }
   1062   }
   1063 
   1064   return *this;
   1065 }
   1066 
   1067 
   1068 void
   1069 UnicodeString::setToBogus()
   1070 {
   1071   releaseArray();
   1072 
   1073   fShortLength = 0;
   1074   fUnion.fFields.fArray = 0;
   1075   fUnion.fFields.fCapacity = 0;
   1076   fFlags = kIsBogus;
   1077 }
   1078 
   1079 // turn a bogus string into an empty one
   1080 void
   1081 UnicodeString::unBogus() {
   1082   if(fFlags & kIsBogus) {
   1083     setToEmpty();
   1084   }
   1085 }
   1086 
   1087 // setTo() analogous to the readonly-aliasing constructor with the same signature
   1088 UnicodeString &
   1089 UnicodeString::setTo(UBool isTerminated,
   1090                      const UChar *text,
   1091                      int32_t textLength)
   1092 {
   1093   if(fFlags & kOpenGetBuffer) {
   1094     // do not modify a string that has an "open" getBuffer(minCapacity)
   1095     return *this;
   1096   }
   1097 
   1098   if(text == NULL) {
   1099     // treat as an empty string, do not alias
   1100     releaseArray();
   1101     setToEmpty();
   1102     return *this;
   1103   }
   1104 
   1105   if( textLength < -1 ||
   1106       (textLength == -1 && !isTerminated) ||
   1107       (textLength >= 0 && isTerminated && text[textLength] != 0)
   1108   ) {
   1109     setToBogus();
   1110     return *this;
   1111   }
   1112 
   1113   releaseArray();
   1114 
   1115   if(textLength == -1) {
   1116     // text is terminated, or else it would have failed the above test
   1117     textLength = u_strlen(text);
   1118   }
   1119   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
   1120 
   1121   fFlags = kReadonlyAlias;
   1122   return *this;
   1123 }
   1124 
   1125 // setTo() analogous to the writable-aliasing constructor with the same signature
   1126 UnicodeString &
   1127 UnicodeString::setTo(UChar *buffer,
   1128                      int32_t buffLength,
   1129                      int32_t buffCapacity) {
   1130   if(fFlags & kOpenGetBuffer) {
   1131     // do not modify a string that has an "open" getBuffer(minCapacity)
   1132     return *this;
   1133   }
   1134 
   1135   if(buffer == NULL) {
   1136     // treat as an empty string, do not alias
   1137     releaseArray();
   1138     setToEmpty();
   1139     return *this;
   1140   }
   1141 
   1142   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
   1143     setToBogus();
   1144     return *this;
   1145   } else if(buffLength == -1) {
   1146     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
   1147     const UChar *p = buffer, *limit = buffer + buffCapacity;
   1148     while(p != limit && *p != 0) {
   1149       ++p;
   1150     }
   1151     buffLength = (int32_t)(p - buffer);
   1152   }
   1153 
   1154   releaseArray();
   1155 
   1156   setArray(buffer, buffLength, buffCapacity);
   1157   fFlags = kWritableAlias;
   1158   return *this;
   1159 }
   1160 
   1161 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
   1162   unBogus();
   1163   int32_t length = utf8.length();
   1164   int32_t capacity;
   1165   // The UTF-16 string will be at most as long as the UTF-8 string.
   1166   if(length <= US_STACKBUF_SIZE) {
   1167     capacity = US_STACKBUF_SIZE;
   1168   } else {
   1169     capacity = length + 1;  // +1 for the terminating NUL.
   1170   }
   1171   UChar *utf16 = getBuffer(capacity);
   1172   int32_t length16;
   1173   UErrorCode errorCode = U_ZERO_ERROR;
   1174   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
   1175       utf8.data(), length,
   1176       0xfffd,  // Substitution character.
   1177       NULL,    // Don't care about number of substitutions.
   1178       &errorCode);
   1179   releaseBuffer(length16);
   1180   if(U_FAILURE(errorCode)) {
   1181     setToBogus();
   1182   }
   1183   return *this;
   1184 }
   1185 
   1186 UnicodeString&
   1187 UnicodeString::setCharAt(int32_t offset,
   1188              UChar c)
   1189 {
   1190   int32_t len = length();
   1191   if(cloneArrayIfNeeded() && len > 0) {
   1192     if(offset < 0) {
   1193       offset = 0;
   1194     } else if(offset >= len) {
   1195       offset = len - 1;
   1196     }
   1197 
   1198     getArrayStart()[offset] = c;
   1199   }
   1200   return *this;
   1201 }
   1202 
   1203 UnicodeString&
   1204 UnicodeString::doReplace( int32_t start,
   1205               int32_t length,
   1206               const UnicodeString& src,
   1207               int32_t srcStart,
   1208               int32_t srcLength)
   1209 {
   1210   if(!src.isBogus()) {
   1211     // pin the indices to legal values
   1212     src.pinIndices(srcStart, srcLength);
   1213 
   1214     // get the characters from src
   1215     // and replace the range in ourselves with them
   1216     return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
   1217   } else {
   1218     // remove the range
   1219     return doReplace(start, length, 0, 0, 0);
   1220   }
   1221 }
   1222 
   1223 UnicodeString&
   1224 UnicodeString::doReplace(int32_t start,
   1225              int32_t length,
   1226              const UChar *srcChars,
   1227              int32_t srcStart,
   1228              int32_t srcLength)
   1229 {
   1230   if(!isWritable()) {
   1231     return *this;
   1232   }
   1233 
   1234   int32_t oldLength = this->length();
   1235 
   1236   // optimize (read-only alias).remove(0, start) and .remove(start, end)
   1237   if((fFlags&kBufferIsReadonly) && srcLength == 0) {
   1238     if(start == 0) {
   1239       // remove prefix by adjusting the array pointer
   1240       pinIndex(length);
   1241       fUnion.fFields.fArray += length;
   1242       fUnion.fFields.fCapacity -= length;
   1243       setLength(oldLength - length);
   1244       return *this;
   1245     } else {
   1246       pinIndex(start);
   1247       if(length >= (oldLength - start)) {
   1248         // remove suffix by reducing the length (like truncate())
   1249         setLength(start);
   1250         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
   1251         return *this;
   1252       }
   1253     }
   1254   }
   1255 
   1256   if(srcChars == 0) {
   1257     srcStart = srcLength = 0;
   1258   } else if(srcLength < 0) {
   1259     // get the srcLength if necessary
   1260     srcLength = u_strlen(srcChars + srcStart);
   1261   }
   1262 
   1263   // calculate the size of the string after the replace
   1264   int32_t newLength;
   1265 
   1266   // optimize append() onto a large-enough, owned string
   1267   if(start >= oldLength) {
   1268     newLength = oldLength + srcLength;
   1269     if(newLength <= getCapacity() && isBufferWritable()) {
   1270       UChar *oldArray = getArrayStart();
   1271       // Do not copy characters when
   1272       //   UChar *buffer=str.getAppendBuffer(...);
   1273       // is followed by
   1274       //   str.append(buffer, length);
   1275       // or
   1276       //   str.appendString(buffer, length)
   1277       // or similar.
   1278       if(srcChars + srcStart != oldArray + start || start > oldLength) {
   1279         us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
   1280       }
   1281       setLength(newLength);
   1282       return *this;
   1283     } else {
   1284       // pin the indices to legal values
   1285       start = oldLength;
   1286       length = 0;
   1287     }
   1288   } else {
   1289     // pin the indices to legal values
   1290     pinIndices(start, length);
   1291 
   1292     newLength = oldLength - length + srcLength;
   1293   }
   1294 
   1295   // the following may change fArray but will not copy the current contents;
   1296   // therefore we need to keep the current fArray
   1297   UChar oldStackBuffer[US_STACKBUF_SIZE];
   1298   UChar *oldArray;
   1299   if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
   1300     // copy the stack buffer contents because it will be overwritten with
   1301     // fUnion.fFields values
   1302     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
   1303     oldArray = oldStackBuffer;
   1304   } else {
   1305     oldArray = getArrayStart();
   1306   }
   1307 
   1308   // clone our array and allocate a bigger array if needed
   1309   int32_t *bufferToDelete = 0;
   1310   if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
   1311                          FALSE, &bufferToDelete)
   1312   ) {
   1313     return *this;
   1314   }
   1315 
   1316   // now do the replace
   1317 
   1318   UChar *newArray = getArrayStart();
   1319   if(newArray != oldArray) {
   1320     // if fArray changed, then we need to copy everything except what will change
   1321     us_arrayCopy(oldArray, 0, newArray, 0, start);
   1322     us_arrayCopy(oldArray, start + length,
   1323                  newArray, start + srcLength,
   1324                  oldLength - (start + length));
   1325   } else if(length != srcLength) {
   1326     // fArray did not change; copy only the portion that isn't changing, leaving a hole
   1327     us_arrayCopy(oldArray, start + length,
   1328                  newArray, start + srcLength,
   1329                  oldLength - (start + length));
   1330   }
   1331 
   1332   // now fill in the hole with the new string
   1333   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
   1334 
   1335   setLength(newLength);
   1336 
   1337   // delayed delete in case srcChars == fArray when we started, and
   1338   // to keep oldArray alive for the above operations
   1339   if (bufferToDelete) {
   1340     uprv_free(bufferToDelete);
   1341   }
   1342 
   1343   return *this;
   1344 }
   1345 
   1346 /**
   1347  * Replaceable API
   1348  */
   1349 void
   1350 UnicodeString::handleReplaceBetween(int32_t start,
   1351                                     int32_t limit,
   1352                                     const UnicodeString& text) {
   1353     replaceBetween(start, limit, text);
   1354 }
   1355 
   1356 /**
   1357  * Replaceable API
   1358  */
   1359 void
   1360 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
   1361     if (limit <= start) {
   1362         return; // Nothing to do; avoid bogus malloc call
   1363     }
   1364     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
   1365     // Check to make sure text is not null.
   1366     if (text != NULL) {
   1367 	    extractBetween(start, limit, text, 0);
   1368 	    insert(dest, text, 0, limit - start);
   1369 	    uprv_free(text);
   1370     }
   1371 }
   1372 
   1373 /**
   1374  * Replaceable API
   1375  *
   1376  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
   1377  * so we implement this function here.
   1378  */
   1379 UBool Replaceable::hasMetaData() const {
   1380     return TRUE;
   1381 }
   1382 
   1383 /**
   1384  * Replaceable API
   1385  */
   1386 UBool UnicodeString::hasMetaData() const {
   1387     return FALSE;
   1388 }
   1389 
   1390 UnicodeString&
   1391 UnicodeString::doReverse(int32_t start, int32_t length) {
   1392   if(length <= 1 || !cloneArrayIfNeeded()) {
   1393     return *this;
   1394   }
   1395 
   1396   // pin the indices to legal values
   1397   pinIndices(start, length);
   1398   if(length <= 1) {  // pinIndices() might have shrunk the length
   1399     return *this;
   1400   }
   1401 
   1402   UChar *left = getArrayStart() + start;
   1403   UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
   1404   UChar swap;
   1405   UBool hasSupplementary = FALSE;
   1406 
   1407   // Before the loop we know left<right because length>=2.
   1408   do {
   1409     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
   1410     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
   1411     *right-- = swap;
   1412   } while(left < right);
   1413   // Make sure to test the middle code unit of an odd-length string.
   1414   // Redundant if the length is even.
   1415   hasSupplementary |= (UBool)U16_IS_LEAD(*left);
   1416 
   1417   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
   1418   if(hasSupplementary) {
   1419     UChar swap2;
   1420 
   1421     left = getArrayStart() + start;
   1422     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
   1423     while(left < right) {
   1424       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
   1425         *left++ = swap2;
   1426         *left++ = swap;
   1427       } else {
   1428         ++left;
   1429       }
   1430     }
   1431   }
   1432 
   1433   return *this;
   1434 }
   1435 
   1436 UBool
   1437 UnicodeString::padLeading(int32_t targetLength,
   1438                           UChar padChar)
   1439 {
   1440   int32_t oldLength = length();
   1441   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1442     return FALSE;
   1443   } else {
   1444     // move contents up by padding width
   1445     UChar *array = getArrayStart();
   1446     int32_t start = targetLength - oldLength;
   1447     us_arrayCopy(array, 0, array, start, oldLength);
   1448 
   1449     // fill in padding character
   1450     while(--start >= 0) {
   1451       array[start] = padChar;
   1452     }
   1453     setLength(targetLength);
   1454     return TRUE;
   1455   }
   1456 }
   1457 
   1458 UBool
   1459 UnicodeString::padTrailing(int32_t targetLength,
   1460                            UChar padChar)
   1461 {
   1462   int32_t oldLength = length();
   1463   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1464     return FALSE;
   1465   } else {
   1466     // fill in padding character
   1467     UChar *array = getArrayStart();
   1468     int32_t length = targetLength;
   1469     while(--length >= oldLength) {
   1470       array[length] = padChar;
   1471     }
   1472     setLength(targetLength);
   1473     return TRUE;
   1474   }
   1475 }
   1476 
   1477 //========================================
   1478 // Hashing
   1479 //========================================
   1480 int32_t
   1481 UnicodeString::doHashCode() const
   1482 {
   1483     /* Delegate hash computation to uhash.  This makes UnicodeString
   1484      * hashing consistent with UChar* hashing.  */
   1485     int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
   1486     if (hashCode == kInvalidHashCode) {
   1487         hashCode = kEmptyHashCode;
   1488     }
   1489     return hashCode;
   1490 }
   1491 
   1492 //========================================
   1493 // External Buffer
   1494 //========================================
   1495 
   1496 UChar *
   1497 UnicodeString::getBuffer(int32_t minCapacity) {
   1498   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
   1499     fFlags|=kOpenGetBuffer;
   1500     fShortLength=0;
   1501     return getArrayStart();
   1502   } else {
   1503     return 0;
   1504   }
   1505 }
   1506 
   1507 void
   1508 UnicodeString::releaseBuffer(int32_t newLength) {
   1509   if(fFlags&kOpenGetBuffer && newLength>=-1) {
   1510     // set the new fLength
   1511     int32_t capacity=getCapacity();
   1512     if(newLength==-1) {
   1513       // the new length is the string length, capped by fCapacity
   1514       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
   1515       while(p<limit && *p!=0) {
   1516         ++p;
   1517       }
   1518       newLength=(int32_t)(p-array);
   1519     } else if(newLength>capacity) {
   1520       newLength=capacity;
   1521     }
   1522     setLength(newLength);
   1523     fFlags&=~kOpenGetBuffer;
   1524   }
   1525 }
   1526 
   1527 //========================================
   1528 // Miscellaneous
   1529 //========================================
   1530 UBool
   1531 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
   1532                                   int32_t growCapacity,
   1533                                   UBool doCopyArray,
   1534                                   int32_t **pBufferToDelete,
   1535                                   UBool forceClone) {
   1536   // default parameters need to be static, therefore
   1537   // the defaults are -1 to have convenience defaults
   1538   if(newCapacity == -1) {
   1539     newCapacity = getCapacity();
   1540   }
   1541 
   1542   // while a getBuffer(minCapacity) is "open",
   1543   // prevent any modifications of the string by returning FALSE here
   1544   // if the string is bogus, then only an assignment or similar can revive it
   1545   if(!isWritable()) {
   1546     return FALSE;
   1547   }
   1548 
   1549   /*
   1550    * We need to make a copy of the array if
   1551    * the buffer is read-only, or
   1552    * the buffer is refCounted (shared), and refCount>1, or
   1553    * the buffer is too small.
   1554    * Return FALSE if memory could not be allocated.
   1555    */
   1556   if(forceClone ||
   1557      fFlags & kBufferIsReadonly ||
   1558      (fFlags & kRefCounted && refCount() > 1) ||
   1559      newCapacity > getCapacity()
   1560   ) {
   1561     // check growCapacity for default value and use of the stack buffer
   1562     if(growCapacity == -1) {
   1563       growCapacity = newCapacity;
   1564     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
   1565       growCapacity = US_STACKBUF_SIZE;
   1566     }
   1567 
   1568     // save old values
   1569     UChar oldStackBuffer[US_STACKBUF_SIZE];
   1570     UChar *oldArray;
   1571     uint8_t flags = fFlags;
   1572 
   1573     if(flags&kUsingStackBuffer) {
   1574       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
   1575         // copy the stack buffer contents because it will be overwritten with
   1576         // fUnion.fFields values
   1577         us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
   1578         oldArray = oldStackBuffer;
   1579       } else {
   1580         oldArray = 0; // no need to copy from stack buffer to itself
   1581       }
   1582     } else {
   1583       oldArray = fUnion.fFields.fArray;
   1584     }
   1585 
   1586     // allocate a new array
   1587     if(allocate(growCapacity) ||
   1588        (newCapacity < growCapacity && allocate(newCapacity))
   1589     ) {
   1590       if(doCopyArray && oldArray != 0) {
   1591         // copy the contents
   1592         // do not copy more than what fits - it may be smaller than before
   1593         int32_t minLength = length();
   1594         newCapacity = getCapacity();
   1595         if(newCapacity < minLength) {
   1596           minLength = newCapacity;
   1597           setLength(minLength);
   1598         }
   1599         us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
   1600       } else {
   1601         fShortLength = 0;
   1602       }
   1603 
   1604       // release the old array
   1605       if(flags & kRefCounted) {
   1606         // the array is refCounted; decrement and release if 0
   1607         int32_t *pRefCount = ((int32_t *)oldArray - 1);
   1608         if(umtx_atomic_dec(pRefCount) == 0) {
   1609           if(pBufferToDelete == 0) {
   1610             uprv_free(pRefCount);
   1611           } else {
   1612             // the caller requested to delete it himself
   1613             *pBufferToDelete = pRefCount;
   1614           }
   1615         }
   1616       }
   1617     } else {
   1618       // not enough memory for growCapacity and not even for the smaller newCapacity
   1619       // reset the old values for setToBogus() to release the array
   1620       if(!(flags&kUsingStackBuffer)) {
   1621         fUnion.fFields.fArray = oldArray;
   1622       }
   1623       fFlags = flags;
   1624       setToBogus();
   1625       return FALSE;
   1626     }
   1627   }
   1628   return TRUE;
   1629 }
   1630 
   1631 // UnicodeStringAppendable ------------------------------------------------- ***
   1632 
   1633 UBool
   1634 UnicodeStringAppendable::appendCodeUnit(UChar c) {
   1635   return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
   1636 }
   1637 
   1638 UBool
   1639 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
   1640   UChar buffer[U16_MAX_LENGTH];
   1641   int32_t cLength = 0;
   1642   UBool isError = FALSE;
   1643   U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
   1644   return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
   1645 }
   1646 
   1647 UBool
   1648 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
   1649   return str.doReplace(str.length(), 0, s, 0, length).isWritable();
   1650 }
   1651 
   1652 UBool
   1653 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
   1654   return str.cloneArrayIfNeeded(str.length() + appendCapacity);
   1655 }
   1656 
   1657 UChar *
   1658 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
   1659                                          int32_t desiredCapacityHint,
   1660                                          UChar *scratch, int32_t scratchCapacity,
   1661                                          int32_t *resultCapacity) {
   1662   if(minCapacity < 1 || scratchCapacity < minCapacity) {
   1663     *resultCapacity = 0;
   1664     return NULL;
   1665   }
   1666   int32_t oldLength = str.length();
   1667   if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
   1668     *resultCapacity = str.getCapacity() - oldLength;
   1669     return str.getArrayStart() + oldLength;
   1670   }
   1671   *resultCapacity = scratchCapacity;
   1672   return scratch;
   1673 }
   1674 
   1675 U_NAMESPACE_END
   1676 
   1677 #ifdef U_STATIC_IMPLEMENTATION
   1678 /*
   1679 This should never be called. It is defined here to make sure that the
   1680 virtual vector deleting destructor is defined within unistr.cpp.
   1681 The vector deleting destructor is already a part of UObject,
   1682 but defining it here makes sure that it is included with this object file.
   1683 This makes sure that static library dependencies are kept to a minimum.
   1684 */
   1685 static void uprv_UnicodeStringDummy(void) {
   1686     U_NAMESPACE_USE
   1687     delete [] (new UnicodeString[2]);
   1688 }
   1689 #endif
   1690