Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1999-2010, International Business Machines Corporation and   *
      4 * others. All Rights Reserved.                                               *
      5 ******************************************************************************
      6 *
      7 * File unistr.cpp
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   09/25/98    stephen     Creation.
     13 *   04/20/99    stephen     Overhauled per 4/16 code review.
     14 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
     15 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
     16 *                           Replaceable.
     17 *   06/25/01    grhoten     Removed the dependency on iostream
     18 ******************************************************************************
     19 */
     20 
     21 #include "unicode/utypes.h"
     22 #include "unicode/putil.h"
     23 #include "cstring.h"
     24 #include "cmemory.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/unistr.h"
     27 #include "uhash.h"
     28 #include "ustr_imp.h"
     29 #include "umutex.h"
     30 
     31 #if 0
     32 
     33 #if U_IOSTREAM_SOURCE >= 199711
     34 #include <iostream>
     35 using namespace std;
     36 #elif U_IOSTREAM_SOURCE >= 198506
     37 #include <iostream.h>
     38 #endif
     39 
     40 //DEBUGGING
     41 void
     42 print(const UnicodeString& s,
     43       const char *name)
     44 {
     45   UChar c;
     46   cout << name << ":|";
     47   for(int i = 0; i < s.length(); ++i) {
     48     c = s[i];
     49     if(c>= 0x007E || c < 0x0020)
     50       cout << "[0x" << hex << s[i] << "]";
     51     else
     52       cout << (char) s[i];
     53   }
     54   cout << '|' << endl;
     55 }
     56 
     57 void
     58 print(const UChar *s,
     59       int32_t len,
     60       const char *name)
     61 {
     62   UChar c;
     63   cout << name << ":|";
     64   for(int i = 0; i < len; ++i) {
     65     c = s[i];
     66     if(c>= 0x007E || c < 0x0020)
     67       cout << "[0x" << hex << s[i] << "]";
     68     else
     69       cout << (char) s[i];
     70   }
     71   cout << '|' << endl;
     72 }
     73 // END DEBUGGING
     74 #endif
     75 
     76 // Local function definitions for now
     77 
     78 // need to copy areas that may overlap
     79 static
     80 inline void
     81 us_arrayCopy(const UChar *src, int32_t srcStart,
     82          UChar *dst, int32_t dstStart, int32_t count)
     83 {
     84   if(count>0) {
     85     uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
     86   }
     87 }
     88 
     89 // u_unescapeAt() callback to get a UChar from a UnicodeString
     90 U_CDECL_BEGIN
     91 static UChar U_CALLCONV
     92 UnicodeString_charAt(int32_t offset, void *context) {
     93     return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
     94 }
     95 U_CDECL_END
     96 
     97 U_NAMESPACE_BEGIN
     98 
/* The Replaceable virtual destructor can't be defined in the header
   due to how AIX works with multiple definitions of virtual functions.
*/
// Out-of-line, empty definitions of the Replaceable destructor and constructor.
Replaceable::~Replaceable() {}
Replaceable::Replaceable() {}
// Expands the RTTI implementation macro for UnicodeString.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
    105 
    106 UnicodeString U_EXPORT2
    107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
    108     return
    109         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
    110             append(s1).
    111                 append(s2);
    112 }
    113 
    114 //========================================
    115 // Reference Counting functions, put at top of file so that optimizing compilers
    116 //                               have a chance to automatically inline.
    117 //========================================
    118 
    119 void
    120 UnicodeString::addRef()
    121 {  umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
    122 
    123 int32_t
    124 UnicodeString::removeRef()
    125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
    126 
    127 int32_t
    128 UnicodeString::refCount() const
    129 {
    130     umtx_lock(NULL);
    131     // Note: without the lock to force a memory barrier, we might see a very
    132     //       stale value on some multi-processor systems.
    133     int32_t  count = *((int32_t *)fUnion.fFields.fArray - 1);
    134     umtx_unlock(NULL);
    135     return count;
    136  }
    137 
    138 void
    139 UnicodeString::releaseArray() {
    140   if((fFlags & kRefCounted) && removeRef() == 0) {
    141     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
    142   }
    143 }
    144 
    145 
    146 
    147 //========================================
    148 // Constructors
    149 //========================================
// Default constructor: length 0, flagged as a short string.
UnicodeString::UnicodeString()
  : fShortLength(0),
    fFlags(kShortString)
{}
    154 
    155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
    156   : fShortLength(0),
    157     fFlags(0)
    158 {
    159   if(count <= 0 || (uint32_t)c > 0x10ffff) {
    160     // just allocate and do not do anything else
    161     allocate(capacity);
    162   } else {
    163     // count > 0, allocate and fill the new string with count c's
    164     int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
    165     if(capacity < length) {
    166       capacity = length;
    167     }
    168     if(allocate(capacity)) {
    169       UChar *array = getArrayStart();
    170       int32_t i = 0;
    171 
    172       // fill the new string with c
    173       if(unitCount == 1) {
    174         // fill with length UChars
    175         while(i < length) {
    176           array[i++] = (UChar)c;
    177         }
    178       } else {
    179         // get the code units for c
    180         UChar units[UTF_MAX_CHAR_LENGTH];
    181         UTF_APPEND_CHAR_UNSAFE(units, i, c);
    182 
    183         // now it must be i==unitCount
    184         i = 0;
    185 
    186         // for Unicode, unitCount can only be 1, 2, 3, or 4
    187         // 1 is handled above
    188         while(i < length) {
    189           int32_t unitIdx = 0;
    190           while(unitIdx < unitCount) {
    191             array[i++]=units[unitIdx++];
    192           }
    193         }
    194       }
    195     }
    196     setLength(length);
    197   }
    198 }
    199 
// Constructs a short string of length 1 whose only code unit is ch,
// stored in the first slot of the stack buffer.
UnicodeString::UnicodeString(UChar ch)
  : fShortLength(1),
    fFlags(kShortString)
{
  fUnion.fStackBuffer[0] = ch;
}
    206 
// Constructs a short string from the single code point ch.
// U16_APPEND writes the code units into the stack buffer and advances i;
// the resulting i becomes the string length.
UnicodeString::UnicodeString(UChar32 ch)
  : fShortLength(0),
    fFlags(kShortString)
{
  int32_t i = 0;
  // isError is required by the macro; its value is not inspected here
  UBool isError = FALSE;
  U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
  fShortLength = (int8_t)i;
}
    216 
// Constructs a string from text by replacing the (empty) contents via
// doReplace(); the source length is passed as -1.
UnicodeString::UnicodeString(const UChar *text)
  : fShortLength(0),
    fFlags(kShortString)
{
  doReplace(0, 0, text, 0, -1);
}
    223 
// Constructs a string from textLength code units of text by replacing the
// (empty) contents via doReplace().
UnicodeString::UnicodeString(const UChar *text,
                             int32_t textLength)
  : fShortLength(0),
    fFlags(kShortString)
{
  doReplace(0, 0, text, 0, textLength);
}
    231 
    232 UnicodeString::UnicodeString(UBool isTerminated,
    233                              const UChar *text,
    234                              int32_t textLength)
    235   : fShortLength(0),
    236     fFlags(kReadonlyAlias)
    237 {
    238   if(text == NULL) {
    239     // treat as an empty string, do not alias
    240     setToEmpty();
    241   } else if(textLength < -1 ||
    242             (textLength == -1 && !isTerminated) ||
    243             (textLength >= 0 && isTerminated && text[textLength] != 0)
    244   ) {
    245     setToBogus();
    246   } else {
    247     if(textLength == -1) {
    248       // text is terminated, or else it would have failed the above test
    249       textLength = u_strlen(text);
    250     }
    251     setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
    252   }
    253 }
    254 
    255 UnicodeString::UnicodeString(UChar *buff,
    256                              int32_t buffLength,
    257                              int32_t buffCapacity)
    258   : fShortLength(0),
    259     fFlags(kWritableAlias)
    260 {
    261   if(buff == NULL) {
    262     // treat as an empty string, do not alias
    263     setToEmpty();
    264   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    265     setToBogus();
    266   } else {
    267     if(buffLength == -1) {
    268       // fLength = u_strlen(buff); but do not look beyond buffCapacity
    269       const UChar *p = buff, *limit = buff + buffCapacity;
    270       while(p != limit && *p != 0) {
    271         ++p;
    272       }
    273       buffLength = (int32_t)(p - buff);
    274     }
    275     setArray(buff, buffLength, buffCapacity);
    276   }
    277 }
    278 
    279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
    280   : fShortLength(0),
    281     fFlags(kShortString)
    282 {
    283   if(src==NULL) {
    284     // treat as an empty string
    285   } else {
    286     if(length<0) {
    287       length=(int32_t)uprv_strlen(src);
    288     }
    289     if(cloneArrayIfNeeded(length, length, FALSE)) {
    290       u_charsToUChars(src, getArrayStart(), length);
    291       setLength(length);
    292     } else {
    293       setToBogus();
    294     }
    295   }
    296 }
    297 
    298 #if U_CHARSET_IS_UTF8
    299 
    300 UnicodeString::UnicodeString(const char *codepageData)
    301   : fShortLength(0),
    302     fFlags(kShortString) {
    303   if(codepageData != 0) {
    304     setToUTF8(codepageData);
    305   }
    306 }
    307 
    308 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
    309   : fShortLength(0),
    310     fFlags(kShortString) {
    311   // if there's nothing to convert, do nothing
    312   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
    313     return;
    314   }
    315   if(dataLength == -1) {
    316     dataLength = (int32_t)uprv_strlen(codepageData);
    317   }
    318   setToUTF8(StringPiece(codepageData, dataLength));
    319 }
    320 
    321 // else see unistr_cnv.cpp
    322 #endif
    323 
// Copy constructor: starts as an empty short string, then takes over the
// contents of that via copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  copyFrom(that);
}
    331 
// Substring constructor: starts as an empty short string, then is set
// from that beginning at srcStart via setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart);
}
    340 
// Substring constructor: starts as an empty short string, then is set
// from the range (srcStart, srcLength) of that via setTo().
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart, srcLength);
}
    350 
// Replaceable base class clone() default implementation, does not clone:
// it always returns NULL.
Replaceable *
Replaceable::clone() const {
  return NULL;
}
    356 
// UnicodeString overrides clone() with a real implementation:
// returns a new heap-allocated copy of this string (made with the copy
// constructor).
Replaceable *
UnicodeString::clone() const {
  return new UnicodeString(*this);
}
    362 
    363 //========================================
    364 // array allocation
    365 //========================================
    366 
    367 UBool
    368 UnicodeString::allocate(int32_t capacity) {
    369   if(capacity <= US_STACKBUF_SIZE) {
    370     fFlags = kShortString;
    371   } else {
    372     // count bytes for the refCounter and the string capacity, and
    373     // round up to a multiple of 16; then divide by 4 and allocate int32_t's
    374     // to be safely aligned for the refCount
    375     // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
    376     int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
    377     int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
    378     if(array != 0) {
    379       // set initial refCount and point behind the refCount
    380       *array++ = 1;
    381 
    382       // have fArray point to the first UChar
    383       fUnion.fFields.fArray = (UChar *)array;
    384       fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
    385       fFlags = kLongString;
    386     } else {
    387       fShortLength = 0;
    388       fUnion.fFields.fArray = 0;
    389       fUnion.fFields.fCapacity = 0;
    390       fFlags = kIsBogus;
    391       return FALSE;
    392     }
    393   }
    394   return TRUE;
    395 }
    396 
    397 //========================================
    398 // Destructor
    399 //========================================
// Destructor: gives up this object's hold on its array via releaseArray().
UnicodeString::~UnicodeString()
{
  releaseArray();
}
    404 
    405 //========================================
    406 // Factory methods
    407 //========================================
    408 
// Factory: returns a new string that was set from utf8 via setToUTF8().
UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
  UnicodeString result;
  result.setToUTF8(utf8);
  return result;
}
    414 
    415 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
    416   UnicodeString result;
    417   int32_t capacity;
    418   // Most UTF-32 strings will be BMP-only and result in a same-length
    419   // UTF-16 string. We overestimate the capacity just slightly,
    420   // just in case there are a few supplementary characters.
    421   if(length <= US_STACKBUF_SIZE) {
    422     capacity = US_STACKBUF_SIZE;
    423   } else {
    424     capacity = length + (length >> 4) + 4;
    425   }
    426   do {
    427     UChar *utf16 = result.getBuffer(capacity);
    428     int32_t length16;
    429     UErrorCode errorCode = U_ZERO_ERROR;
    430     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
    431         utf32, length,
    432         0xfffd,  // Substitution character.
    433         NULL,    // Don't care about number of substitutions.
    434         &errorCode);
    435     result.releaseBuffer(length16);
    436     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
    437       capacity = length16 + 1;  // +1 for the terminating NUL.
    438       continue;
    439     } else if(U_FAILURE(errorCode)) {
    440       result.setToBogus();
    441     }
    442     break;
    443   } while(TRUE);
    444   return result;
    445 }
    446 
    447 //========================================
    448 // Assignment
    449 //========================================
    450 
// Assignment: forwards to copyFrom(src) and returns its result.
UnicodeString &
UnicodeString::operator=(const UnicodeString &src) {
  return copyFrom(src);
}
    455 
// Like assignment, but calls copyFrom() with fastCopy=TRUE, which lets a
// read-only alias be copied as an alias instead of duplicating its contents.
UnicodeString &
UnicodeString::fastCopyFrom(const UnicodeString &src) {
  return copyFrom(src, TRUE);
}
    460 
// Makes this string a copy of src and returns *this.
//
// How the contents are taken over depends on src.fFlags:
// - short string: the stack buffer contents are copied;
// - long string: the reference-counted buffer is shared (refCount incremented);
// - read-only alias: with fastCopy the alias itself is copied, otherwise the
//   contents are copied into a newly allocated buffer;
// - writable alias: the contents are always copied into a new buffer.
// A bogus src, any other flags value, or a failed allocation makes this
// string bogus. An empty src makes this string empty.
UnicodeString &
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
  // if assigning to ourselves, do nothing
  if(this == 0 || this == &src) {
    return *this;
  }

  // is the right side bogus?
  if(&src == 0 || src.isBogus()) {
    setToBogus();
    return *this;
  }

  // delete the current contents
  releaseArray();

  if(src.isEmpty()) {
    // empty string - use the stack buffer
    setToEmpty();
    return *this;
  }

  // we always copy the length
  int32_t srcLength = src.length();
  setLength(srcLength);

  // fLength>0 and not an "open" src.getBuffer(minCapacity)
  switch(src.fFlags) {
  case kShortString:
    // short string using the stack buffer, do the same
    fFlags = kShortString;
    uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
    break;
  case kLongString:
    // src uses a refCounted string buffer, use that buffer with refCount
    // src is const, use a cast - we don't really change it
    ((UnicodeString &)src).addRef();
    // copy all fields, share the reference-counted buffer
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    fFlags = src.fFlags;
    break;
  case kReadonlyAlias:
    if(fastCopy) {
      // src is a readonly alias, do the same
      // -> maintain the readonly alias as such
      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
      fFlags = src.fFlags;
      break;
    }
    // else if(!fastCopy) fall through to case kWritableAlias
    // -> allocate a new buffer and copy the contents
  case kWritableAlias:
    // src is a writable alias; we make a copy of that instead
    if(allocate(srcLength)) {
      uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
      break;
    }
    // if there is not enough memory, then fall through to setting to bogus
    // (intentional fall-through: allocate() failed)
  default:
    // if src is bogus, set ourselves to bogus
    // do not call setToBogus() here because fArray and fFlags are not consistent here
    fShortLength = 0;
    fUnion.fFields.fArray = 0;
    fUnion.fFields.fCapacity = 0;
    fFlags = kIsBogus;
    break;
  }

  return *this;
}
    533 
    534 //========================================
    535 // Miscellaneous operations
    536 //========================================
    537 
    538 UnicodeString UnicodeString::unescape() const {
    539     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
    540     const UChar *array = getBuffer();
    541     int32_t len = length();
    542     int32_t prev = 0;
    543     for (int32_t i=0;;) {
    544         if (i == len) {
    545             result.append(array, prev, len - prev);
    546             break;
    547         }
    548         if (array[i++] == 0x5C /*'\\'*/) {
    549             result.append(array, prev, (i - 1) - prev);
    550             UChar32 c = unescapeAt(i); // advances i
    551             if (c < 0) {
    552                 result.remove(); // return empty string
    553                 break; // invalid escape sequence
    554             }
    555             result.append(c);
    556             prev = i;
    557         }
    558     }
    559     return result;
    560 }
    561 
// Decodes the escape sequence at offset by calling u_unescapeAt() with
// UnicodeString_charAt as the code unit callback and this string as its
// context; offset is passed by address so that u_unescapeAt() can update it.
UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
    return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
}
    565 
    566 //========================================
    567 // Read-only implementation
    568 //========================================
    569 int8_t
    570 UnicodeString::doCompare( int32_t start,
    571               int32_t length,
    572               const UChar *srcChars,
    573               int32_t srcStart,
    574               int32_t srcLength) const
    575 {
    576   // compare illegal string values
    577   // treat const UChar *srcChars==NULL as an empty string
    578   if(isBogus()) {
    579     return -1;
    580   }
    581 
    582   // pin indices to legal values
    583   pinIndices(start, length);
    584 
    585   if(srcChars == NULL) {
    586     srcStart = srcLength = 0;
    587   }
    588 
    589   // get the correct pointer
    590   const UChar *chars = getArrayStart();
    591 
    592   chars += start;
    593   srcChars += srcStart;
    594 
    595   int32_t minLength;
    596   int8_t lengthResult;
    597 
    598   // get the srcLength if necessary
    599   if(srcLength < 0) {
    600     srcLength = u_strlen(srcChars + srcStart);
    601   }
    602 
    603   // are we comparing different lengths?
    604   if(length != srcLength) {
    605     if(length < srcLength) {
    606       minLength = length;
    607       lengthResult = -1;
    608     } else {
    609       minLength = srcLength;
    610       lengthResult = 1;
    611     }
    612   } else {
    613     minLength = length;
    614     lengthResult = 0;
    615   }
    616 
    617   /*
    618    * note that uprv_memcmp() returns an int but we return an int8_t;
    619    * we need to take care not to truncate the result -
    620    * one way to do this is to right-shift the value to
    621    * move the sign bit into the lower 8 bits and making sure that this
    622    * does not become 0 itself
    623    */
    624 
    625   if(minLength > 0 && chars != srcChars) {
    626     int32_t result;
    627 
    628 #   if U_IS_BIG_ENDIAN
    629       // big-endian: byte comparison works
    630       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
    631       if(result != 0) {
    632         return (int8_t)(result >> 15 | 1);
    633       }
    634 #   else
    635       // little-endian: compare UChar units
    636       do {
    637         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
    638         if(result != 0) {
    639           return (int8_t)(result >> 15 | 1);
    640         }
    641       } while(--minLength > 0);
    642 #   endif
    643   }
    644   return lengthResult;
    645 }
    646 
    647 /* String compare in code point order - doCompare() compares in code unit order. */
    648 int8_t
    649 UnicodeString::doCompareCodePointOrder(int32_t start,
    650                                        int32_t length,
    651                                        const UChar *srcChars,
    652                                        int32_t srcStart,
    653                                        int32_t srcLength) const
    654 {
    655   // compare illegal string values
    656   // treat const UChar *srcChars==NULL as an empty string
    657   if(isBogus()) {
    658     return -1;
    659   }
    660 
    661   // pin indices to legal values
    662   pinIndices(start, length);
    663 
    664   if(srcChars == NULL) {
    665     srcStart = srcLength = 0;
    666   }
    667 
    668   int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
    669   /* translate the 32-bit result into an 8-bit one */
    670   if(diff!=0) {
    671     return (int8_t)(diff >> 15 | 1);
    672   } else {
    673     return 0;
    674   }
    675 }
    676 
// Forwards to length().
int32_t
UnicodeString::getLength() const {
    return length();
}
    681 
// Forwards to charAt(offset).
UChar
UnicodeString::getCharAt(int32_t offset) const {
  return charAt(offset);
}
    686 
// Forwards to char32At(offset).
UChar32
UnicodeString::getChar32At(int32_t offset) const {
  return char32At(offset);
}
    691 
    692 int32_t
    693 UnicodeString::countChar32(int32_t start, int32_t length) const {
    694   pinIndices(start, length);
    695   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
    696   return u_countChar32(getArrayStart()+start, length);
    697 }
    698 
    699 UBool
    700 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
    701   pinIndices(start, length);
    702   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
    703   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
    704 }
    705 
    706 int32_t
    707 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
    708   // pin index
    709   int32_t len = length();
    710   if(index<0) {
    711     index=0;
    712   } else if(index>len) {
    713     index=len;
    714   }
    715 
    716   const UChar *array = getArrayStart();
    717   if(delta>0) {
    718     UTF_FWD_N(array, index, len, delta);
    719   } else {
    720     UTF_BACK_N(array, 0, index, -delta);
    721   }
    722 
    723   return index;
    724 }
    725 
    726 void
    727 UnicodeString::doExtract(int32_t start,
    728              int32_t length,
    729              UChar *dst,
    730              int32_t dstStart) const
    731 {
    732   // pin indices to legal values
    733   pinIndices(start, length);
    734 
    735   // do not copy anything if we alias dst itself
    736   const UChar *array = getArrayStart();
    737   if(array + start != dst + dstStart) {
    738     us_arrayCopy(array, start, dst, dstStart, length);
    739   }
    740 }
    741 
    742 int32_t
    743 UnicodeString::extract(UChar *dest, int32_t destCapacity,
    744                        UErrorCode &errorCode) const {
    745   int32_t len = length();
    746   if(U_SUCCESS(errorCode)) {
    747     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
    748       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    749     } else {
    750       const UChar *array = getArrayStart();
    751       if(len>0 && len<=destCapacity && array!=dest) {
    752         uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
    753       }
    754       return u_terminateUChars(dest, destCapacity, len, &errorCode);
    755     }
    756   }
    757 
    758   return len;
    759 }
    760 
    761 int32_t
    762 UnicodeString::extract(int32_t start,
    763                        int32_t length,
    764                        char *target,
    765                        int32_t targetCapacity,
    766                        enum EInvariant) const
    767 {
    768   // if the arguments are illegal, then do nothing
    769   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
    770     return 0;
    771   }
    772 
    773   // pin the indices to legal values
    774   pinIndices(start, length);
    775 
    776   if(length <= targetCapacity) {
    777     u_UCharsToChars(getArrayStart() + start, target, length);
    778   }
    779   UErrorCode status = U_ZERO_ERROR;
    780   return u_terminateChars(target, targetCapacity, length, &status);
    781 }
    782 
    783 UnicodeString
    784 UnicodeString::tempSubString(int32_t start, int32_t len) const {
    785   pinIndices(start, len);
    786   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
    787   if(array==NULL) {
    788     array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string
    789     len=-2;  // bogus result string
    790   }
    791   return UnicodeString(FALSE, array + start, len);
    792 }
    793 
    794 int32_t
    795 UnicodeString::toUTF8(int32_t start, int32_t len,
    796                       char *target, int32_t capacity) const {
    797   pinIndices(start, len);
    798   int32_t length8;
    799   UErrorCode errorCode = U_ZERO_ERROR;
    800   u_strToUTF8WithSub(target, capacity, &length8,
    801                      getBuffer() + start, len,
    802                      0xFFFD,  // Standard substitution character.
    803                      NULL,    // Don't care about number of substitutions.
    804                      &errorCode);
    805   return length8;
    806 }
    807 
    808 #if U_CHARSET_IS_UTF8
    809 
    810 int32_t
    811 UnicodeString::extract(int32_t start, int32_t len,
    812                        char *target, uint32_t dstSize) const {
    813   // if the arguments are illegal, then do nothing
    814   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
    815     return 0;
    816   }
    817   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
    818 }
    819 
    820 // else see unistr_cnv.cpp
    821 #endif
    822 
    823 void
    824 UnicodeString::extractBetween(int32_t start,
    825                   int32_t limit,
    826                   UnicodeString& target) const {
    827   pinIndex(start);
    828   pinIndex(limit);
    829   doExtract(start, limit - start, target);
    830 }
    831 
    832 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
    833 // as many bytes as the source has UChars.
    834 // The "worst cases" are writing systems like Indic, Thai and CJK with
    835 // 3:1 bytes:UChars.
    836 void
    837 UnicodeString::toUTF8(ByteSink &sink) const {
    838   int32_t length16 = length();
    839   if(length16 != 0) {
    840     char stackBuffer[1024];
    841     int32_t capacity = (int32_t)sizeof(stackBuffer);
    842     UBool utf8IsOwned = FALSE;
    843     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
    844                                       3*length16,
    845                                       stackBuffer, capacity,
    846                                       &capacity);
    847     int32_t length8 = 0;
    848     UErrorCode errorCode = U_ZERO_ERROR;
    849     u_strToUTF8WithSub(utf8, capacity, &length8,
    850                        getBuffer(), length16,
    851                        0xFFFD,  // Standard substitution character.
    852                        NULL,    // Don't care about number of substitutions.
    853                        &errorCode);
    854     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
    855       utf8 = (char *)uprv_malloc(length8);
    856       if(utf8 != NULL) {
    857         utf8IsOwned = TRUE;
    858         errorCode = U_ZERO_ERROR;
    859         u_strToUTF8WithSub(utf8, length8, &length8,
    860                            getBuffer(), length16,
    861                            0xFFFD,  // Standard substitution character.
    862                            NULL,    // Don't care about number of substitutions.
    863                            &errorCode);
    864       } else {
    865         errorCode = U_MEMORY_ALLOCATION_ERROR;
    866       }
    867     }
    868     if(U_SUCCESS(errorCode)) {
    869       sink.Append(utf8, length8);
    870       sink.Flush();
    871     }
    872     if(utf8IsOwned) {
    873       uprv_free(utf8);
    874     }
    875   }
    876 }
    877 
    878 int32_t
    879 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
    880   int32_t length32=0;
    881   if(U_SUCCESS(errorCode)) {
    882     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
    883     u_strToUTF32WithSub(utf32, capacity, &length32,
    884         getBuffer(), length(),
    885         0xfffd,  // Substitution character.
    886         NULL,    // Don't care about number of substitutions.
    887         &errorCode);
    888   }
    889   return length32;
    890 }
    891 
    892 int32_t
    893 UnicodeString::indexOf(const UChar *srcChars,
    894                int32_t srcStart,
    895                int32_t srcLength,
    896                int32_t start,
    897                int32_t length) const
    898 {
    899   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    900     return -1;
    901   }
    902 
    903   // UnicodeString does not find empty substrings
    904   if(srcLength < 0 && srcChars[srcStart] == 0) {
    905     return -1;
    906   }
    907 
    908   // get the indices within bounds
    909   pinIndices(start, length);
    910 
    911   // find the first occurrence of the substring
    912   const UChar *array = getArrayStart();
    913   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
    914   if(match == NULL) {
    915     return -1;
    916   } else {
    917     return (int32_t)(match - array);
    918   }
    919 }
    920 
    921 int32_t
    922 UnicodeString::doIndexOf(UChar c,
    923              int32_t start,
    924              int32_t length) const
    925 {
    926   // pin indices
    927   pinIndices(start, length);
    928 
    929   // find the first occurrence of c
    930   const UChar *array = getArrayStart();
    931   const UChar *match = u_memchr(array + start, c, length);
    932   if(match == NULL) {
    933     return -1;
    934   } else {
    935     return (int32_t)(match - array);
    936   }
    937 }
    938 
    939 int32_t
    940 UnicodeString::doIndexOf(UChar32 c,
    941                          int32_t start,
    942                          int32_t length) const {
    943   // pin indices
    944   pinIndices(start, length);
    945 
    946   // find the first occurrence of c
    947   const UChar *array = getArrayStart();
    948   const UChar *match = u_memchr32(array + start, c, length);
    949   if(match == NULL) {
    950     return -1;
    951   } else {
    952     return (int32_t)(match - array);
    953   }
    954 }
    955 
    956 int32_t
    957 UnicodeString::lastIndexOf(const UChar *srcChars,
    958                int32_t srcStart,
    959                int32_t srcLength,
    960                int32_t start,
    961                int32_t length) const
    962 {
    963   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    964     return -1;
    965   }
    966 
    967   // UnicodeString does not find empty substrings
    968   if(srcLength < 0 && srcChars[srcStart] == 0) {
    969     return -1;
    970   }
    971 
    972   // get the indices within bounds
    973   pinIndices(start, length);
    974 
    975   // find the last occurrence of the substring
    976   const UChar *array = getArrayStart();
    977   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
    978   if(match == NULL) {
    979     return -1;
    980   } else {
    981     return (int32_t)(match - array);
    982   }
    983 }
    984 
    985 int32_t
    986 UnicodeString::doLastIndexOf(UChar c,
    987                  int32_t start,
    988                  int32_t length) const
    989 {
    990   if(isBogus()) {
    991     return -1;
    992   }
    993 
    994   // pin indices
    995   pinIndices(start, length);
    996 
    997   // find the last occurrence of c
    998   const UChar *array = getArrayStart();
    999   const UChar *match = u_memrchr(array + start, c, length);
   1000   if(match == NULL) {
   1001     return -1;
   1002   } else {
   1003     return (int32_t)(match - array);
   1004   }
   1005 }
   1006 
   1007 int32_t
   1008 UnicodeString::doLastIndexOf(UChar32 c,
   1009                              int32_t start,
   1010                              int32_t length) const {
   1011   // pin indices
   1012   pinIndices(start, length);
   1013 
   1014   // find the last occurrence of c
   1015   const UChar *array = getArrayStart();
   1016   const UChar *match = u_memrchr32(array + start, c, length);
   1017   if(match == NULL) {
   1018     return -1;
   1019   } else {
   1020     return (int32_t)(match - array);
   1021   }
   1022 }
   1023 
   1024 //========================================
   1025 // Write implementation
   1026 //========================================
   1027 
   1028 UnicodeString&
   1029 UnicodeString::findAndReplace(int32_t start,
   1030                   int32_t length,
   1031                   const UnicodeString& oldText,
   1032                   int32_t oldStart,
   1033                   int32_t oldLength,
   1034                   const UnicodeString& newText,
   1035                   int32_t newStart,
   1036                   int32_t newLength)
   1037 {
   1038   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
   1039     return *this;
   1040   }
   1041 
   1042   pinIndices(start, length);
   1043   oldText.pinIndices(oldStart, oldLength);
   1044   newText.pinIndices(newStart, newLength);
   1045 
   1046   if(oldLength == 0) {
   1047     return *this;
   1048   }
   1049 
   1050   while(length > 0 && length >= oldLength) {
   1051     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
   1052     if(pos < 0) {
   1053       // no more oldText's here: done
   1054       break;
   1055     } else {
   1056       // we found oldText, replace it by newText and go beyond it
   1057       replace(pos, oldLength, newText, newStart, newLength);
   1058       length -= pos + oldLength - start;
   1059       start = pos + newLength;
   1060     }
   1061   }
   1062 
   1063   return *this;
   1064 }
   1065 
   1066 
   1067 void
   1068 UnicodeString::setToBogus()
   1069 {
   1070   releaseArray();
   1071 
   1072   fShortLength = 0;
   1073   fUnion.fFields.fArray = 0;
   1074   fUnion.fFields.fCapacity = 0;
   1075   fFlags = kIsBogus;
   1076 }
   1077 
   1078 // turn a bogus string into an empty one
   1079 void
   1080 UnicodeString::unBogus() {
   1081   if(fFlags & kIsBogus) {
   1082     setToEmpty();
   1083   }
   1084 }
   1085 
   1086 // setTo() analogous to the readonly-aliasing constructor with the same signature
   1087 UnicodeString &
   1088 UnicodeString::setTo(UBool isTerminated,
   1089                      const UChar *text,
   1090                      int32_t textLength)
   1091 {
   1092   if(fFlags & kOpenGetBuffer) {
   1093     // do not modify a string that has an "open" getBuffer(minCapacity)
   1094     return *this;
   1095   }
   1096 
   1097   if(text == NULL) {
   1098     // treat as an empty string, do not alias
   1099     releaseArray();
   1100     setToEmpty();
   1101     return *this;
   1102   }
   1103 
   1104   if( textLength < -1 ||
   1105       (textLength == -1 && !isTerminated) ||
   1106       (textLength >= 0 && isTerminated && text[textLength] != 0)
   1107   ) {
   1108     setToBogus();
   1109     return *this;
   1110   }
   1111 
   1112   releaseArray();
   1113 
   1114   if(textLength == -1) {
   1115     // text is terminated, or else it would have failed the above test
   1116     textLength = u_strlen(text);
   1117   }
   1118   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
   1119 
   1120   fFlags = kReadonlyAlias;
   1121   return *this;
   1122 }
   1123 
   1124 // setTo() analogous to the writable-aliasing constructor with the same signature
   1125 UnicodeString &
   1126 UnicodeString::setTo(UChar *buffer,
   1127                      int32_t buffLength,
   1128                      int32_t buffCapacity) {
   1129   if(fFlags & kOpenGetBuffer) {
   1130     // do not modify a string that has an "open" getBuffer(minCapacity)
   1131     return *this;
   1132   }
   1133 
   1134   if(buffer == NULL) {
   1135     // treat as an empty string, do not alias
   1136     releaseArray();
   1137     setToEmpty();
   1138     return *this;
   1139   }
   1140 
   1141   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
   1142     setToBogus();
   1143     return *this;
   1144   } else if(buffLength == -1) {
   1145     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
   1146     const UChar *p = buffer, *limit = buffer + buffCapacity;
   1147     while(p != limit && *p != 0) {
   1148       ++p;
   1149     }
   1150     buffLength = (int32_t)(p - buffer);
   1151   }
   1152 
   1153   releaseArray();
   1154 
   1155   setArray(buffer, buffLength, buffCapacity);
   1156   fFlags = kWritableAlias;
   1157   return *this;
   1158 }
   1159 
   1160 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
   1161   unBogus();
   1162   int32_t length = utf8.length();
   1163   int32_t capacity;
   1164   // The UTF-16 string will be at most as long as the UTF-8 string.
   1165   if(length <= US_STACKBUF_SIZE) {
   1166     capacity = US_STACKBUF_SIZE;
   1167   } else {
   1168     capacity = length + 1;  // +1 for the terminating NUL.
   1169   }
   1170   UChar *utf16 = getBuffer(capacity);
   1171   int32_t length16;
   1172   UErrorCode errorCode = U_ZERO_ERROR;
   1173   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
   1174       utf8.data(), length,
   1175       0xfffd,  // Substitution character.
   1176       NULL,    // Don't care about number of substitutions.
   1177       &errorCode);
   1178   releaseBuffer(length16);
   1179   if(U_FAILURE(errorCode)) {
   1180     setToBogus();
   1181   }
   1182   return *this;
   1183 }
   1184 
   1185 UnicodeString&
   1186 UnicodeString::setCharAt(int32_t offset,
   1187              UChar c)
   1188 {
   1189   int32_t len = length();
   1190   if(cloneArrayIfNeeded() && len > 0) {
   1191     if(offset < 0) {
   1192       offset = 0;
   1193     } else if(offset >= len) {
   1194       offset = len - 1;
   1195     }
   1196 
   1197     getArrayStart()[offset] = c;
   1198   }
   1199   return *this;
   1200 }
   1201 
   1202 UnicodeString&
   1203 UnicodeString::doReplace( int32_t start,
   1204               int32_t length,
   1205               const UnicodeString& src,
   1206               int32_t srcStart,
   1207               int32_t srcLength)
   1208 {
   1209   if(!src.isBogus()) {
   1210     // pin the indices to legal values
   1211     src.pinIndices(srcStart, srcLength);
   1212 
   1213     // get the characters from src
   1214     // and replace the range in ourselves with them
   1215     return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
   1216   } else {
   1217     // remove the range
   1218     return doReplace(start, length, 0, 0, 0);
   1219   }
   1220 }
   1221 
   1222 UnicodeString&
   1223 UnicodeString::doReplace(int32_t start,
   1224              int32_t length,
   1225              const UChar *srcChars,
   1226              int32_t srcStart,
   1227              int32_t srcLength)
   1228 {
   1229   if(!isWritable()) {
   1230     return *this;
   1231   }
   1232 
   1233   int32_t oldLength = this->length();
   1234 
   1235   // optimize (read-only alias).remove(0, start) and .remove(start, end)
   1236   if((fFlags&kBufferIsReadonly) && srcLength == 0) {
   1237     if(start == 0) {
   1238       // remove prefix by adjusting the array pointer
   1239       pinIndex(length);
   1240       fUnion.fFields.fArray += length;
   1241       fUnion.fFields.fCapacity -= length;
   1242       setLength(oldLength - length);
   1243       return *this;
   1244     } else {
   1245       pinIndex(start);
   1246       if(length >= (oldLength - start)) {
   1247         // remove suffix by reducing the length (like truncate())
   1248         setLength(start);
   1249         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
   1250         return *this;
   1251       }
   1252     }
   1253   }
   1254 
   1255   if(srcChars == 0) {
   1256     srcStart = srcLength = 0;
   1257   } else if(srcLength < 0) {
   1258     // get the srcLength if necessary
   1259     srcLength = u_strlen(srcChars + srcStart);
   1260   }
   1261 
   1262   // calculate the size of the string after the replace
   1263   int32_t newSize;
   1264 
   1265   // optimize append() onto a large-enough, owned string
   1266   if(start >= oldLength) {
   1267     newSize = oldLength + srcLength;
   1268     if(newSize <= getCapacity() && isBufferWritable()) {
   1269       us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength);
   1270       setLength(newSize);
   1271       return *this;
   1272     } else {
   1273       // pin the indices to legal values
   1274       start = oldLength;
   1275       length = 0;
   1276     }
   1277   } else {
   1278     // pin the indices to legal values
   1279     pinIndices(start, length);
   1280 
   1281     newSize = oldLength - length + srcLength;
   1282   }
   1283 
   1284   // the following may change fArray but will not copy the current contents;
   1285   // therefore we need to keep the current fArray
   1286   UChar oldStackBuffer[US_STACKBUF_SIZE];
   1287   UChar *oldArray;
   1288   if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) {
   1289     // copy the stack buffer contents because it will be overwritten with
   1290     // fUnion.fFields values
   1291     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
   1292     oldArray = oldStackBuffer;
   1293   } else {
   1294     oldArray = getArrayStart();
   1295   }
   1296 
   1297   // clone our array and allocate a bigger array if needed
   1298   int32_t *bufferToDelete = 0;
   1299   if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
   1300                          FALSE, &bufferToDelete)
   1301   ) {
   1302     return *this;
   1303   }
   1304 
   1305   // now do the replace
   1306 
   1307   UChar *newArray = getArrayStart();
   1308   if(newArray != oldArray) {
   1309     // if fArray changed, then we need to copy everything except what will change
   1310     us_arrayCopy(oldArray, 0, newArray, 0, start);
   1311     us_arrayCopy(oldArray, start + length,
   1312                  newArray, start + srcLength,
   1313                  oldLength - (start + length));
   1314   } else if(length != srcLength) {
   1315     // fArray did not change; copy only the portion that isn't changing, leaving a hole
   1316     us_arrayCopy(oldArray, start + length,
   1317                  newArray, start + srcLength,
   1318                  oldLength - (start + length));
   1319   }
   1320 
   1321   // now fill in the hole with the new string
   1322   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
   1323 
   1324   setLength(newSize);
   1325 
   1326   // delayed delete in case srcChars == fArray when we started, and
   1327   // to keep oldArray alive for the above operations
   1328   if (bufferToDelete) {
   1329     uprv_free(bufferToDelete);
   1330   }
   1331 
   1332   return *this;
   1333 }
   1334 
   1335 /**
   1336  * Replaceable API
   1337  */
   1338 void
   1339 UnicodeString::handleReplaceBetween(int32_t start,
   1340                                     int32_t limit,
   1341                                     const UnicodeString& text) {
   1342     replaceBetween(start, limit, text);
   1343 }
   1344 
   1345 /**
   1346  * Replaceable API
   1347  */
   1348 void
   1349 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
   1350     if (limit <= start) {
   1351         return; // Nothing to do; avoid bogus malloc call
   1352     }
   1353     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
   1354     // Check to make sure text is not null.
   1355     if (text != NULL) {
   1356 	    extractBetween(start, limit, text, 0);
   1357 	    insert(dest, text, 0, limit - start);
   1358 	    uprv_free(text);
   1359     }
   1360 }
   1361 
   1362 /**
   1363  * Replaceable API
   1364  *
   1365  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
   1366  * so we implement this function here.
   1367  */
   1368 UBool Replaceable::hasMetaData() const {
   1369     return TRUE;
   1370 }
   1371 
   1372 /**
   1373  * Replaceable API
   1374  */
   1375 UBool UnicodeString::hasMetaData() const {
   1376     return FALSE;
   1377 }
   1378 
   1379 UnicodeString&
   1380 UnicodeString::doReverse(int32_t start, int32_t length) {
   1381   if(length <= 1 || !cloneArrayIfNeeded()) {
   1382     return *this;
   1383   }
   1384 
   1385   // pin the indices to legal values
   1386   pinIndices(start, length);
   1387   if(length <= 1) {  // pinIndices() might have shrunk the length
   1388     return *this;
   1389   }
   1390 
   1391   UChar *left = getArrayStart() + start;
   1392   UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
   1393   UChar swap;
   1394   UBool hasSupplementary = FALSE;
   1395 
   1396   // Before the loop we know left<right because length>=2.
   1397   do {
   1398     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
   1399     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
   1400     *right-- = swap;
   1401   } while(left < right);
   1402   // Make sure to test the middle code unit of an odd-length string.
   1403   // Redundant if the length is even.
   1404   hasSupplementary |= (UBool)U16_IS_LEAD(*left);
   1405 
   1406   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
   1407   if(hasSupplementary) {
   1408     UChar swap2;
   1409 
   1410     left = getArrayStart() + start;
   1411     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
   1412     while(left < right) {
   1413       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
   1414         *left++ = swap2;
   1415         *left++ = swap;
   1416       } else {
   1417         ++left;
   1418       }
   1419     }
   1420   }
   1421 
   1422   return *this;
   1423 }
   1424 
   1425 UBool
   1426 UnicodeString::padLeading(int32_t targetLength,
   1427                           UChar padChar)
   1428 {
   1429   int32_t oldLength = length();
   1430   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1431     return FALSE;
   1432   } else {
   1433     // move contents up by padding width
   1434     UChar *array = getArrayStart();
   1435     int32_t start = targetLength - oldLength;
   1436     us_arrayCopy(array, 0, array, start, oldLength);
   1437 
   1438     // fill in padding character
   1439     while(--start >= 0) {
   1440       array[start] = padChar;
   1441     }
   1442     setLength(targetLength);
   1443     return TRUE;
   1444   }
   1445 }
   1446 
   1447 UBool
   1448 UnicodeString::padTrailing(int32_t targetLength,
   1449                            UChar padChar)
   1450 {
   1451   int32_t oldLength = length();
   1452   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1453     return FALSE;
   1454   } else {
   1455     // fill in padding character
   1456     UChar *array = getArrayStart();
   1457     int32_t length = targetLength;
   1458     while(--length >= oldLength) {
   1459       array[length] = padChar;
   1460     }
   1461     setLength(targetLength);
   1462     return TRUE;
   1463   }
   1464 }
   1465 
   1466 //========================================
   1467 // Hashing
   1468 //========================================
   1469 int32_t
   1470 UnicodeString::doHashCode() const
   1471 {
   1472     /* Delegate hash computation to uhash.  This makes UnicodeString
   1473      * hashing consistent with UChar* hashing.  */
   1474     int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
   1475     if (hashCode == kInvalidHashCode) {
   1476         hashCode = kEmptyHashCode;
   1477     }
   1478     return hashCode;
   1479 }
   1480 
   1481 //========================================
   1482 // External Buffer
   1483 //========================================
   1484 
   1485 UChar *
   1486 UnicodeString::getBuffer(int32_t minCapacity) {
   1487   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
   1488     fFlags|=kOpenGetBuffer;
   1489     fShortLength=0;
   1490     return getArrayStart();
   1491   } else {
   1492     return 0;
   1493   }
   1494 }
   1495 
   1496 void
   1497 UnicodeString::releaseBuffer(int32_t newLength) {
   1498   if(fFlags&kOpenGetBuffer && newLength>=-1) {
   1499     // set the new fLength
   1500     int32_t capacity=getCapacity();
   1501     if(newLength==-1) {
   1502       // the new length is the string length, capped by fCapacity
   1503       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
   1504       while(p<limit && *p!=0) {
   1505         ++p;
   1506       }
   1507       newLength=(int32_t)(p-array);
   1508     } else if(newLength>capacity) {
   1509       newLength=capacity;
   1510     }
   1511     setLength(newLength);
   1512     fFlags&=~kOpenGetBuffer;
   1513   }
   1514 }
   1515 
   1516 //========================================
   1517 // Miscellaneous
   1518 //========================================
         /*
          * Makes sure this string has an array that may be written to and that
          * holds at least newCapacity UChars, allocating a new array if necessary.
          *
          * newCapacity     required capacity; -1 means the current capacity
          * growCapacity    capacity to try first when allocating; -1 means
          *                 newCapacity
          * doCopyArray     whether the old contents are copied into a newly
          *                 allocated array
          * pBufferToDelete if not 0 and the old refCounted buffer drops to a
          *                 count of 0, the buffer is handed to the caller here
          *                 instead of being freed
          * forceClone      allocate anew even if none of the other conditions
          *                 requires it
          *
          * Returns TRUE if the string is writable and no allocation was needed
          * or the allocation succeeded. Returns FALSE if the string is not
          * writable, or if allocation failed, in which case the string is set
          * to bogus.
          */
    1519 UBool
    1520 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
    1521                                   int32_t growCapacity,
    1522                                   UBool doCopyArray,
    1523                                   int32_t **pBufferToDelete,
    1524                                   UBool forceClone) {
    1525   // default parameters need to be static, therefore
    1526   // the defaults are -1 to have convenience defaults
    1527   if(newCapacity == -1) {
    1528     newCapacity = getCapacity();
    1529   }
    1530 
    1531   // while a getBuffer(minCapacity) is "open",
    1532   // prevent any modifications of the string by returning FALSE here
    1533   // if the string is bogus, then only an assignment or similar can revive it
    1534   if(!isWritable()) {
    1535     return FALSE;
    1536   }
    1537 
    1538   /*
    1539    * We need to make a copy of the array if
    1540    * the buffer is read-only, or
    1541    * the buffer is refCounted (shared), and refCount>1, or
    1542    * the buffer is too small.
    1543    * Return FALSE if memory could not be allocated.
    1544    */
    1545   if(forceClone ||
    1546      fFlags & kBufferIsReadonly ||
    1547      (fFlags & kRefCounted && refCount() > 1) ||
    1548      newCapacity > getCapacity()
    1549   ) {
    1550     // check growCapacity for default value and use of the stack buffer
           // (a request that fits the stack buffer is not grown beyond it)
    1551     if(growCapacity == -1) {
    1552       growCapacity = newCapacity;
    1553     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
    1554       growCapacity = US_STACKBUF_SIZE;
    1555     }
    1556 
    1557     // save old values
           // (they are restored below if allocation fails)
    1558     UChar oldStackBuffer[US_STACKBUF_SIZE];
    1559     UChar *oldArray;
    1560     uint8_t flags = fFlags;
    1561 
    1562     if(flags&kUsingStackBuffer) {
    1563       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
    1564         // copy the stack buffer contents because it will be overwritten with
    1565         // fUnion.fFields values
    1566         us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
    1567         oldArray = oldStackBuffer;
    1568       } else {
    1569         oldArray = 0; // no need to copy from stack buffer to itself
    1570       }
    1571     } else {
    1572       oldArray = fUnion.fFields.fArray;
    1573     }
    1574 
    1575     // allocate a new array
           // try the generous growCapacity first; if that fails and the required
           // newCapacity is smaller, retry with just newCapacity
    1576     if(allocate(growCapacity) ||
    1577        (newCapacity < growCapacity && allocate(newCapacity))
    1578     ) {
    1579       if(doCopyArray && oldArray != 0) {
    1580         // copy the contents
    1581         // do not copy more than what fits - it may be smaller than before
    1582         int32_t minLength = length();
    1583         newCapacity = getCapacity();
    1584         if(newCapacity < minLength) {
    1585           minLength = newCapacity;
    1586           setLength(minLength);
    1587         }
    1588         us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
    1589       } else {
               // NOTE(review): this branch is also taken when doCopyArray is TRUE
               // but oldArray is 0 (a stack-buffer string whose growCapacity fits
               // the stack buffer), so the length is reset to 0 in that case too -
               // confirm that this is intended for forceClone callers.
    1590         fShortLength = 0;
    1591       }
    1592 
    1593       // release the old array
    1594       if(flags & kRefCounted) {
    1595         // the array is refCounted; decrement and release if 0
               // the counter is addressed as the int32_t directly in front of the array
    1596         int32_t *pRefCount = ((int32_t *)oldArray - 1);
    1597         if(umtx_atomic_dec(pRefCount) == 0) {
    1598           if(pBufferToDelete == 0) {
    1599             uprv_free(pRefCount);
    1600           } else {
    1601             // the caller requested to delete it himself
    1602             *pBufferToDelete = pRefCount;
    1603           }
    1604         }
    1605       }
    1606     } else {
    1607       // not enough memory for growCapacity and not even for the smaller newCapacity
    1608       // reset the old values for setToBogus() to release the array
    1609       if(!(flags&kUsingStackBuffer)) {
    1610         fUnion.fFields.fArray = oldArray;
    1611       }
    1612       fFlags = flags;
    1613       setToBogus();
    1614       return FALSE;
    1615     }
    1616   }
    1617   return TRUE;
    1618 }
   1619 U_NAMESPACE_END
   1620 
   1621 #ifdef U_STATIC_IMPLEMENTATION
   1622 /*
   1623 This should never be called. It is defined here to make sure that the
   1624 virtual vector deleting destructor is defined within unistr.cpp.
   1625 The vector deleting destructor is already a part of UObject,
   1626 but defining it here makes sure that it is included with this object file.
   1627 This makes sure that static library dependencies are kept to a minimum.
   1628 */
   1629 static void uprv_UnicodeStringDummy(void) {
   1630     U_NAMESPACE_USE
   1631     delete [] (new UnicodeString[2]);
   1632 }
   1633 #endif
   1634