Home | History | Annotate | Download | only in common
      1 /*
      2 ******************************************************************************
      3 * Copyright (C) 1999-2010, International Business Machines Corporation and   *
      4 * others. All Rights Reserved.                                               *
      5 ******************************************************************************
      6 *
      7 * File unistr.cpp
      8 *
      9 * Modification History:
     10 *
     11 *   Date        Name        Description
     12 *   09/25/98    stephen     Creation.
     13 *   04/20/99    stephen     Overhauled per 4/16 code review.
     14 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
     15 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
     16 *                           Replaceable.
     17 *   06/25/01    grhoten     Removed the dependency on iostream
     18 ******************************************************************************
     19 */
     20 
     21 #include "unicode/utypes.h"
     22 #include "unicode/putil.h"
     23 #include "cstring.h"
     24 #include "cmemory.h"
     25 #include "unicode/ustring.h"
     26 #include "unicode/unistr.h"
     27 #include "uhash.h"
     28 #include "ustr_imp.h"
     29 #include "umutex.h"
     30 
     31 #if 0
     32 
     33 #if U_IOSTREAM_SOURCE >= 199711
     34 #include <iostream>
     35 using namespace std;
     36 #elif U_IOSTREAM_SOURCE >= 198506
     37 #include <iostream.h>
     38 #endif
     39 
     40 //DEBUGGING
     41 void
     42 print(const UnicodeString& s,
     43       const char *name)
     44 {
     45   UChar c;
     46   cout << name << ":|";
     47   for(int i = 0; i < s.length(); ++i) {
     48     c = s[i];
     49     if(c>= 0x007E || c < 0x0020)
     50       cout << "[0x" << hex << s[i] << "]";
     51     else
     52       cout << (char) s[i];
     53   }
     54   cout << '|' << endl;
     55 }
     56 
     57 void
     58 print(const UChar *s,
     59       int32_t len,
     60       const char *name)
     61 {
     62   UChar c;
     63   cout << name << ":|";
     64   for(int i = 0; i < len; ++i) {
     65     c = s[i];
     66     if(c>= 0x007E || c < 0x0020)
     67       cout << "[0x" << hex << s[i] << "]";
     68     else
     69       cout << (char) s[i];
     70   }
     71   cout << '|' << endl;
     72 }
     73 // END DEBUGGING
     74 #endif
     75 
     76 // Local function definitions for now
     77 
     78 // need to copy areas that may overlap
     79 static
     80 inline void
     81 us_arrayCopy(const UChar *src, int32_t srcStart,
     82          UChar *dst, int32_t dstStart, int32_t count)
     83 {
     84   if(count>0) {
     85     uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
     86   }
     87 }
     88 
     89 // u_unescapeAt() callback to get a UChar from a UnicodeString
     90 U_CDECL_BEGIN
     91 static UChar U_CALLCONV
     92 UnicodeString_charAt(int32_t offset, void *context) {
     93     return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
     94 }
     95 U_CDECL_END
     96 
     97 U_NAMESPACE_BEGIN
     98 
     99 /* The Replaceable virtual destructor can't be defined in the header
    100    due to how AIX works with multiple definitions of virtual functions.
    101 */
// Out-of-line (empty) destructor and default constructor for Replaceable.
Replaceable::~Replaceable() {}
Replaceable::Replaceable() {}
// RTTI boilerplate for UnicodeString, generated by the macro
// (exact expansion is defined elsewhere, not in this file).
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
    105 
    106 UnicodeString U_EXPORT2
    107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
    108     return
    109         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
    110             append(s1).
    111                 append(s2);
    112 }
    113 
    114 //========================================
    115 // Reference Counting functions, put at top of file so that optimizing compilers
    116 //                               have a chance to automatically inline.
    117 //========================================
    118 
    119 void
    120 UnicodeString::addRef()
    121 {  umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
    122 
    123 int32_t
    124 UnicodeString::removeRef()
    125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
    126 
    127 int32_t
    128 UnicodeString::refCount() const
    129 {
    130     umtx_lock(NULL);
    131     // Note: without the lock to force a memory barrier, we might see a very
    132     //       stale value on some multi-processor systems.
    133     int32_t  count = *((int32_t *)fUnion.fFields.fArray - 1);
    134     umtx_unlock(NULL);
    135     return count;
    136  }
    137 
    138 void
    139 UnicodeString::releaseArray() {
    140   if((fFlags & kRefCounted) && removeRef() == 0) {
    141     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
    142   }
    143 }
    144 
    145 
    146 
    147 //========================================
    148 // Constructors
    149 //========================================
// Default constructor: an empty string (length 0) flagged as a short
// string, i.e. one that uses the internal stack buffer.
UnicodeString::UnicodeString()
  : fShortLength(0),
    fFlags(kShortString)
{}
    154 
    155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
    156   : fShortLength(0),
    157     fFlags(0)
    158 {
    159   if(count <= 0 || (uint32_t)c > 0x10ffff) {
    160     // just allocate and do not do anything else
    161     allocate(capacity);
    162   } else {
    163     // count > 0, allocate and fill the new string with count c's
    164     int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
    165     if(capacity < length) {
    166       capacity = length;
    167     }
    168     if(allocate(capacity)) {
    169       UChar *array = getArrayStart();
    170       int32_t i = 0;
    171 
    172       // fill the new string with c
    173       if(unitCount == 1) {
    174         // fill with length UChars
    175         while(i < length) {
    176           array[i++] = (UChar)c;
    177         }
    178       } else {
    179         // get the code units for c
    180         UChar units[UTF_MAX_CHAR_LENGTH];
    181         UTF_APPEND_CHAR_UNSAFE(units, i, c);
    182 
    183         // now it must be i==unitCount
    184         i = 0;
    185 
    186         // for Unicode, unitCount can only be 1, 2, 3, or 4
    187         // 1 is handled above
    188         while(i < length) {
    189           int32_t unitIdx = 0;
    190           while(unitIdx < unitCount) {
    191             array[i++]=units[unitIdx++];
    192           }
    193         }
    194       }
    195     }
    196     setLength(length);
    197   }
    198 }
    199 
// Constructs a one-unit string holding `ch` in the internal stack buffer.
UnicodeString::UnicodeString(UChar ch)
  : fShortLength(1),
    fFlags(kShortString)
{
  fUnion.fStackBuffer[0] = ch;
}
    206 
    207 UnicodeString::UnicodeString(UChar32 ch)
    208   : fShortLength(0),
    209     fFlags(kShortString)
    210 {
    211   int32_t i = 0;
    212   UBool isError = FALSE;
    213   U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
    214   fShortLength = (int8_t)i;
    215 }
    216 
// Constructs a string from `text`. Starts out empty and then inserts the
// text via doReplace(); the length argument -1 leaves it to doReplace()
// to determine the text length (presumably NUL-terminated - confirm in
// doReplace()).
UnicodeString::UnicodeString(const UChar *text)
  : fShortLength(0),
    fFlags(kShortString)
{
  doReplace(0, 0, text, 0, -1);
}
    223 
// Constructs a string from the first `textLength` units of `text`.
// Starts out empty and then inserts the text via doReplace().
UnicodeString::UnicodeString(const UChar *text,
                             int32_t textLength)
  : fShortLength(0),
    fFlags(kShortString)
{
  doReplace(0, 0, text, 0, textLength);
}
    231 
    232 UnicodeString::UnicodeString(UBool isTerminated,
    233                              const UChar *text,
    234                              int32_t textLength)
    235   : fShortLength(0),
    236     fFlags(kReadonlyAlias)
    237 {
    238   if(text == NULL) {
    239     // treat as an empty string, do not alias
    240     setToEmpty();
    241   } else if(textLength < -1 ||
    242             (textLength == -1 && !isTerminated) ||
    243             (textLength >= 0 && isTerminated && text[textLength] != 0)
    244   ) {
    245     setToBogus();
    246   } else {
    247     if(textLength == -1) {
    248       // text is terminated, or else it would have failed the above test
    249       textLength = u_strlen(text);
    250     }
    251     setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
    252   }
    253 }
    254 
    255 UnicodeString::UnicodeString(UChar *buff,
    256                              int32_t buffLength,
    257                              int32_t buffCapacity)
    258   : fShortLength(0),
    259     fFlags(kWritableAlias)
    260 {
    261   if(buff == NULL) {
    262     // treat as an empty string, do not alias
    263     setToEmpty();
    264   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
    265     setToBogus();
    266   } else {
    267     if(buffLength == -1) {
    268       // fLength = u_strlen(buff); but do not look beyond buffCapacity
    269       const UChar *p = buff, *limit = buff + buffCapacity;
    270       while(p != limit && *p != 0) {
    271         ++p;
    272       }
    273       buffLength = (int32_t)(p - buff);
    274     }
    275     setArray(buff, buffLength, buffCapacity);
    276   }
    277 }
    278 
    279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
    280   : fShortLength(0),
    281     fFlags(kShortString)
    282 {
    283   if(src==NULL) {
    284     // treat as an empty string
    285   } else {
    286     if(length<0) {
    287       length=(int32_t)uprv_strlen(src);
    288     }
    289     if(cloneArrayIfNeeded(length, length, FALSE)) {
    290       u_charsToUChars(src, getArrayStart(), length);
    291       setLength(length);
    292     } else {
    293       setToBogus();
    294     }
    295   }
    296 }
    297 
    298 #if U_CHARSET_IS_UTF8
    299 
    300 UnicodeString::UnicodeString(const char *codepageData)
    301   : fShortLength(0),
    302     fFlags(kShortString) {
    303   if(codepageData != 0) {
    304     setToUTF8(codepageData);
    305   }
    306 }
    307 
    308 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
    309   : fShortLength(0),
    310     fFlags(kShortString) {
    311   // if there's nothing to convert, do nothing
    312   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
    313     return;
    314   }
    315   if(dataLength == -1) {
    316     dataLength = (int32_t)uprv_strlen(codepageData);
    317   }
    318   setToUTF8(StringPiece(codepageData, dataLength));
    319 }
    320 
    321 // else see unistr_cnv.cpp
    322 #endif
    323 
// Copy constructor: starts as an empty short string and then takes over
// the contents of `that` through copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  copyFrom(that);
}
    331 
// Substring constructor: starts as an empty short string and then is set
// to the part of `that` selected by setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart);
}
    340 
// Substring constructor: starts as an empty short string and then is set
// to the part of `that` selected by setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart, srcLength);
}
    350 
    351 // Replaceable base class clone() default implementation, does not clone
// Base-class default: cloning is not supported, so NULL is returned.
// Callers must be prepared for a NULL result.
Replaceable *
Replaceable::clone() const {
  return NULL;
}
    356 
    357 // UnicodeString overrides clone() with a real implementation
// Returns a heap-allocated copy of this string, created with `new` and
// the copy constructor. The caller receives a raw pointer and is
// responsible for deleting it.
Replaceable *
UnicodeString::clone() const {
  return new UnicodeString(*this);
}
    362 
    363 //========================================
    364 // array allocation
    365 //========================================
    366 
    367 UBool
    368 UnicodeString::allocate(int32_t capacity) {
    369   if(capacity <= US_STACKBUF_SIZE) {
    370     fFlags = kShortString;
    371   } else {
    372     // count bytes for the refCounter and the string capacity, and
    373     // round up to a multiple of 16; then divide by 4 and allocate int32_t's
    374     // to be safely aligned for the refCount
    375     // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
    376     int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
    377     int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
    378     if(array != 0) {
    379       // set initial refCount and point behind the refCount
    380       *array++ = 1;
    381 
    382       // have fArray point to the first UChar
    383       fUnion.fFields.fArray = (UChar *)array;
    384       fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
    385       fFlags = kLongString;
    386     } else {
    387       fShortLength = 0;
    388       fUnion.fFields.fArray = 0;
    389       fUnion.fFields.fCapacity = 0;
    390       fFlags = kIsBogus;
    391       return FALSE;
    392     }
    393   }
    394   return TRUE;
    395 }
    396 
    397 //========================================
    398 // Destructor
    399 //========================================
// Destructor: drops this object's reference to a ref-counted array, if
// it has one (see releaseArray()).
UnicodeString::~UnicodeString()
{
  releaseArray();
}
    404 
    405 //========================================
    406 // Factory methods
    407 //========================================
    408 
// Factory: returns a new string set from the UTF-8 bytes in `utf8`
// via setToUTF8().
UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
  UnicodeString result;
  result.setToUTF8(utf8);
  return result;
}
    414 
    415 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
    416   UnicodeString result;
    417   int32_t capacity;
    418   // Most UTF-32 strings will be BMP-only and result in a same-length
    419   // UTF-16 string. We overestimate the capacity just slightly,
    420   // just in case there are a few supplementary characters.
    421   if(length <= US_STACKBUF_SIZE) {
    422     capacity = US_STACKBUF_SIZE;
    423   } else {
    424     capacity = length + (length >> 4) + 4;
    425   }
    426   do {
    427     UChar *utf16 = result.getBuffer(capacity);
    428     int32_t length16;
    429     UErrorCode errorCode = U_ZERO_ERROR;
    430     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
    431         utf32, length,
    432         0xfffd,  // Substitution character.
    433         NULL,    // Don't care about number of substitutions.
    434         &errorCode);
    435     result.releaseBuffer(length16);
    436     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
    437       capacity = length16 + 1;  // +1 for the terminating NUL.
    438       continue;
    439     } else if(U_FAILURE(errorCode)) {
    440       result.setToBogus();
    441     }
    442     break;
    443   } while(TRUE);
    444   return result;
    445 }
    446 
    447 //========================================
    448 // Assignment
    449 //========================================
    450 
// Assignment: delegates to copyFrom() with its default fastCopy argument.
UnicodeString &
UnicodeString::operator=(const UnicodeString &src) {
  return copyFrom(src);
}
    455 
// Like operator=, but calls copyFrom() with fastCopy=TRUE, which lets a
// read-only alias be copied as an alias instead of duplicating its text.
UnicodeString &
UnicodeString::fastCopyFrom(const UnicodeString &src) {
  return copyFrom(src, TRUE);
}
    460 
// Makes this string a copy of `src` and returns *this.
//
// How the contents are taken over depends on src's storage kind:
//   short string   -> the stack buffer contents are copied
//   long string    -> the ref-counted heap buffer is shared (refCount+1)
//   readonly alias -> aliased as well if fastCopy is set, otherwise copied
//   writable alias -> always copied into newly allocated storage
// A bogus src, or a failed allocation, leaves this string bogus.
//
// NOTE: the switch relies on deliberate case fall-through; keep the case
// order intact when editing.
UnicodeString &
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
  // if assigning to ourselves, do nothing
  // NOTE(review): the `this == 0` test (and `&src == 0` below) guards
  // against null objects/references; compilers may assume these are never
  // true - confirm whether they are still wanted.
  if(this == 0 || this == &src) {
    return *this;
  }

  // is the right side bogus?
  if(&src == 0 || src.isBogus()) {
    setToBogus();
    return *this;
  }

  // delete the current contents
  releaseArray();

  if(src.isEmpty()) {
    // empty string - use the stack buffer
    setToEmpty();
    return *this;
  }

  // we always copy the length
  int32_t srcLength = src.length();
  setLength(srcLength);

  // fLength>0 and not an "open" src.getBuffer(minCapacity)
  switch(src.fFlags) {
  case kShortString:
    // short string using the stack buffer, do the same
    fFlags = kShortString;
    uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
    break;
  case kLongString:
    // src uses a refCounted string buffer, use that buffer with refCount
    // src is const, use a cast - we don't really change it
    ((UnicodeString &)src).addRef();
    // copy all fields, share the reference-counted buffer
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    fFlags = src.fFlags;
    break;
  case kReadonlyAlias:
    if(fastCopy) {
      // src is a readonly alias, do the same
      // -> maintain the readonly alias as such
      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
      fFlags = src.fFlags;
      break;
    }
    // else if(!fastCopy) fall through to case kWritableAlias
    // -> allocate a new buffer and copy the contents
  case kWritableAlias:
    // src is a writable alias; we make a copy of that instead
    if(allocate(srcLength)) {
      uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
      break;
    }
    // if there is not enough memory, then fall through to setting to bogus
  default:
    // if src is bogus, set ourselves to bogus
    // do not call setToBogus() here because fArray and fFlags are not consistent here
    // (the old array was already released above, so the fields are reset directly)
    fShortLength = 0;
    fUnion.fFields.fArray = 0;
    fUnion.fFields.fCapacity = 0;
    fFlags = kIsBogus;
    break;
  }

  return *this;
}
    533 
    534 //========================================
    535 // Miscellaneous operations
    536 //========================================
    537 
    538 UnicodeString UnicodeString::unescape() const {
    539     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
    540     const UChar *array = getBuffer();
    541     int32_t len = length();
    542     int32_t prev = 0;
    543     for (int32_t i=0;;) {
    544         if (i == len) {
    545             result.append(array, prev, len - prev);
    546             break;
    547         }
    548         if (array[i++] == 0x5C /*'\\'*/) {
    549             result.append(array, prev, (i - 1) - prev);
    550             UChar32 c = unescapeAt(i); // advances i
    551             if (c < 0) {
    552                 result.remove(); // return empty string
    553                 break; // invalid escape sequence
    554             }
    555             result.append(c);
    556             prev = i;
    557         }
    558     }
    559     return result;
    560 }
    561 
// Decodes one escape sequence at `offset` by handing u_unescapeAt() a
// callback that reads code units from this string. `offset` is passed by
// pointer so that u_unescapeAt() can advance it; the decoded code point
// (negative on error, as relied on by unescape()) is returned.
UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
    return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
}
    565 
    566 //========================================
    567 // Read-only implementation
    568 //========================================
    569 int8_t
    570 UnicodeString::doCompare( int32_t start,
    571               int32_t length,
    572               const UChar *srcChars,
    573               int32_t srcStart,
    574               int32_t srcLength) const
    575 {
    576   // compare illegal string values
    577   // treat const UChar *srcChars==NULL as an empty string
    578   if(isBogus()) {
    579     return -1;
    580   }
    581 
    582   // pin indices to legal values
    583   pinIndices(start, length);
    584 
    585   if(srcChars == NULL) {
    586     srcStart = srcLength = 0;
    587   }
    588 
    589   // get the correct pointer
    590   const UChar *chars = getArrayStart();
    591 
    592   chars += start;
    593   srcChars += srcStart;
    594 
    595   int32_t minLength;
    596   int8_t lengthResult;
    597 
    598   // get the srcLength if necessary
    599   if(srcLength < 0) {
    600     srcLength = u_strlen(srcChars + srcStart);
    601   }
    602 
    603   // are we comparing different lengths?
    604   if(length != srcLength) {
    605     if(length < srcLength) {
    606       minLength = length;
    607       lengthResult = -1;
    608     } else {
    609       minLength = srcLength;
    610       lengthResult = 1;
    611     }
    612   } else {
    613     minLength = length;
    614     lengthResult = 0;
    615   }
    616 
    617   /*
    618    * note that uprv_memcmp() returns an int but we return an int8_t;
    619    * we need to take care not to truncate the result -
    620    * one way to do this is to right-shift the value to
    621    * move the sign bit into the lower 8 bits and making sure that this
    622    * does not become 0 itself
    623    */
    624 
    625   if(minLength > 0 && chars != srcChars) {
    626     int32_t result;
    627 
    628 #   if U_IS_BIG_ENDIAN
    629       // big-endian: byte comparison works
    630       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
    631       if(result != 0) {
    632         return (int8_t)(result >> 15 | 1);
    633       }
    634 #   else
    635       // little-endian: compare UChar units
    636       do {
    637         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
    638         if(result != 0) {
    639           return (int8_t)(result >> 15 | 1);
    640         }
    641       } while(--minLength > 0);
    642 #   endif
    643   }
    644   return lengthResult;
    645 }
    646 
    647 /* String compare in code point order - doCompare() compares in code unit order. */
    648 int8_t
    649 UnicodeString::doCompareCodePointOrder(int32_t start,
    650                                        int32_t length,
    651                                        const UChar *srcChars,
    652                                        int32_t srcStart,
    653                                        int32_t srcLength) const
    654 {
    655   // compare illegal string values
    656   // treat const UChar *srcChars==NULL as an empty string
    657   if(isBogus()) {
    658     return -1;
    659   }
    660 
    661   // pin indices to legal values
    662   pinIndices(start, length);
    663 
    664   if(srcChars == NULL) {
    665     srcStart = srcLength = 0;
    666   }
    667 
    668   int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
    669   /* translate the 32-bit result into an 8-bit one */
    670   if(diff!=0) {
    671     return (int8_t)(diff >> 15 | 1);
    672   } else {
    673     return 0;
    674   }
    675 }
    676 
// Out-of-line wrapper that forwards to length().
int32_t
UnicodeString::getLength() const {
    return length();
}
    681 
// Out-of-line wrapper that forwards to charAt(offset).
UChar
UnicodeString::getCharAt(int32_t offset) const {
  return charAt(offset);
}
    686 
// Out-of-line wrapper that forwards to char32At(offset).
UChar32
UnicodeString::getChar32At(int32_t offset) const {
  return char32At(offset);
}
    691 
    692 int32_t
    693 UnicodeString::countChar32(int32_t start, int32_t length) const {
    694   pinIndices(start, length);
    695   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
    696   return u_countChar32(getArrayStart()+start, length);
    697 }
    698 
    699 UBool
    700 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
    701   pinIndices(start, length);
    702   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
    703   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
    704 }
    705 
    706 int32_t
    707 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
    708   // pin index
    709   int32_t len = length();
    710   if(index<0) {
    711     index=0;
    712   } else if(index>len) {
    713     index=len;
    714   }
    715 
    716   const UChar *array = getArrayStart();
    717   if(delta>0) {
    718     UTF_FWD_N(array, index, len, delta);
    719   } else {
    720     UTF_BACK_N(array, 0, index, -delta);
    721   }
    722 
    723   return index;
    724 }
    725 
    726 void
    727 UnicodeString::doExtract(int32_t start,
    728              int32_t length,
    729              UChar *dst,
    730              int32_t dstStart) const
    731 {
    732   // pin indices to legal values
    733   pinIndices(start, length);
    734 
    735   // do not copy anything if we alias dst itself
    736   const UChar *array = getArrayStart();
    737   if(array + start != dst + dstStart) {
    738     us_arrayCopy(array, start, dst, dstStart, length);
    739   }
    740 }
    741 
    742 int32_t
    743 UnicodeString::extract(UChar *dest, int32_t destCapacity,
    744                        UErrorCode &errorCode) const {
    745   int32_t len = length();
    746   if(U_SUCCESS(errorCode)) {
    747     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
    748       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
    749     } else {
    750       const UChar *array = getArrayStart();
    751       if(len>0 && len<=destCapacity && array!=dest) {
    752         uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
    753       }
    754       return u_terminateUChars(dest, destCapacity, len, &errorCode);
    755     }
    756   }
    757 
    758   return len;
    759 }
    760 
    761 int32_t
    762 UnicodeString::extract(int32_t start,
    763                        int32_t length,
    764                        char *target,
    765                        int32_t targetCapacity,
    766                        enum EInvariant) const
    767 {
    768   // if the arguments are illegal, then do nothing
    769   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
    770     return 0;
    771   }
    772 
    773   // pin the indices to legal values
    774   pinIndices(start, length);
    775 
    776   if(length <= targetCapacity) {
    777     u_UCharsToChars(getArrayStart() + start, target, length);
    778   }
    779   UErrorCode status = U_ZERO_ERROR;
    780   return u_terminateChars(target, targetCapacity, length, &status);
    781 }
    782 
    783 UnicodeString
    784 UnicodeString::tempSubString(int32_t start, int32_t len) const {
    785   pinIndices(start, len);
    786   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
    787   if(array==NULL) {
    788     array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string
    789     len=-2;  // bogus result string
    790   }
    791   return UnicodeString(FALSE, array + start, len);
    792 }
    793 
    794 int32_t
    795 UnicodeString::toUTF8(int32_t start, int32_t len,
    796                       char *target, int32_t capacity) const {
    797   pinIndices(start, len);
    798   int32_t length8;
    799   UErrorCode errorCode = U_ZERO_ERROR;
    800   u_strToUTF8WithSub(target, capacity, &length8,
    801                      getBuffer() + start, len,
    802                      0xFFFD,  // Standard substitution character.
    803                      NULL,    // Don't care about number of substitutions.
    804                      &errorCode);
    805   return length8;
    806 }
    807 
    808 #if U_CHARSET_IS_UTF8
    809 
    810 int32_t
    811 UnicodeString::extract(int32_t start, int32_t len,
    812                        char *target, uint32_t dstSize) const {
    813   // if the arguments are illegal, then do nothing
    814   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
    815     return 0;
    816   }
    817   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
    818 }
    819 
    820 // else see unistr_cnv.cpp
    821 #endif
    822 
    823 void
    824 UnicodeString::extractBetween(int32_t start,
    825                   int32_t limit,
    826                   UnicodeString& target) const {
    827   pinIndex(start);
    828   pinIndex(limit);
    829   doExtract(start, limit - start, target);
    830 }
    831 
    832 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
    833 // as many bytes as the source has UChars.
    834 // The "worst cases" are writing systems like Indic, Thai and CJK with
    835 // 3:1 bytes:UChars.
    836 void
    837 UnicodeString::toUTF8(ByteSink &sink) const {
    838   int32_t length16 = length();
    839   if(length16 != 0) {
    840     char stackBuffer[1024];
    841     int32_t capacity = (int32_t)sizeof(stackBuffer);
    842     UBool utf8IsOwned = FALSE;
    843     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
    844                                       3*length16,
    845                                       stackBuffer, capacity,
    846                                       &capacity);
    847     int32_t length8 = 0;
    848     UErrorCode errorCode = U_ZERO_ERROR;
    849     u_strToUTF8WithSub(utf8, capacity, &length8,
    850                        getBuffer(), length16,
    851                        0xFFFD,  // Standard substitution character.
    852                        NULL,    // Don't care about number of substitutions.
    853                        &errorCode);
    854     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
    855       utf8 = (char *)uprv_malloc(length8);
    856       if(utf8 != NULL) {
    857         utf8IsOwned = TRUE;
    858         errorCode = U_ZERO_ERROR;
    859         u_strToUTF8WithSub(utf8, length8, &length8,
    860                            getBuffer(), length16,
    861                            0xFFFD,  // Standard substitution character.
    862                            NULL,    // Don't care about number of substitutions.
    863                            &errorCode);
    864       } else {
    865         errorCode = U_MEMORY_ALLOCATION_ERROR;
    866       }
    867     }
    868     if(U_SUCCESS(errorCode)) {
    869       sink.Append(utf8, length8);
    870     }
    871     if(utf8IsOwned) {
    872       uprv_free(utf8);
    873     }
    874   }
    875 }
    876 
    877 int32_t
    878 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
    879   int32_t length32=0;
    880   if(U_SUCCESS(errorCode)) {
    881     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
    882     u_strToUTF32WithSub(utf32, capacity, &length32,
    883         getBuffer(), length(),
    884         0xfffd,  // Substitution character.
    885         NULL,    // Don't care about number of substitutions.
    886         &errorCode);
    887   }
    888   return length32;
    889 }
    890 
    891 int32_t
    892 UnicodeString::indexOf(const UChar *srcChars,
    893                int32_t srcStart,
    894                int32_t srcLength,
    895                int32_t start,
    896                int32_t length) const
    897 {
    898   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    899     return -1;
    900   }
    901 
    902   // UnicodeString does not find empty substrings
    903   if(srcLength < 0 && srcChars[srcStart] == 0) {
    904     return -1;
    905   }
    906 
    907   // get the indices within bounds
    908   pinIndices(start, length);
    909 
    910   // find the first occurrence of the substring
    911   const UChar *array = getArrayStart();
    912   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
    913   if(match == NULL) {
    914     return -1;
    915   } else {
    916     return (int32_t)(match - array);
    917   }
    918 }
    919 
    920 int32_t
    921 UnicodeString::doIndexOf(UChar c,
    922              int32_t start,
    923              int32_t length) const
    924 {
    925   // pin indices
    926   pinIndices(start, length);
    927 
    928   // find the first occurrence of c
    929   const UChar *array = getArrayStart();
    930   const UChar *match = u_memchr(array + start, c, length);
    931   if(match == NULL) {
    932     return -1;
    933   } else {
    934     return (int32_t)(match - array);
    935   }
    936 }
    937 
    938 int32_t
    939 UnicodeString::doIndexOf(UChar32 c,
    940                          int32_t start,
    941                          int32_t length) const {
    942   // pin indices
    943   pinIndices(start, length);
    944 
    945   // find the first occurrence of c
    946   const UChar *array = getArrayStart();
    947   const UChar *match = u_memchr32(array + start, c, length);
    948   if(match == NULL) {
    949     return -1;
    950   } else {
    951     return (int32_t)(match - array);
    952   }
    953 }
    954 
    955 int32_t
    956 UnicodeString::lastIndexOf(const UChar *srcChars,
    957                int32_t srcStart,
    958                int32_t srcLength,
    959                int32_t start,
    960                int32_t length) const
    961 {
    962   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
    963     return -1;
    964   }
    965 
    966   // UnicodeString does not find empty substrings
    967   if(srcLength < 0 && srcChars[srcStart] == 0) {
    968     return -1;
    969   }
    970 
    971   // get the indices within bounds
    972   pinIndices(start, length);
    973 
    974   // find the last occurrence of the substring
    975   const UChar *array = getArrayStart();
    976   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
    977   if(match == NULL) {
    978     return -1;
    979   } else {
    980     return (int32_t)(match - array);
    981   }
    982 }
    983 
    984 int32_t
    985 UnicodeString::doLastIndexOf(UChar c,
    986                  int32_t start,
    987                  int32_t length) const
    988 {
    989   if(isBogus()) {
    990     return -1;
    991   }
    992 
    993   // pin indices
    994   pinIndices(start, length);
    995 
    996   // find the last occurrence of c
    997   const UChar *array = getArrayStart();
    998   const UChar *match = u_memrchr(array + start, c, length);
    999   if(match == NULL) {
   1000     return -1;
   1001   } else {
   1002     return (int32_t)(match - array);
   1003   }
   1004 }
   1005 
   1006 int32_t
   1007 UnicodeString::doLastIndexOf(UChar32 c,
   1008                              int32_t start,
   1009                              int32_t length) const {
   1010   // pin indices
   1011   pinIndices(start, length);
   1012 
   1013   // find the last occurrence of c
   1014   const UChar *array = getArrayStart();
   1015   const UChar *match = u_memrchr32(array + start, c, length);
   1016   if(match == NULL) {
   1017     return -1;
   1018   } else {
   1019     return (int32_t)(match - array);
   1020   }
   1021 }
   1022 
   1023 //========================================
   1024 // Write implementation
   1025 //========================================
   1026 
   1027 UnicodeString&
   1028 UnicodeString::findAndReplace(int32_t start,
   1029                   int32_t length,
   1030                   const UnicodeString& oldText,
   1031                   int32_t oldStart,
   1032                   int32_t oldLength,
   1033                   const UnicodeString& newText,
   1034                   int32_t newStart,
   1035                   int32_t newLength)
   1036 {
   1037   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
   1038     return *this;
   1039   }
   1040 
   1041   pinIndices(start, length);
   1042   oldText.pinIndices(oldStart, oldLength);
   1043   newText.pinIndices(newStart, newLength);
   1044 
   1045   if(oldLength == 0) {
   1046     return *this;
   1047   }
   1048 
   1049   while(length > 0 && length >= oldLength) {
   1050     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
   1051     if(pos < 0) {
   1052       // no more oldText's here: done
   1053       break;
   1054     } else {
   1055       // we found oldText, replace it by newText and go beyond it
   1056       replace(pos, oldLength, newText, newStart, newLength);
   1057       length -= pos + oldLength - start;
   1058       start = pos + newLength;
   1059     }
   1060   }
   1061 
   1062   return *this;
   1063 }
   1064 
   1065 
   1066 void
   1067 UnicodeString::setToBogus()
   1068 {
   1069   releaseArray();
   1070 
   1071   fShortLength = 0;
   1072   fUnion.fFields.fArray = 0;
   1073   fUnion.fFields.fCapacity = 0;
   1074   fFlags = kIsBogus;
   1075 }
   1076 
   1077 // turn a bogus string into an empty one
   1078 void
   1079 UnicodeString::unBogus() {
   1080   if(fFlags & kIsBogus) {
   1081     setToEmpty();
   1082   }
   1083 }
   1084 
   1085 // setTo() analogous to the readonly-aliasing constructor with the same signature
   1086 UnicodeString &
   1087 UnicodeString::setTo(UBool isTerminated,
   1088                      const UChar *text,
   1089                      int32_t textLength)
   1090 {
   1091   if(fFlags & kOpenGetBuffer) {
   1092     // do not modify a string that has an "open" getBuffer(minCapacity)
   1093     return *this;
   1094   }
   1095 
   1096   if(text == NULL) {
   1097     // treat as an empty string, do not alias
   1098     releaseArray();
   1099     setToEmpty();
   1100     return *this;
   1101   }
   1102 
   1103   if( textLength < -1 ||
   1104       (textLength == -1 && !isTerminated) ||
   1105       (textLength >= 0 && isTerminated && text[textLength] != 0)
   1106   ) {
   1107     setToBogus();
   1108     return *this;
   1109   }
   1110 
   1111   releaseArray();
   1112 
   1113   if(textLength == -1) {
   1114     // text is terminated, or else it would have failed the above test
   1115     textLength = u_strlen(text);
   1116   }
   1117   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
   1118 
   1119   fFlags = kReadonlyAlias;
   1120   return *this;
   1121 }
   1122 
   1123 // setTo() analogous to the writable-aliasing constructor with the same signature
   1124 UnicodeString &
   1125 UnicodeString::setTo(UChar *buffer,
   1126                      int32_t buffLength,
   1127                      int32_t buffCapacity) {
   1128   if(fFlags & kOpenGetBuffer) {
   1129     // do not modify a string that has an "open" getBuffer(minCapacity)
   1130     return *this;
   1131   }
   1132 
   1133   if(buffer == NULL) {
   1134     // treat as an empty string, do not alias
   1135     releaseArray();
   1136     setToEmpty();
   1137     return *this;
   1138   }
   1139 
   1140   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
   1141     setToBogus();
   1142     return *this;
   1143   } else if(buffLength == -1) {
   1144     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
   1145     const UChar *p = buffer, *limit = buffer + buffCapacity;
   1146     while(p != limit && *p != 0) {
   1147       ++p;
   1148     }
   1149     buffLength = (int32_t)(p - buffer);
   1150   }
   1151 
   1152   releaseArray();
   1153 
   1154   setArray(buffer, buffLength, buffCapacity);
   1155   fFlags = kWritableAlias;
   1156   return *this;
   1157 }
   1158 
   1159 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
   1160   unBogus();
   1161   int32_t length = utf8.length();
   1162   int32_t capacity;
   1163   // The UTF-16 string will be at most as long as the UTF-8 string.
   1164   if(length <= US_STACKBUF_SIZE) {
   1165     capacity = US_STACKBUF_SIZE;
   1166   } else {
   1167     capacity = length + 1;  // +1 for the terminating NUL.
   1168   }
   1169   UChar *utf16 = getBuffer(capacity);
   1170   int32_t length16;
   1171   UErrorCode errorCode = U_ZERO_ERROR;
   1172   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
   1173       utf8.data(), length,
   1174       0xfffd,  // Substitution character.
   1175       NULL,    // Don't care about number of substitutions.
   1176       &errorCode);
   1177   releaseBuffer(length16);
   1178   if(U_FAILURE(errorCode)) {
   1179     setToBogus();
   1180   }
   1181   return *this;
   1182 }
   1183 
   1184 UnicodeString&
   1185 UnicodeString::setCharAt(int32_t offset,
   1186              UChar c)
   1187 {
   1188   int32_t len = length();
   1189   if(cloneArrayIfNeeded() && len > 0) {
   1190     if(offset < 0) {
   1191       offset = 0;
   1192     } else if(offset >= len) {
   1193       offset = len - 1;
   1194     }
   1195 
   1196     getArrayStart()[offset] = c;
   1197   }
   1198   return *this;
   1199 }
   1200 
   1201 UnicodeString&
   1202 UnicodeString::doReplace( int32_t start,
   1203               int32_t length,
   1204               const UnicodeString& src,
   1205               int32_t srcStart,
   1206               int32_t srcLength)
   1207 {
   1208   if(!src.isBogus()) {
   1209     // pin the indices to legal values
   1210     src.pinIndices(srcStart, srcLength);
   1211 
   1212     // get the characters from src
   1213     // and replace the range in ourselves with them
   1214     return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
   1215   } else {
   1216     // remove the range
   1217     return doReplace(start, length, 0, 0, 0);
   1218   }
   1219 }
   1220 
   1221 UnicodeString&
   1222 UnicodeString::doReplace(int32_t start,
   1223              int32_t length,
   1224              const UChar *srcChars,
   1225              int32_t srcStart,
   1226              int32_t srcLength)
   1227 {
   1228   if(!isWritable()) {
   1229     return *this;
   1230   }
   1231 
   1232   int32_t oldLength = this->length();
   1233 
   1234   // optimize (read-only alias).remove(0, start) and .remove(start, end)
   1235   if((fFlags&kBufferIsReadonly) && srcLength == 0) {
   1236     if(start == 0) {
   1237       // remove prefix by adjusting the array pointer
   1238       pinIndex(length);
   1239       fUnion.fFields.fArray += length;
   1240       fUnion.fFields.fCapacity -= length;
   1241       setLength(oldLength - length);
   1242       return *this;
   1243     } else {
   1244       pinIndex(start);
   1245       if(length >= (oldLength - start)) {
   1246         // remove suffix by reducing the length (like truncate())
   1247         setLength(start);
   1248         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
   1249         return *this;
   1250       }
   1251     }
   1252   }
   1253 
   1254   if(srcChars == 0) {
   1255     srcStart = srcLength = 0;
   1256   } else if(srcLength < 0) {
   1257     // get the srcLength if necessary
   1258     srcLength = u_strlen(srcChars + srcStart);
   1259   }
   1260 
   1261   // calculate the size of the string after the replace
   1262   int32_t newSize;
   1263 
   1264   // optimize append() onto a large-enough, owned string
   1265   if(start >= oldLength) {
   1266     newSize = oldLength + srcLength;
   1267     if(newSize <= getCapacity() && isBufferWritable()) {
   1268       us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength);
   1269       setLength(newSize);
   1270       return *this;
   1271     } else {
   1272       // pin the indices to legal values
   1273       start = oldLength;
   1274       length = 0;
   1275     }
   1276   } else {
   1277     // pin the indices to legal values
   1278     pinIndices(start, length);
   1279 
   1280     newSize = oldLength - length + srcLength;
   1281   }
   1282 
   1283   // the following may change fArray but will not copy the current contents;
   1284   // therefore we need to keep the current fArray
   1285   UChar oldStackBuffer[US_STACKBUF_SIZE];
   1286   UChar *oldArray;
   1287   if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) {
   1288     // copy the stack buffer contents because it will be overwritten with
   1289     // fUnion.fFields values
   1290     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
   1291     oldArray = oldStackBuffer;
   1292   } else {
   1293     oldArray = getArrayStart();
   1294   }
   1295 
   1296   // clone our array and allocate a bigger array if needed
   1297   int32_t *bufferToDelete = 0;
   1298   if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
   1299                          FALSE, &bufferToDelete)
   1300   ) {
   1301     return *this;
   1302   }
   1303 
   1304   // now do the replace
   1305 
   1306   UChar *newArray = getArrayStart();
   1307   if(newArray != oldArray) {
   1308     // if fArray changed, then we need to copy everything except what will change
   1309     us_arrayCopy(oldArray, 0, newArray, 0, start);
   1310     us_arrayCopy(oldArray, start + length,
   1311                  newArray, start + srcLength,
   1312                  oldLength - (start + length));
   1313   } else if(length != srcLength) {
   1314     // fArray did not change; copy only the portion that isn't changing, leaving a hole
   1315     us_arrayCopy(oldArray, start + length,
   1316                  newArray, start + srcLength,
   1317                  oldLength - (start + length));
   1318   }
   1319 
   1320   // now fill in the hole with the new string
   1321   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
   1322 
   1323   setLength(newSize);
   1324 
   1325   // delayed delete in case srcChars == fArray when we started, and
   1326   // to keep oldArray alive for the above operations
   1327   if (bufferToDelete) {
   1328     uprv_free(bufferToDelete);
   1329   }
   1330 
   1331   return *this;
   1332 }
   1333 
   1334 /**
   1335  * Replaceable API
   1336  */
   1337 void
   1338 UnicodeString::handleReplaceBetween(int32_t start,
   1339                                     int32_t limit,
   1340                                     const UnicodeString& text) {
   1341     replaceBetween(start, limit, text);
   1342 }
   1343 
   1344 /**
   1345  * Replaceable API
   1346  */
   1347 void
   1348 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
   1349     if (limit <= start) {
   1350         return; // Nothing to do; avoid bogus malloc call
   1351     }
   1352     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
   1353     // Check to make sure text is not null.
   1354     if (text != NULL) {
   1355 	    extractBetween(start, limit, text, 0);
   1356 	    insert(dest, text, 0, limit - start);
   1357 	    uprv_free(text);
   1358     }
   1359 }
   1360 
   1361 /**
   1362  * Replaceable API
   1363  *
   1364  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
   1365  * so we implement this function here.
   1366  */
   1367 UBool Replaceable::hasMetaData() const {
   1368     return TRUE;
   1369 }
   1370 
   1371 /**
   1372  * Replaceable API
   1373  */
   1374 UBool UnicodeString::hasMetaData() const {
   1375     return FALSE;
   1376 }
   1377 
   1378 UnicodeString&
   1379 UnicodeString::doReverse(int32_t start,
   1380              int32_t length)
   1381 {
   1382   if(this->length() <= 1 || !cloneArrayIfNeeded()) {
   1383     return *this;
   1384   }
   1385 
   1386   // pin the indices to legal values
   1387   pinIndices(start, length);
   1388 
   1389   UChar *left = getArrayStart() + start;
   1390   UChar *right = left + length;
   1391   UChar swap;
   1392   UBool hasSupplementary = FALSE;
   1393 
   1394   while(left < --right) {
   1395     hasSupplementary |= (UBool)UTF_IS_LEAD(swap = *left);
   1396     hasSupplementary |= (UBool)UTF_IS_LEAD(*left++ = *right);
   1397     *right = swap;
   1398   }
   1399 
   1400   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
   1401   if(hasSupplementary) {
   1402     UChar swap2;
   1403 
   1404     left = getArrayStart() + start;
   1405     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
   1406     while(left < right) {
   1407       if(UTF_IS_TRAIL(swap = *left) && UTF_IS_LEAD(swap2 = *(left + 1))) {
   1408         *left++ = swap2;
   1409         *left++ = swap;
   1410       } else {
   1411         ++left;
   1412       }
   1413     }
   1414   }
   1415 
   1416   return *this;
   1417 }
   1418 
   1419 UBool
   1420 UnicodeString::padLeading(int32_t targetLength,
   1421                           UChar padChar)
   1422 {
   1423   int32_t oldLength = length();
   1424   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1425     return FALSE;
   1426   } else {
   1427     // move contents up by padding width
   1428     UChar *array = getArrayStart();
   1429     int32_t start = targetLength - oldLength;
   1430     us_arrayCopy(array, 0, array, start, oldLength);
   1431 
   1432     // fill in padding character
   1433     while(--start >= 0) {
   1434       array[start] = padChar;
   1435     }
   1436     setLength(targetLength);
   1437     return TRUE;
   1438   }
   1439 }
   1440 
   1441 UBool
   1442 UnicodeString::padTrailing(int32_t targetLength,
   1443                            UChar padChar)
   1444 {
   1445   int32_t oldLength = length();
   1446   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
   1447     return FALSE;
   1448   } else {
   1449     // fill in padding character
   1450     UChar *array = getArrayStart();
   1451     int32_t length = targetLength;
   1452     while(--length >= oldLength) {
   1453       array[length] = padChar;
   1454     }
   1455     setLength(targetLength);
   1456     return TRUE;
   1457   }
   1458 }
   1459 
   1460 //========================================
   1461 // Hashing
   1462 //========================================
   1463 int32_t
   1464 UnicodeString::doHashCode() const
   1465 {
   1466     /* Delegate hash computation to uhash.  This makes UnicodeString
   1467      * hashing consistent with UChar* hashing.  */
   1468     int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
   1469     if (hashCode == kInvalidHashCode) {
   1470         hashCode = kEmptyHashCode;
   1471     }
   1472     return hashCode;
   1473 }
   1474 
   1475 //========================================
   1476 // External Buffer
   1477 //========================================
   1478 
   1479 UChar *
   1480 UnicodeString::getBuffer(int32_t minCapacity) {
   1481   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
   1482     fFlags|=kOpenGetBuffer;
   1483     fShortLength=0;
   1484     return getArrayStart();
   1485   } else {
   1486     return 0;
   1487   }
   1488 }
   1489 
   1490 void
   1491 UnicodeString::releaseBuffer(int32_t newLength) {
   1492   if(fFlags&kOpenGetBuffer && newLength>=-1) {
   1493     // set the new fLength
   1494     int32_t capacity=getCapacity();
   1495     if(newLength==-1) {
   1496       // the new length is the string length, capped by fCapacity
   1497       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
   1498       while(p<limit && *p!=0) {
   1499         ++p;
   1500       }
   1501       newLength=(int32_t)(p-array);
   1502     } else if(newLength>capacity) {
   1503       newLength=capacity;
   1504     }
   1505     setLength(newLength);
   1506     fFlags&=~kOpenGetBuffer;
   1507   }
   1508 }
   1509 
   1510 //========================================
   1511 // Miscellaneous
   1512 //========================================
   1513 UBool
   1514 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
   1515                                   int32_t growCapacity,
   1516                                   UBool doCopyArray,
   1517                                   int32_t **pBufferToDelete,
   1518                                   UBool forceClone) {
   1519   // default parameters need to be static, therefore
   1520   // the defaults are -1 to have convenience defaults
   1521   if(newCapacity == -1) {
   1522     newCapacity = getCapacity();
   1523   }
   1524 
   1525   // while a getBuffer(minCapacity) is "open",
   1526   // prevent any modifications of the string by returning FALSE here
   1527   // if the string is bogus, then only an assignment or similar can revive it
   1528   if(!isWritable()) {
   1529     return FALSE;
   1530   }
   1531 
   1532   /*
   1533    * We need to make a copy of the array if
   1534    * the buffer is read-only, or
   1535    * the buffer is refCounted (shared), and refCount>1, or
   1536    * the buffer is too small.
   1537    * Return FALSE if memory could not be allocated.
   1538    */
   1539   if(forceClone ||
   1540      fFlags & kBufferIsReadonly ||
   1541      fFlags & kRefCounted && refCount() > 1 ||
   1542      newCapacity > getCapacity()
   1543   ) {
   1544     // check growCapacity for default value and use of the stack buffer
   1545     if(growCapacity == -1) {
   1546       growCapacity = newCapacity;
   1547     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
   1548       growCapacity = US_STACKBUF_SIZE;
   1549     }
   1550 
   1551     // save old values
   1552     UChar oldStackBuffer[US_STACKBUF_SIZE];
   1553     UChar *oldArray;
   1554     uint8_t flags = fFlags;
   1555 
   1556     if(flags&kUsingStackBuffer) {
   1557       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
   1558         // copy the stack buffer contents because it will be overwritten with
   1559         // fUnion.fFields values
   1560         us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
   1561         oldArray = oldStackBuffer;
   1562       } else {
   1563         oldArray = 0; // no need to copy from stack buffer to itself
   1564       }
   1565     } else {
   1566       oldArray = fUnion.fFields.fArray;
   1567     }
   1568 
   1569     // allocate a new array
   1570     if(allocate(growCapacity) ||
   1571        newCapacity < growCapacity && allocate(newCapacity)
   1572     ) {
   1573       if(doCopyArray && oldArray != 0) {
   1574         // copy the contents
   1575         // do not copy more than what fits - it may be smaller than before
   1576         int32_t minLength = length();
   1577         newCapacity = getCapacity();
   1578         if(newCapacity < minLength) {
   1579           minLength = newCapacity;
   1580           setLength(minLength);
   1581         }
   1582         us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
   1583       } else {
   1584         fShortLength = 0;
   1585       }
   1586 
   1587       // release the old array
   1588       if(flags & kRefCounted) {
   1589         // the array is refCounted; decrement and release if 0
   1590         int32_t *pRefCount = ((int32_t *)oldArray - 1);
   1591         if(umtx_atomic_dec(pRefCount) == 0) {
   1592           if(pBufferToDelete == 0) {
   1593             uprv_free(pRefCount);
   1594           } else {
   1595             // the caller requested to delete it himself
   1596             *pBufferToDelete = pRefCount;
   1597           }
   1598         }
   1599       }
   1600     } else {
   1601       // not enough memory for growCapacity and not even for the smaller newCapacity
   1602       // reset the old values for setToBogus() to release the array
   1603       if(!(flags&kUsingStackBuffer)) {
   1604         fUnion.fFields.fArray = oldArray;
   1605       }
   1606       fFlags = flags;
   1607       setToBogus();
   1608       return FALSE;
   1609     }
   1610   }
   1611   return TRUE;
   1612 }
   1613 U_NAMESPACE_END
   1614 
   1615 #ifdef U_STATIC_IMPLEMENTATION
   1616 /*
   1617 This should never be called. It is defined here to make sure that the
   1618 virtual vector deleting destructor is defined within unistr.cpp.
   1619 The vector deleting destructor is already a part of UObject,
   1620 but defining it here makes sure that it is included with this object file.
   1621 This makes sure that static library dependencies are kept to a minimum.
   1622 */
   1623 static void uprv_UnicodeStringDummy(void) {
   1624     U_NAMESPACE_USE
   1625     delete [] (new UnicodeString[2]);
   1626 }
   1627 #endif
   1628