1 /* 2 ****************************************************************************** 3 * Copyright (C) 1999-2010, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 * 7 * File unistr.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 09/25/98 stephen Creation. 13 * 04/20/99 stephen Overhauled per 4/16 code review. 14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX 15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from 16 * Replaceable. 17 * 06/25/01 grhoten Removed the dependency on iostream 18 ****************************************************************************** 19 */ 20 21 #include "unicode/utypes.h" 22 #include "unicode/putil.h" 23 #include "cstring.h" 24 #include "cmemory.h" 25 #include "unicode/ustring.h" 26 #include "unicode/unistr.h" 27 #include "uhash.h" 28 #include "ustr_imp.h" 29 #include "umutex.h" 30 31 #if 0 32 33 #if U_IOSTREAM_SOURCE >= 199711 34 #include <iostream> 35 using namespace std; 36 #elif U_IOSTREAM_SOURCE >= 198506 37 #include <iostream.h> 38 #endif 39 40 //DEBUGGING 41 void 42 print(const UnicodeString& s, 43 const char *name) 44 { 45 UChar c; 46 cout << name << ":|"; 47 for(int i = 0; i < s.length(); ++i) { 48 c = s[i]; 49 if(c>= 0x007E || c < 0x0020) 50 cout << "[0x" << hex << s[i] << "]"; 51 else 52 cout << (char) s[i]; 53 } 54 cout << '|' << endl; 55 } 56 57 void 58 print(const UChar *s, 59 int32_t len, 60 const char *name) 61 { 62 UChar c; 63 cout << name << ":|"; 64 for(int i = 0; i < len; ++i) { 65 c = s[i]; 66 if(c>= 0x007E || c < 0x0020) 67 cout << "[0x" << hex << s[i] << "]"; 68 else 69 cout << (char) s[i]; 70 } 71 cout << '|' << endl; 72 } 73 // END DEBUGGING 74 #endif 75 76 // Local function definitions for now 77 78 // need to copy areas that may overlap 79 static 80 inline void 81 us_arrayCopy(const UChar *src, int32_t srcStart, 82 UChar *dst, int32_t dstStart, int32_t count) 83 { 84 if(count>0) { 85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); 86 } 87 } 88 89 // u_unescapeAt() callback to get a UChar from a UnicodeString 90 U_CDECL_BEGIN 91 static UChar U_CALLCONV 92 UnicodeString_charAt(int32_t offset, void *context) { 93 return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset); 94 } 95 U_CDECL_END 96 97 U_NAMESPACE_BEGIN 98 99 /* The Replaceable virtual destructor can't be defined in the header 100 due to how AIX works with multiple definitions of virtual functions. 101 */ 102 Replaceable::~Replaceable() {} 103 Replaceable::Replaceable() {} 104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) 105 106 UnicodeString U_EXPORT2 107 operator+ (const UnicodeString &s1, const UnicodeString &s2) { 108 return 109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). 110 append(s1). 111 append(s2); 112 } 113 114 //======================================== 115 // Reference Counting functions, put at top of file so that optimizing compilers 116 // have a chance to automatically inline. 117 //======================================== 118 119 void 120 UnicodeString::addRef() 121 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);} 122 123 int32_t 124 UnicodeString::removeRef() 125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);} 126 127 int32_t 128 UnicodeString::refCount() const 129 { 130 umtx_lock(NULL); 131 // Note: without the lock to force a memory barrier, we might see a very 132 // stale value on some multi-processor systems. 133 int32_t count = *((int32_t *)fUnion.fFields.fArray - 1); 134 umtx_unlock(NULL); 135 return count; 136 } 137 138 void 139 UnicodeString::releaseArray() { 140 if((fFlags & kRefCounted) && removeRef() == 0) { 141 uprv_free((int32_t *)fUnion.fFields.fArray - 1); 142 } 143 } 144 145 146 147 //======================================== 148 // Constructors 149 //======================================== 150 UnicodeString::UnicodeString() 151 : fShortLength(0), 152 fFlags(kShortString) 153 {} 154 155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) 156 : fShortLength(0), 157 fFlags(0) 158 { 159 if(count <= 0 || (uint32_t)c > 0x10ffff) { 160 // just allocate and do not do anything else 161 allocate(capacity); 162 } else { 163 // count > 0, allocate and fill the new string with count c's 164 int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount; 165 if(capacity < length) { 166 capacity = length; 167 } 168 if(allocate(capacity)) { 169 UChar *array = getArrayStart(); 170 int32_t i = 0; 171 172 // fill the new string with c 173 if(unitCount == 1) { 174 // fill with length UChars 175 while(i < length) { 176 array[i++] = (UChar)c; 177 } 178 } else { 179 // get the code units for c 180 UChar units[UTF_MAX_CHAR_LENGTH]; 181 UTF_APPEND_CHAR_UNSAFE(units, i, c); 182 183 // now it must be i==unitCount 184 i = 0; 185 186 // for Unicode, unitCount can only be 1, 2, 3, or 4 187 // 1 is handled above 188 while(i < length) { 189 int32_t unitIdx = 0; 190 while(unitIdx < unitCount) { 191 array[i++]=units[unitIdx++]; 192 } 193 } 194 } 195 } 196 setLength(length); 197 } 198 } 199 200 UnicodeString::UnicodeString(UChar ch) 201 : fShortLength(1), 202 fFlags(kShortString) 203 { 204 fUnion.fStackBuffer[0] = ch; 205 } 206 207 UnicodeString::UnicodeString(UChar32 ch) 208 : fShortLength(0), 209 fFlags(kShortString) 210 { 211 int32_t i = 0; 212 UBool isError = FALSE; 213 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError); 214 fShortLength = (int8_t)i; 215 } 216 217 UnicodeString::UnicodeString(const UChar *text) 218 : fShortLength(0), 219 fFlags(kShortString) 220 { 221 doReplace(0, 0, text, 0, -1); 222 } 223 224 UnicodeString::UnicodeString(const UChar *text, 225 int32_t textLength) 226 : fShortLength(0), 227 fFlags(kShortString) 228 { 229 doReplace(0, 0, text, 0, textLength); 230 } 231 232 UnicodeString::UnicodeString(UBool isTerminated, 233 const UChar *text, 234 int32_t textLength) 235 : fShortLength(0), 236 fFlags(kReadonlyAlias) 237 { 238 if(text == NULL) { 239 // treat as an empty string, do not alias 240 setToEmpty(); 241 } else if(textLength < -1 || 242 (textLength == -1 && !isTerminated) || 243 (textLength >= 0 && isTerminated && text[textLength] != 0) 244 ) { 245 setToBogus(); 246 } else { 247 if(textLength == -1) { 248 // text is terminated, or else it would have failed the above test 249 textLength = u_strlen(text); 250 } 251 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 252 } 253 } 254 255 UnicodeString::UnicodeString(UChar *buff, 256 int32_t buffLength, 257 int32_t buffCapacity) 258 : fShortLength(0), 259 fFlags(kWritableAlias) 260 { 261 if(buff == NULL) { 262 // treat as an empty string, do not alias 263 setToEmpty(); 264 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 265 setToBogus(); 266 } else { 267 if(buffLength == -1) { 268 // fLength = u_strlen(buff); but do not look beyond buffCapacity 269 const UChar *p = buff, *limit = buff + buffCapacity; 270 while(p != limit && *p != 0) { 271 ++p; 272 } 273 buffLength = (int32_t)(p - buff); 274 } 275 setArray(buff, buffLength, buffCapacity); 276 } 277 } 278 279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) 280 : fShortLength(0), 281 fFlags(kShortString) 282 { 283 if(src==NULL) { 284 // treat as an empty string 285 } else { 286 if(length<0) { 287 length=(int32_t)uprv_strlen(src); 288 } 289 if(cloneArrayIfNeeded(length, length, FALSE)) { 290 u_charsToUChars(src, getArrayStart(), length); 291 setLength(length); 292 } else { 293 setToBogus(); 294 } 295 } 296 } 297 298 #if U_CHARSET_IS_UTF8 299 300 UnicodeString::UnicodeString(const char *codepageData) 301 : fShortLength(0), 302 fFlags(kShortString) { 303 if(codepageData != 0) { 304 setToUTF8(codepageData); 305 } 306 } 307 308 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) 309 : fShortLength(0), 310 fFlags(kShortString) { 311 // if there's nothing to convert, do nothing 312 if(codepageData == 0 || dataLength == 0 || dataLength < -1) { 313 return; 314 } 315 if(dataLength == -1) { 316 dataLength = (int32_t)uprv_strlen(codepageData); 317 } 318 setToUTF8(StringPiece(codepageData, dataLength)); 319 } 320 321 // else see unistr_cnv.cpp 322 #endif 323 324 UnicodeString::UnicodeString(const UnicodeString& that) 325 : Replaceable(), 326 fShortLength(0), 327 fFlags(kShortString) 328 { 329 copyFrom(that); 330 } 331 332 UnicodeString::UnicodeString(const UnicodeString& that, 333 int32_t srcStart) 334 : Replaceable(), 335 fShortLength(0), 336 fFlags(kShortString) 337 { 338 setTo(that, srcStart); 339 } 340 341 UnicodeString::UnicodeString(const UnicodeString& that, 342 int32_t srcStart, 343 int32_t srcLength) 344 : Replaceable(), 345 fShortLength(0), 346 fFlags(kShortString) 347 { 348 setTo(that, srcStart, srcLength); 349 } 350 351 // Replaceable base class clone() default implementation, does not clone 352 Replaceable * 353 Replaceable::clone() const { 354 return NULL; 355 } 356 357 // UnicodeString overrides clone() with a real implementation 358 Replaceable * 359 UnicodeString::clone() const { 360 return new UnicodeString(*this); 361 } 362 363 //======================================== 364 // array allocation 365 //======================================== 366 367 UBool 368 UnicodeString::allocate(int32_t capacity) { 369 if(capacity <= US_STACKBUF_SIZE) { 370 fFlags = kShortString; 371 } else { 372 // count bytes for the refCounter and the string capacity, and 373 // round up to a multiple of 16; then divide by 4 and allocate int32_t's 374 // to be safely aligned for the refCount 375 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer() 376 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2); 377 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words ); 378 if(array != 0) { 379 // set initial refCount and point behind the refCount 380 *array++ = 1; 381 382 // have fArray point to the first UChar 383 fUnion.fFields.fArray = (UChar *)array; 384 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); 385 fFlags = kLongString; 386 } else { 387 fShortLength = 0; 388 fUnion.fFields.fArray = 0; 389 fUnion.fFields.fCapacity = 0; 390 fFlags = kIsBogus; 391 return FALSE; 392 } 393 } 394 return TRUE; 395 } 396 397 //======================================== 398 // Destructor 399 //======================================== 400 UnicodeString::~UnicodeString() 401 { 402 releaseArray(); 403 } 404 405 //======================================== 406 // Factory methods 407 //======================================== 408 409 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) { 410 UnicodeString result; 411 result.setToUTF8(utf8); 412 return result; 413 } 414 415 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) { 416 UnicodeString result; 417 int32_t capacity; 418 // Most UTF-32 strings will be BMP-only and result in a same-length 419 // UTF-16 string. We overestimate the capacity just slightly, 420 // just in case there are a few supplementary characters. 421 if(length <= US_STACKBUF_SIZE) { 422 capacity = US_STACKBUF_SIZE; 423 } else { 424 capacity = length + (length >> 4) + 4; 425 } 426 do { 427 UChar *utf16 = result.getBuffer(capacity); 428 int32_t length16; 429 UErrorCode errorCode = U_ZERO_ERROR; 430 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16, 431 utf32, length, 432 0xfffd, // Substitution character. 433 NULL, // Don't care about number of substitutions. 434 &errorCode); 435 result.releaseBuffer(length16); 436 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 437 capacity = length16 + 1; // +1 for the terminating NUL. 438 continue; 439 } else if(U_FAILURE(errorCode)) { 440 result.setToBogus(); 441 } 442 break; 443 } while(TRUE); 444 return result; 445 } 446 447 //======================================== 448 // Assignment 449 //======================================== 450 451 UnicodeString & 452 UnicodeString::operator=(const UnicodeString &src) { 453 return copyFrom(src); 454 } 455 456 UnicodeString & 457 UnicodeString::fastCopyFrom(const UnicodeString &src) { 458 return copyFrom(src, TRUE); 459 } 460 461 UnicodeString & 462 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { 463 // if assigning to ourselves, do nothing 464 if(this == 0 || this == &src) { 465 return *this; 466 } 467 468 // is the right side bogus? 469 if(&src == 0 || src.isBogus()) { 470 setToBogus(); 471 return *this; 472 } 473 474 // delete the current contents 475 releaseArray(); 476 477 if(src.isEmpty()) { 478 // empty string - use the stack buffer 479 setToEmpty(); 480 return *this; 481 } 482 483 // we always copy the length 484 int32_t srcLength = src.length(); 485 setLength(srcLength); 486 487 // fLength>0 and not an "open" src.getBuffer(minCapacity) 488 switch(src.fFlags) { 489 case kShortString: 490 // short string using the stack buffer, do the same 491 fFlags = kShortString; 492 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR); 493 break; 494 case kLongString: 495 // src uses a refCounted string buffer, use that buffer with refCount 496 // src is const, use a cast - we don't really change it 497 ((UnicodeString &)src).addRef(); 498 // copy all fields, share the reference-counted buffer 499 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 500 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 501 fFlags = src.fFlags; 502 break; 503 case kReadonlyAlias: 504 if(fastCopy) { 505 // src is a readonly alias, do the same 506 // -> maintain the readonly alias as such 507 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 508 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 509 fFlags = src.fFlags; 510 break; 511 } 512 // else if(!fastCopy) fall through to case kWritableAlias 513 // -> allocate a new buffer and copy the contents 514 case kWritableAlias: 515 // src is a writable alias; we make a copy of that instead 516 if(allocate(srcLength)) { 517 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR); 518 break; 519 } 520 // if there is not enough memory, then fall through to setting to bogus 521 default: 522 // if src is bogus, set ourselves to bogus 523 // do not call setToBogus() here because fArray and fFlags are not consistent here 524 fShortLength = 0; 525 fUnion.fFields.fArray = 0; 526 fUnion.fFields.fCapacity = 0; 527 fFlags = kIsBogus; 528 break; 529 } 530 531 return *this; 532 } 533 534 //======================================== 535 // Miscellaneous operations 536 //======================================== 537 538 UnicodeString UnicodeString::unescape() const { 539 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity 540 const UChar *array = getBuffer(); 541 int32_t len = length(); 542 int32_t prev = 0; 543 for (int32_t i=0;;) { 544 if (i == len) { 545 result.append(array, prev, len - prev); 546 break; 547 } 548 if (array[i++] == 0x5C /*'\\'*/) { 549 result.append(array, prev, (i - 1) - prev); 550 UChar32 c = unescapeAt(i); // advances i 551 if (c < 0) { 552 result.remove(); // return empty string 553 break; // invalid escape sequence 554 } 555 result.append(c); 556 prev = i; 557 } 558 } 559 return result; 560 } 561 562 UChar32 UnicodeString::unescapeAt(int32_t &offset) const { 563 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this); 564 } 565 566 //======================================== 567 // Read-only implementation 568 //======================================== 569 int8_t 570 UnicodeString::doCompare( int32_t start, 571 int32_t length, 572 const UChar *srcChars, 573 int32_t srcStart, 574 int32_t srcLength) const 575 { 576 // compare illegal string values 577 // treat const UChar *srcChars==NULL as an empty string 578 if(isBogus()) { 579 return -1; 580 } 581 582 // pin indices to legal values 583 pinIndices(start, length); 584 585 if(srcChars == NULL) { 586 srcStart = srcLength = 0; 587 } 588 589 // get the correct pointer 590 const UChar *chars = getArrayStart(); 591 592 chars += start; 593 srcChars += srcStart; 594 595 int32_t minLength; 596 int8_t lengthResult; 597 598 // get the srcLength if necessary 599 if(srcLength < 0) { 600 srcLength = u_strlen(srcChars + srcStart); 601 } 602 603 // are we comparing different lengths? 604 if(length != srcLength) { 605 if(length < srcLength) { 606 minLength = length; 607 lengthResult = -1; 608 } else { 609 minLength = srcLength; 610 lengthResult = 1; 611 } 612 } else { 613 minLength = length; 614 lengthResult = 0; 615 } 616 617 /* 618 * note that uprv_memcmp() returns an int but we return an int8_t; 619 * we need to take care not to truncate the result - 620 * one way to do this is to right-shift the value to 621 * move the sign bit into the lower 8 bits and making sure that this 622 * does not become 0 itself 623 */ 624 625 if(minLength > 0 && chars != srcChars) { 626 int32_t result; 627 628 # if U_IS_BIG_ENDIAN 629 // big-endian: byte comparison works 630 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); 631 if(result != 0) { 632 return (int8_t)(result >> 15 | 1); 633 } 634 # else 635 // little-endian: compare UChar units 636 do { 637 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); 638 if(result != 0) { 639 return (int8_t)(result >> 15 | 1); 640 } 641 } while(--minLength > 0); 642 # endif 643 } 644 return lengthResult; 645 } 646 647 /* String compare in code point order - doCompare() compares in code unit order. */ 648 int8_t 649 UnicodeString::doCompareCodePointOrder(int32_t start, 650 int32_t length, 651 const UChar *srcChars, 652 int32_t srcStart, 653 int32_t srcLength) const 654 { 655 // compare illegal string values 656 // treat const UChar *srcChars==NULL as an empty string 657 if(isBogus()) { 658 return -1; 659 } 660 661 // pin indices to legal values 662 pinIndices(start, length); 663 664 if(srcChars == NULL) { 665 srcStart = srcLength = 0; 666 } 667 668 int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE); 669 /* translate the 32-bit result into an 8-bit one */ 670 if(diff!=0) { 671 return (int8_t)(diff >> 15 | 1); 672 } else { 673 return 0; 674 } 675 } 676 677 int32_t 678 UnicodeString::getLength() const { 679 return length(); 680 } 681 682 UChar 683 UnicodeString::getCharAt(int32_t offset) const { 684 return charAt(offset); 685 } 686 687 UChar32 688 UnicodeString::getChar32At(int32_t offset) const { 689 return char32At(offset); 690 } 691 692 int32_t 693 UnicodeString::countChar32(int32_t start, int32_t length) const { 694 pinIndices(start, length); 695 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL 696 return u_countChar32(getArrayStart()+start, length); 697 } 698 699 UBool 700 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { 701 pinIndices(start, length); 702 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL 703 return u_strHasMoreChar32Than(getArrayStart()+start, length, number); 704 } 705 706 int32_t 707 UnicodeString::moveIndex32(int32_t index, int32_t delta) const { 708 // pin index 709 int32_t len = length(); 710 if(index<0) { 711 index=0; 712 } else if(index>len) { 713 index=len; 714 } 715 716 const UChar *array = getArrayStart(); 717 if(delta>0) { 718 UTF_FWD_N(array, index, len, delta); 719 } else { 720 UTF_BACK_N(array, 0, index, -delta); 721 } 722 723 return index; 724 } 725 726 void 727 UnicodeString::doExtract(int32_t start, 728 int32_t length, 729 UChar *dst, 730 int32_t dstStart) const 731 { 732 // pin indices to legal values 733 pinIndices(start, length); 734 735 // do not copy anything if we alias dst itself 736 const UChar *array = getArrayStart(); 737 if(array + start != dst + dstStart) { 738 us_arrayCopy(array, start, dst, dstStart, length); 739 } 740 } 741 742 int32_t 743 UnicodeString::extract(UChar *dest, int32_t destCapacity, 744 UErrorCode &errorCode) const { 745 int32_t len = length(); 746 if(U_SUCCESS(errorCode)) { 747 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { 748 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 749 } else { 750 const UChar *array = getArrayStart(); 751 if(len>0 && len<=destCapacity && array!=dest) { 752 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); 753 } 754 return u_terminateUChars(dest, destCapacity, len, &errorCode); 755 } 756 } 757 758 return len; 759 } 760 761 int32_t 762 UnicodeString::extract(int32_t start, 763 int32_t length, 764 char *target, 765 int32_t targetCapacity, 766 enum EInvariant) const 767 { 768 // if the arguments are illegal, then do nothing 769 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { 770 return 0; 771 } 772 773 // pin the indices to legal values 774 pinIndices(start, length); 775 776 if(length <= targetCapacity) { 777 u_UCharsToChars(getArrayStart() + start, target, length); 778 } 779 UErrorCode status = U_ZERO_ERROR; 780 return u_terminateChars(target, targetCapacity, length, &status); 781 } 782 783 UnicodeString 784 UnicodeString::tempSubString(int32_t start, int32_t len) const { 785 pinIndices(start, len); 786 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer 787 if(array==NULL) { 788 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string 789 len=-2; // bogus result string 790 } 791 return UnicodeString(FALSE, array + start, len); 792 } 793 794 int32_t 795 UnicodeString::toUTF8(int32_t start, int32_t len, 796 char *target, int32_t capacity) const { 797 pinIndices(start, len); 798 int32_t length8; 799 UErrorCode errorCode = U_ZERO_ERROR; 800 u_strToUTF8WithSub(target, capacity, &length8, 801 getBuffer() + start, len, 802 0xFFFD, // Standard substitution character. 803 NULL, // Don't care about number of substitutions. 804 &errorCode); 805 return length8; 806 } 807 808 #if U_CHARSET_IS_UTF8 809 810 int32_t 811 UnicodeString::extract(int32_t start, int32_t len, 812 char *target, uint32_t dstSize) const { 813 // if the arguments are illegal, then do nothing 814 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { 815 return 0; 816 } 817 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff); 818 } 819 820 // else see unistr_cnv.cpp 821 #endif 822 823 void 824 UnicodeString::extractBetween(int32_t start, 825 int32_t limit, 826 UnicodeString& target) const { 827 pinIndex(start); 828 pinIndex(limit); 829 doExtract(start, limit - start, target); 830 } 831 832 // When converting from UTF-16 to UTF-8, the result will have at most 3 times 833 // as many bytes as the source has UChars. 834 // The "worst cases" are writing systems like Indic, Thai and CJK with 835 // 3:1 bytes:UChars. 836 void 837 UnicodeString::toUTF8(ByteSink &sink) const { 838 int32_t length16 = length(); 839 if(length16 != 0) { 840 char stackBuffer[1024]; 841 int32_t capacity = (int32_t)sizeof(stackBuffer); 842 UBool utf8IsOwned = FALSE; 843 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity, 844 3*length16, 845 stackBuffer, capacity, 846 &capacity); 847 int32_t length8 = 0; 848 UErrorCode errorCode = U_ZERO_ERROR; 849 u_strToUTF8WithSub(utf8, capacity, &length8, 850 getBuffer(), length16, 851 0xFFFD, // Standard substitution character. 852 NULL, // Don't care about number of substitutions. 853 &errorCode); 854 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 855 utf8 = (char *)uprv_malloc(length8); 856 if(utf8 != NULL) { 857 utf8IsOwned = TRUE; 858 errorCode = U_ZERO_ERROR; 859 u_strToUTF8WithSub(utf8, length8, &length8, 860 getBuffer(), length16, 861 0xFFFD, // Standard substitution character. 862 NULL, // Don't care about number of substitutions. 863 &errorCode); 864 } else { 865 errorCode = U_MEMORY_ALLOCATION_ERROR; 866 } 867 } 868 if(U_SUCCESS(errorCode)) { 869 sink.Append(utf8, length8); 870 sink.Flush(); 871 } 872 if(utf8IsOwned) { 873 uprv_free(utf8); 874 } 875 } 876 } 877 878 int32_t 879 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const { 880 int32_t length32=0; 881 if(U_SUCCESS(errorCode)) { 882 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments. 883 u_strToUTF32WithSub(utf32, capacity, &length32, 884 getBuffer(), length(), 885 0xfffd, // Substitution character. 886 NULL, // Don't care about number of substitutions. 887 &errorCode); 888 } 889 return length32; 890 } 891 892 int32_t 893 UnicodeString::indexOf(const UChar *srcChars, 894 int32_t srcStart, 895 int32_t srcLength, 896 int32_t start, 897 int32_t length) const 898 { 899 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 900 return -1; 901 } 902 903 // UnicodeString does not find empty substrings 904 if(srcLength < 0 && srcChars[srcStart] == 0) { 905 return -1; 906 } 907 908 // get the indices within bounds 909 pinIndices(start, length); 910 911 // find the first occurrence of the substring 912 const UChar *array = getArrayStart(); 913 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); 914 if(match == NULL) { 915 return -1; 916 } else { 917 return (int32_t)(match - array); 918 } 919 } 920 921 int32_t 922 UnicodeString::doIndexOf(UChar c, 923 int32_t start, 924 int32_t length) const 925 { 926 // pin indices 927 pinIndices(start, length); 928 929 // find the first occurrence of c 930 const UChar *array = getArrayStart(); 931 const UChar *match = u_memchr(array + start, c, length); 932 if(match == NULL) { 933 return -1; 934 } else { 935 return (int32_t)(match - array); 936 } 937 } 938 939 int32_t 940 UnicodeString::doIndexOf(UChar32 c, 941 int32_t start, 942 int32_t length) const { 943 // pin indices 944 pinIndices(start, length); 945 946 // find the first occurrence of c 947 const UChar *array = getArrayStart(); 948 const UChar *match = u_memchr32(array + start, c, length); 949 if(match == NULL) { 950 return -1; 951 } else { 952 return (int32_t)(match - array); 953 } 954 } 955 956 int32_t 957 UnicodeString::lastIndexOf(const UChar *srcChars, 958 int32_t srcStart, 959 int32_t srcLength, 960 int32_t start, 961 int32_t length) const 962 { 963 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 964 return -1; 965 } 966 967 // UnicodeString does not find empty substrings 968 if(srcLength < 0 && srcChars[srcStart] == 0) { 969 return -1; 970 } 971 972 // get the indices within bounds 973 pinIndices(start, length); 974 975 // find the last occurrence of the substring 976 const UChar *array = getArrayStart(); 977 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength); 978 if(match == NULL) { 979 return -1; 980 } else { 981 return (int32_t)(match - array); 982 } 983 } 984 985 int32_t 986 UnicodeString::doLastIndexOf(UChar c, 987 int32_t start, 988 int32_t length) const 989 { 990 if(isBogus()) { 991 return -1; 992 } 993 994 // pin indices 995 pinIndices(start, length); 996 997 // find the last occurrence of c 998 const UChar *array = getArrayStart(); 999 const UChar *match = u_memrchr(array + start, c, length); 1000 if(match == NULL) { 1001 return -1; 1002 } else { 1003 return (int32_t)(match - array); 1004 } 1005 } 1006 1007 int32_t 1008 UnicodeString::doLastIndexOf(UChar32 c, 1009 int32_t start, 1010 int32_t length) const { 1011 // pin indices 1012 pinIndices(start, length); 1013 1014 // find the last occurrence of c 1015 const UChar *array = getArrayStart(); 1016 const UChar *match = u_memrchr32(array + start, c, length); 1017 if(match == NULL) { 1018 return -1; 1019 } else { 1020 return (int32_t)(match - array); 1021 } 1022 } 1023 1024 //======================================== 1025 // Write implementation 1026 //======================================== 1027 1028 UnicodeString& 1029 UnicodeString::findAndReplace(int32_t start, 1030 int32_t length, 1031 const UnicodeString& oldText, 1032 int32_t oldStart, 1033 int32_t oldLength, 1034 const UnicodeString& newText, 1035 int32_t newStart, 1036 int32_t newLength) 1037 { 1038 if(isBogus() || oldText.isBogus() || newText.isBogus()) { 1039 return *this; 1040 } 1041 1042 pinIndices(start, length); 1043 oldText.pinIndices(oldStart, oldLength); 1044 newText.pinIndices(newStart, newLength); 1045 1046 if(oldLength == 0) { 1047 return *this; 1048 } 1049 1050 while(length > 0 && length >= oldLength) { 1051 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length); 1052 if(pos < 0) { 1053 // no more oldText's here: done 1054 break; 1055 } else { 1056 // we found oldText, replace it by newText and go beyond it 1057 replace(pos, oldLength, newText, newStart, newLength); 1058 length -= pos + oldLength - start; 1059 start = pos + newLength; 1060 } 1061 } 1062 1063 return *this; 1064 } 1065 1066 1067 void 1068 UnicodeString::setToBogus() 1069 { 1070 releaseArray(); 1071 1072 fShortLength = 0; 1073 fUnion.fFields.fArray = 0; 1074 fUnion.fFields.fCapacity = 0; 1075 fFlags = kIsBogus; 1076 } 1077 1078 // turn a bogus string into an empty one 1079 void 1080 UnicodeString::unBogus() { 1081 if(fFlags & kIsBogus) { 1082 setToEmpty(); 1083 } 1084 } 1085 1086 // setTo() analogous to the readonly-aliasing constructor with the same signature 1087 UnicodeString & 1088 UnicodeString::setTo(UBool isTerminated, 1089 const UChar *text, 1090 int32_t textLength) 1091 { 1092 if(fFlags & kOpenGetBuffer) { 1093 // do not modify a string that has an "open" getBuffer(minCapacity) 1094 return *this; 1095 } 1096 1097 if(text == NULL) { 1098 // treat as an empty string, do not alias 1099 releaseArray(); 1100 setToEmpty(); 1101 return *this; 1102 } 1103 1104 if( textLength < -1 || 1105 (textLength == -1 && !isTerminated) || 1106 (textLength >= 0 && isTerminated && text[textLength] != 0) 1107 ) { 1108 setToBogus(); 1109 return *this; 1110 } 1111 1112 releaseArray(); 1113 1114 if(textLength == -1) { 1115 // text is terminated, or else it would have failed the above test 1116 textLength = u_strlen(text); 1117 } 1118 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 1119 1120 fFlags = kReadonlyAlias; 1121 return *this; 1122 } 1123 1124 // setTo() analogous to the writable-aliasing constructor with the same signature 1125 UnicodeString & 1126 UnicodeString::setTo(UChar *buffer, 1127 int32_t buffLength, 1128 int32_t buffCapacity) { 1129 if(fFlags & kOpenGetBuffer) { 1130 // do not modify a string that has an "open" getBuffer(minCapacity) 1131 return *this; 1132 } 1133 1134 if(buffer == NULL) { 1135 // treat as an empty string, do not alias 1136 releaseArray(); 1137 setToEmpty(); 1138 return *this; 1139 } 1140 1141 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 1142 setToBogus(); 1143 return *this; 1144 } else if(buffLength == -1) { 1145 // buffLength = u_strlen(buff); but do not look beyond buffCapacity 1146 const UChar *p = buffer, *limit = buffer + buffCapacity; 1147 while(p != limit && *p != 0) { 1148 ++p; 1149 } 1150 buffLength = (int32_t)(p - buffer); 1151 } 1152 1153 releaseArray(); 1154 1155 setArray(buffer, buffLength, buffCapacity); 1156 fFlags = kWritableAlias; 1157 return *this; 1158 } 1159 1160 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) { 1161 unBogus(); 1162 int32_t length = utf8.length(); 1163 int32_t capacity; 1164 // The UTF-16 string will be at most as long as the UTF-8 string. 1165 if(length <= US_STACKBUF_SIZE) { 1166 capacity = US_STACKBUF_SIZE; 1167 } else { 1168 capacity = length + 1; // +1 for the terminating NUL. 1169 } 1170 UChar *utf16 = getBuffer(capacity); 1171 int32_t length16; 1172 UErrorCode errorCode = U_ZERO_ERROR; 1173 u_strFromUTF8WithSub(utf16, getCapacity(), &length16, 1174 utf8.data(), length, 1175 0xfffd, // Substitution character. 1176 NULL, // Don't care about number of substitutions. 1177 &errorCode); 1178 releaseBuffer(length16); 1179 if(U_FAILURE(errorCode)) { 1180 setToBogus(); 1181 } 1182 return *this; 1183 } 1184 1185 UnicodeString& 1186 UnicodeString::setCharAt(int32_t offset, 1187 UChar c) 1188 { 1189 int32_t len = length(); 1190 if(cloneArrayIfNeeded() && len > 0) { 1191 if(offset < 0) { 1192 offset = 0; 1193 } else if(offset >= len) { 1194 offset = len - 1; 1195 } 1196 1197 getArrayStart()[offset] = c; 1198 } 1199 return *this; 1200 } 1201 1202 UnicodeString& 1203 UnicodeString::doReplace( int32_t start, 1204 int32_t length, 1205 const UnicodeString& src, 1206 int32_t srcStart, 1207 int32_t srcLength) 1208 { 1209 if(!src.isBogus()) { 1210 // pin the indices to legal values 1211 src.pinIndices(srcStart, srcLength); 1212 1213 // get the characters from src 1214 // and replace the range in ourselves with them 1215 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength); 1216 } else { 1217 // remove the range 1218 return doReplace(start, length, 0, 0, 0); 1219 } 1220 } 1221 1222 UnicodeString& 1223 UnicodeString::doReplace(int32_t start, 1224 int32_t length, 1225 const UChar *srcChars, 1226 int32_t srcStart, 1227 int32_t srcLength) 1228 { 1229 if(!isWritable()) { 1230 return *this; 1231 } 1232 1233 int32_t oldLength = this->length(); 1234 1235 // optimize (read-only alias).remove(0, start) and .remove(start, end) 1236 if((fFlags&kBufferIsReadonly) && srcLength == 0) { 1237 if(start == 0) { 1238 // remove prefix by adjusting the array pointer 1239 pinIndex(length); 1240 fUnion.fFields.fArray += length; 1241 fUnion.fFields.fCapacity -= length; 1242 setLength(oldLength - length); 1243 return *this; 1244 } else { 1245 pinIndex(start); 1246 if(length >= (oldLength - start)) { 1247 // remove suffix by reducing the length (like truncate()) 1248 setLength(start); 1249 fUnion.fFields.fCapacity = start; // not NUL-terminated any more 1250 return *this; 1251 } 1252 } 1253 } 1254 1255 if(srcChars == 0) { 1256 srcStart = srcLength = 0; 1257 } else if(srcLength < 0) { 1258 // get the srcLength if necessary 1259 srcLength = u_strlen(srcChars + srcStart); 1260 } 1261 1262 // calculate the size of the string after the replace 1263 int32_t newSize; 1264 1265 // optimize append() onto a large-enough, owned string 1266 if(start >= oldLength) { 1267 newSize = oldLength + srcLength; 1268 if(newSize <= getCapacity() && isBufferWritable()) { 1269 us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength); 1270 setLength(newSize); 1271 return *this; 1272 } else { 1273 // pin the indices to legal values 1274 start = oldLength; 1275 length = 0; 1276 } 1277 } else { 1278 // pin the indices to legal values 1279 pinIndices(start, length); 1280 1281 newSize = oldLength - length + srcLength; 1282 } 1283 1284 // the following may change fArray but will not copy the current contents; 1285 // therefore we need to keep the current fArray 1286 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1287 UChar *oldArray; 1288 if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) { 1289 // copy the stack buffer contents because it will be overwritten with 1290 // fUnion.fFields values 1291 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength); 1292 oldArray = oldStackBuffer; 1293 } else { 1294 oldArray = getArrayStart(); 1295 } 1296 1297 // clone our array and allocate a bigger array if needed 1298 int32_t *bufferToDelete = 0; 1299 if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize, 1300 FALSE, &bufferToDelete) 1301 ) { 1302 return *this; 1303 } 1304 1305 // now do the replace 1306 1307 UChar *newArray = getArrayStart(); 1308 if(newArray != oldArray) { 1309 // if fArray changed, then we need to copy everything except what will change 1310 us_arrayCopy(oldArray, 0, newArray, 0, start); 1311 us_arrayCopy(oldArray, start + length, 1312 newArray, start + srcLength, 1313 oldLength - (start + length)); 1314 } else if(length != srcLength) { 1315 // fArray did not change; copy only the portion that isn't changing, leaving a hole 1316 us_arrayCopy(oldArray, start + length, 1317 newArray, start + srcLength, 1318 oldLength - (start + length)); 1319 } 1320 1321 // now fill in the hole with the new string 1322 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); 1323 1324 setLength(newSize); 1325 1326 // delayed delete in case srcChars == fArray when we started, and 1327 // to keep oldArray alive for the above operations 1328 if (bufferToDelete) { 1329 uprv_free(bufferToDelete); 1330 } 1331 1332 return *this; 1333 } 1334 1335 /** 1336 * Replaceable API 1337 */ 1338 void 1339 UnicodeString::handleReplaceBetween(int32_t start, 1340 int32_t limit, 1341 const UnicodeString& text) { 1342 replaceBetween(start, limit, text); 1343 } 1344 1345 /** 1346 * Replaceable API 1347 */ 1348 void 1349 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { 1350 if (limit <= start) { 1351 return; // Nothing to do; avoid bogus malloc call 1352 } 1353 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); 1354 // Check to make sure text is not null. 1355 if (text != NULL) { 1356 extractBetween(start, limit, text, 0); 1357 insert(dest, text, 0, limit - start); 1358 uprv_free(text); 1359 } 1360 } 1361 1362 /** 1363 * Replaceable API 1364 * 1365 * NOTE: This is for the Replaceable class. There is no rep.cpp, 1366 * so we implement this function here. 1367 */ 1368 UBool Replaceable::hasMetaData() const { 1369 return TRUE; 1370 } 1371 1372 /** 1373 * Replaceable API 1374 */ 1375 UBool UnicodeString::hasMetaData() const { 1376 return FALSE; 1377 } 1378 1379 UnicodeString& 1380 UnicodeString::doReverse(int32_t start, int32_t length) { 1381 if(length <= 1 || !cloneArrayIfNeeded()) { 1382 return *this; 1383 } 1384 1385 // pin the indices to legal values 1386 pinIndices(start, length); 1387 if(length <= 1) { // pinIndices() might have shrunk the length 1388 return *this; 1389 } 1390 1391 UChar *left = getArrayStart() + start; 1392 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2) 1393 UChar swap; 1394 UBool hasSupplementary = FALSE; 1395 1396 // Before the loop we know left<right because length>=2. 1397 do { 1398 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left); 1399 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right); 1400 *right-- = swap; 1401 } while(left < right); 1402 // Make sure to test the middle code unit of an odd-length string. 1403 // Redundant if the length is even. 1404 hasSupplementary |= (UBool)U16_IS_LEAD(*left); 1405 1406 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ 1407 if(hasSupplementary) { 1408 UChar swap2; 1409 1410 left = getArrayStart() + start; 1411 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right 1412 while(left < right) { 1413 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) { 1414 *left++ = swap2; 1415 *left++ = swap; 1416 } else { 1417 ++left; 1418 } 1419 } 1420 } 1421 1422 return *this; 1423 } 1424 1425 UBool 1426 UnicodeString::padLeading(int32_t targetLength, 1427 UChar padChar) 1428 { 1429 int32_t oldLength = length(); 1430 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1431 return FALSE; 1432 } else { 1433 // move contents up by padding width 1434 UChar *array = getArrayStart(); 1435 int32_t start = targetLength - oldLength; 1436 us_arrayCopy(array, 0, array, start, oldLength); 1437 1438 // fill in padding character 1439 while(--start >= 0) { 1440 array[start] = padChar; 1441 } 1442 setLength(targetLength); 1443 return TRUE; 1444 } 1445 } 1446 1447 UBool 1448 UnicodeString::padTrailing(int32_t targetLength, 1449 UChar padChar) 1450 { 1451 int32_t oldLength = length(); 1452 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1453 return FALSE; 1454 } else { 1455 // fill in padding character 1456 UChar *array = getArrayStart(); 1457 int32_t length = targetLength; 1458 while(--length >= oldLength) { 1459 array[length] = padChar; 1460 } 1461 setLength(targetLength); 1462 return TRUE; 1463 } 1464 } 1465 1466 //======================================== 1467 // Hashing 1468 //======================================== 1469 int32_t 1470 UnicodeString::doHashCode() const 1471 { 1472 /* Delegate hash computation to uhash. This makes UnicodeString 1473 * hashing consistent with UChar* hashing. */ 1474 int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length()); 1475 if (hashCode == kInvalidHashCode) { 1476 hashCode = kEmptyHashCode; 1477 } 1478 return hashCode; 1479 } 1480 1481 //======================================== 1482 // External Buffer 1483 //======================================== 1484 1485 UChar * 1486 UnicodeString::getBuffer(int32_t minCapacity) { 1487 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { 1488 fFlags|=kOpenGetBuffer; 1489 fShortLength=0; 1490 return getArrayStart(); 1491 } else { 1492 return 0; 1493 } 1494 } 1495 1496 void 1497 UnicodeString::releaseBuffer(int32_t newLength) { 1498 if(fFlags&kOpenGetBuffer && newLength>=-1) { 1499 // set the new fLength 1500 int32_t capacity=getCapacity(); 1501 if(newLength==-1) { 1502 // the new length is the string length, capped by fCapacity 1503 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; 1504 while(p<limit && *p!=0) { 1505 ++p; 1506 } 1507 newLength=(int32_t)(p-array); 1508 } else if(newLength>capacity) { 1509 newLength=capacity; 1510 } 1511 setLength(newLength); 1512 fFlags&=~kOpenGetBuffer; 1513 } 1514 } 1515 1516 //======================================== 1517 // Miscellaneous 1518 //======================================== 1519 UBool 1520 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, 1521 int32_t growCapacity, 1522 UBool doCopyArray, 1523 int32_t **pBufferToDelete, 1524 UBool forceClone) { 1525 // default parameters need to be static, therefore 1526 // the defaults are -1 to have convenience defaults 1527 if(newCapacity == -1) { 1528 newCapacity = getCapacity(); 1529 } 1530 1531 // while a getBuffer(minCapacity) is "open", 1532 // prevent any modifications of the string by returning FALSE here 1533 // if the string is bogus, then only an assignment or similar can revive it 1534 if(!isWritable()) { 1535 return FALSE; 1536 } 1537 1538 /* 1539 * We need to make a copy of the array if 1540 * the buffer is read-only, or 1541 * the buffer is refCounted (shared), and refCount>1, or 1542 * the buffer is too small. 1543 * Return FALSE if memory could not be allocated. 1544 */ 1545 if(forceClone || 1546 fFlags & kBufferIsReadonly || 1547 (fFlags & kRefCounted && refCount() > 1) || 1548 newCapacity > getCapacity() 1549 ) { 1550 // check growCapacity for default value and use of the stack buffer 1551 if(growCapacity == -1) { 1552 growCapacity = newCapacity; 1553 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { 1554 growCapacity = US_STACKBUF_SIZE; 1555 } 1556 1557 // save old values 1558 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1559 UChar *oldArray; 1560 uint8_t flags = fFlags; 1561 1562 if(flags&kUsingStackBuffer) { 1563 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { 1564 // copy the stack buffer contents because it will be overwritten with 1565 // fUnion.fFields values 1566 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength); 1567 oldArray = oldStackBuffer; 1568 } else { 1569 oldArray = 0; // no need to copy from stack buffer to itself 1570 } 1571 } else { 1572 oldArray = fUnion.fFields.fArray; 1573 } 1574 1575 // allocate a new array 1576 if(allocate(growCapacity) || 1577 (newCapacity < growCapacity && allocate(newCapacity)) 1578 ) { 1579 if(doCopyArray && oldArray != 0) { 1580 // copy the contents 1581 // do not copy more than what fits - it may be smaller than before 1582 int32_t minLength = length(); 1583 newCapacity = getCapacity(); 1584 if(newCapacity < minLength) { 1585 minLength = newCapacity; 1586 setLength(minLength); 1587 } 1588 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); 1589 } else { 1590 fShortLength = 0; 1591 } 1592 1593 // release the old array 1594 if(flags & kRefCounted) { 1595 // the array is refCounted; decrement and release if 0 1596 int32_t *pRefCount = ((int32_t *)oldArray - 1); 1597 if(umtx_atomic_dec(pRefCount) == 0) { 1598 if(pBufferToDelete == 0) { 1599 uprv_free(pRefCount); 1600 } else { 1601 // the caller requested to delete it himself 1602 *pBufferToDelete = pRefCount; 1603 } 1604 } 1605 } 1606 } else { 1607 // not enough memory for growCapacity and not even for the smaller newCapacity 1608 // reset the old values for setToBogus() to release the array 1609 if(!(flags&kUsingStackBuffer)) { 1610 fUnion.fFields.fArray = oldArray; 1611 } 1612 fFlags = flags; 1613 setToBogus(); 1614 return FALSE; 1615 } 1616 } 1617 return TRUE; 1618 } 1619 U_NAMESPACE_END 1620 1621 #ifdef U_STATIC_IMPLEMENTATION 1622 /* 1623 This should never be called. It is defined here to make sure that the 1624 virtual vector deleting destructor is defined within unistr.cpp. 1625 The vector deleting destructor is already a part of UObject, 1626 but defining it here makes sure that it is included with this object file. 1627 This makes sure that static library dependencies are kept to a minimum. 1628 */ 1629 static void uprv_UnicodeStringDummy(void) { 1630 U_NAMESPACE_USE 1631 delete [] (new UnicodeString[2]); 1632 } 1633 #endif 1634