/*
******************************************************************************
* Copyright (C) 1999-2010, International Business Machines Corporation and   *
* others. All Rights Reserved.                                               *
******************************************************************************
*
* File unistr.cpp
*
* Modification History:
*
*   Date        Name        Description
*   09/25/98    stephen     Creation.
*   04/20/99    stephen     Overhauled per 4/16 code review.
*   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
*   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
*                           Replaceable.
*   06/25/01    grhoten     Removed the dependency on iostream
******************************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "cstring.h"
#include "cmemory.h"
#include "unicode/ustring.h"
#include "unicode/unistr.h"
#include "uhash.h"
#include "ustr_imp.h"
#include "umutex.h"

// The block below is compiled out (#if 0). It holds iostream-based debugging
// helpers that dump a string, printing code units outside 0x0020..0x007D
// as "[0x....]" and the rest as plain chars.
#if 0

#if U_IOSTREAM_SOURCE >= 199711
#include <iostream>
using namespace std;
#elif U_IOSTREAM_SOURCE >= 198506
#include <iostream.h>
#endif

//DEBUGGING
// Print the contents of a UnicodeString between '|' delimiters, prefixed by name.
void
print(const UnicodeString& s,
      const char *name)
{
  UChar c;
  cout << name << ":|";
  for(int i = 0; i < s.length(); ++i) {
    c = s[i];
    if(c>= 0x007E || c < 0x0020)
      cout << "[0x" << hex << s[i] << "]";
    else
      cout << (char) s[i];
  }
  cout << '|' << endl;
}

// Same as above, for a raw UChar array of len code units.
void
print(const UChar *s,
      int32_t len,
      const char *name)
{
  UChar c;
  cout << name << ":|";
  for(int i = 0; i < len; ++i) {
    c = s[i];
    if(c>= 0x007E || c < 0x0020)
      cout << "[0x" << hex << s[i] << "]";
    else
      cout << (char) s[i];
  }
  cout << '|' << endl;
}
// END DEBUGGING
#endif

// Local function definitions for now

// need to copy areas that may overlap
// Copies count UChars from src+srcStart to dst+dstStart; a count <= 0 copies nothing.
static
inline void
us_arrayCopy(const UChar *src, int32_t srcStart,
         UChar *dst,
int32_t dstStart, int32_t count) 83 { 84 if(count>0) { 85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); 86 } 87 } 88 89 // u_unescapeAt() callback to get a UChar from a UnicodeString 90 U_CDECL_BEGIN 91 static UChar U_CALLCONV 92 UnicodeString_charAt(int32_t offset, void *context) { 93 return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset); 94 } 95 U_CDECL_END 96 97 U_NAMESPACE_BEGIN 98 99 /* The Replaceable virtual destructor can't be defined in the header 100 due to how AIX works with multiple definitions of virtual functions. 101 */ 102 Replaceable::~Replaceable() {} 103 Replaceable::Replaceable() {} 104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) 105 106 UnicodeString U_EXPORT2 107 operator+ (const UnicodeString &s1, const UnicodeString &s2) { 108 return 109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). 110 append(s1). 111 append(s2); 112 } 113 114 //======================================== 115 // Reference Counting functions, put at top of file so that optimizing compilers 116 // have a chance to automatically inline. 117 //======================================== 118 119 void 120 UnicodeString::addRef() 121 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);} 122 123 int32_t 124 UnicodeString::removeRef() 125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);} 126 127 int32_t 128 UnicodeString::refCount() const 129 { 130 umtx_lock(NULL); 131 // Note: without the lock to force a memory barrier, we might see a very 132 // stale value on some multi-processor systems. 
133 int32_t count = *((int32_t *)fUnion.fFields.fArray - 1); 134 umtx_unlock(NULL); 135 return count; 136 } 137 138 void 139 UnicodeString::releaseArray() { 140 if((fFlags & kRefCounted) && removeRef() == 0) { 141 uprv_free((int32_t *)fUnion.fFields.fArray - 1); 142 } 143 } 144 145 146 147 //======================================== 148 // Constructors 149 //======================================== 150 UnicodeString::UnicodeString() 151 : fShortLength(0), 152 fFlags(kShortString) 153 {} 154 155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) 156 : fShortLength(0), 157 fFlags(0) 158 { 159 if(count <= 0 || (uint32_t)c > 0x10ffff) { 160 // just allocate and do not do anything else 161 allocate(capacity); 162 } else { 163 // count > 0, allocate and fill the new string with count c's 164 int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount; 165 if(capacity < length) { 166 capacity = length; 167 } 168 if(allocate(capacity)) { 169 UChar *array = getArrayStart(); 170 int32_t i = 0; 171 172 // fill the new string with c 173 if(unitCount == 1) { 174 // fill with length UChars 175 while(i < length) { 176 array[i++] = (UChar)c; 177 } 178 } else { 179 // get the code units for c 180 UChar units[UTF_MAX_CHAR_LENGTH]; 181 UTF_APPEND_CHAR_UNSAFE(units, i, c); 182 183 // now it must be i==unitCount 184 i = 0; 185 186 // for Unicode, unitCount can only be 1, 2, 3, or 4 187 // 1 is handled above 188 while(i < length) { 189 int32_t unitIdx = 0; 190 while(unitIdx < unitCount) { 191 array[i++]=units[unitIdx++]; 192 } 193 } 194 } 195 } 196 setLength(length); 197 } 198 } 199 200 UnicodeString::UnicodeString(UChar ch) 201 : fShortLength(1), 202 fFlags(kShortString) 203 { 204 fUnion.fStackBuffer[0] = ch; 205 } 206 207 UnicodeString::UnicodeString(UChar32 ch) 208 : fShortLength(0), 209 fFlags(kShortString) 210 { 211 int32_t i = 0; 212 UBool isError = FALSE; 213 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError); 214 fShortLength = 
(int8_t)i; 215 } 216 217 UnicodeString::UnicodeString(const UChar *text) 218 : fShortLength(0), 219 fFlags(kShortString) 220 { 221 doReplace(0, 0, text, 0, -1); 222 } 223 224 UnicodeString::UnicodeString(const UChar *text, 225 int32_t textLength) 226 : fShortLength(0), 227 fFlags(kShortString) 228 { 229 doReplace(0, 0, text, 0, textLength); 230 } 231 232 UnicodeString::UnicodeString(UBool isTerminated, 233 const UChar *text, 234 int32_t textLength) 235 : fShortLength(0), 236 fFlags(kReadonlyAlias) 237 { 238 if(text == NULL) { 239 // treat as an empty string, do not alias 240 setToEmpty(); 241 } else if(textLength < -1 || 242 (textLength == -1 && !isTerminated) || 243 (textLength >= 0 && isTerminated && text[textLength] != 0) 244 ) { 245 setToBogus(); 246 } else { 247 if(textLength == -1) { 248 // text is terminated, or else it would have failed the above test 249 textLength = u_strlen(text); 250 } 251 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 252 } 253 } 254 255 UnicodeString::UnicodeString(UChar *buff, 256 int32_t buffLength, 257 int32_t buffCapacity) 258 : fShortLength(0), 259 fFlags(kWritableAlias) 260 { 261 if(buff == NULL) { 262 // treat as an empty string, do not alias 263 setToEmpty(); 264 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 265 setToBogus(); 266 } else { 267 if(buffLength == -1) { 268 // fLength = u_strlen(buff); but do not look beyond buffCapacity 269 const UChar *p = buff, *limit = buff + buffCapacity; 270 while(p != limit && *p != 0) { 271 ++p; 272 } 273 buffLength = (int32_t)(p - buff); 274 } 275 setArray(buff, buffLength, buffCapacity); 276 } 277 } 278 279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) 280 : fShortLength(0), 281 fFlags(kShortString) 282 { 283 if(src==NULL) { 284 // treat as an empty string 285 } else { 286 if(length<0) { 287 length=(int32_t)uprv_strlen(src); 288 } 289 if(cloneArrayIfNeeded(length, length, FALSE)) { 290 
u_charsToUChars(src, getArrayStart(), length); 291 setLength(length); 292 } else { 293 setToBogus(); 294 } 295 } 296 } 297 298 #if U_CHARSET_IS_UTF8 299 300 UnicodeString::UnicodeString(const char *codepageData) 301 : fShortLength(0), 302 fFlags(kShortString) { 303 if(codepageData != 0) { 304 setToUTF8(codepageData); 305 } 306 } 307 308 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) 309 : fShortLength(0), 310 fFlags(kShortString) { 311 // if there's nothing to convert, do nothing 312 if(codepageData == 0 || dataLength == 0 || dataLength < -1) { 313 return; 314 } 315 if(dataLength == -1) { 316 dataLength = (int32_t)uprv_strlen(codepageData); 317 } 318 setToUTF8(StringPiece(codepageData, dataLength)); 319 } 320 321 // else see unistr_cnv.cpp 322 #endif 323 324 UnicodeString::UnicodeString(const UnicodeString& that) 325 : Replaceable(), 326 fShortLength(0), 327 fFlags(kShortString) 328 { 329 copyFrom(that); 330 } 331 332 UnicodeString::UnicodeString(const UnicodeString& that, 333 int32_t srcStart) 334 : Replaceable(), 335 fShortLength(0), 336 fFlags(kShortString) 337 { 338 setTo(that, srcStart); 339 } 340 341 UnicodeString::UnicodeString(const UnicodeString& that, 342 int32_t srcStart, 343 int32_t srcLength) 344 : Replaceable(), 345 fShortLength(0), 346 fFlags(kShortString) 347 { 348 setTo(that, srcStart, srcLength); 349 } 350 351 // Replaceable base class clone() default implementation, does not clone 352 Replaceable * 353 Replaceable::clone() const { 354 return NULL; 355 } 356 357 // UnicodeString overrides clone() with a real implementation 358 Replaceable * 359 UnicodeString::clone() const { 360 return new UnicodeString(*this); 361 } 362 363 //======================================== 364 // array allocation 365 //======================================== 366 367 UBool 368 UnicodeString::allocate(int32_t capacity) { 369 if(capacity <= US_STACKBUF_SIZE) { 370 fFlags = kShortString; 371 } else { 372 // count bytes for the refCounter and the 
// string capacity, and
    // round up to a multiple of 16; then divide by 4 and allocate int32_t's
    // to be safely aligned for the refCount
    // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
    // NOTE(review): (capacity + 1) * U_SIZEOF_UCHAR looks like it is evaluated
    // in int arithmetic before being widened; a very large capacity could
    // overflow here - confirm the maximum capacity callers may pass.
    int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
    int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
    if(array != 0) {
      // set initial refCount and point behind the refCount
      *array++ = 1;

      // have fArray point to the first UChar
      fUnion.fFields.fArray = (UChar *)array;
      // capacity in UChars of everything after the refCount word
      fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
      fFlags = kLongString;
    } else {
      // out of memory: put the string into the bogus state
      fShortLength = 0;
      fUnion.fFields.fArray = 0;
      fUnion.fFields.fCapacity = 0;
      fFlags = kIsBogus;
      return FALSE;
    }
  }
  return TRUE;
}

//========================================
// Destructor
//========================================
UnicodeString::~UnicodeString()
{
  // gives up our reference to a refcounted buffer, if we hold one
  releaseArray();
}

//========================================
// Factory methods
//========================================

// Create a string from UTF-8 via setToUTF8().
UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
  UnicodeString result;
  result.setToUTF8(utf8);
  return result;
}

// Create a string from UTF-32; the conversion call below passes U+FFFD as
// the substitution character.
UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
  UnicodeString result;
  int32_t capacity;
  // Most UTF-32 strings will be BMP-only and result in a same-length
  // UTF-16 string. We overestimate the capacity just slightly,
  // just in case there are a few supplementary characters.
421 if(length <= US_STACKBUF_SIZE) { 422 capacity = US_STACKBUF_SIZE; 423 } else { 424 capacity = length + (length >> 4) + 4; 425 } 426 do { 427 UChar *utf16 = result.getBuffer(capacity); 428 int32_t length16; 429 UErrorCode errorCode = U_ZERO_ERROR; 430 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16, 431 utf32, length, 432 0xfffd, // Substitution character. 433 NULL, // Don't care about number of substitutions. 434 &errorCode); 435 result.releaseBuffer(length16); 436 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 437 capacity = length16 + 1; // +1 for the terminating NUL. 438 continue; 439 } else if(U_FAILURE(errorCode)) { 440 result.setToBogus(); 441 } 442 break; 443 } while(TRUE); 444 return result; 445 } 446 447 //======================================== 448 // Assignment 449 //======================================== 450 451 UnicodeString & 452 UnicodeString::operator=(const UnicodeString &src) { 453 return copyFrom(src); 454 } 455 456 UnicodeString & 457 UnicodeString::fastCopyFrom(const UnicodeString &src) { 458 return copyFrom(src, TRUE); 459 } 460 461 UnicodeString & 462 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { 463 // if assigning to ourselves, do nothing 464 if(this == 0 || this == &src) { 465 return *this; 466 } 467 468 // is the right side bogus? 
if(&src == 0 || src.isBogus()) {
    setToBogus();
    return *this;
  }

  // delete the current contents
  releaseArray();

  if(src.isEmpty()) {
    // empty string - use the stack buffer
    setToEmpty();
    return *this;
  }

  // we always copy the length
  int32_t srcLength = src.length();
  setLength(srcLength);

  // fLength>0 and not an "open" src.getBuffer(minCapacity)
  // How the contents are taken over depends on how src stores them.
  switch(src.fFlags) {
  case kShortString:
    // short string using the stack buffer, do the same
    fFlags = kShortString;
    uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
    break;
  case kLongString:
    // src uses a refCounted string buffer, use that buffer with refCount
    // src is const, use a cast - we don't really change it
    ((UnicodeString &)src).addRef();
    // copy all fields, share the reference-counted buffer
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    fFlags = src.fFlags;
    break;
  case kReadonlyAlias:
    if(fastCopy) {
      // src is a readonly alias, do the same
      // -> maintain the readonly alias as such
      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
      fFlags = src.fFlags;
      break;
    }
    // else if(!fastCopy) fall through to case kWritableAlias
    // -> allocate a new buffer and copy the contents
  case kWritableAlias:
    // src is a writable alias; we make a copy of that instead
    if(allocate(srcLength)) {
      uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
      break;
    }
    // if there is not enough memory, then fall through to setting to bogus
  default:
    // if src is bogus, set ourselves to bogus
    // do not call setToBogus() here because fArray and fFlags are not consistent here
    // (the old array was already released above, so the fields are reset directly)
    fShortLength = 0;
    fUnion.fFields.fArray = 0;
    fUnion.fFields.fCapacity = 0;
fFlags = kIsBogus; 528 break; 529 } 530 531 return *this; 532 } 533 534 //======================================== 535 // Miscellaneous operations 536 //======================================== 537 538 UnicodeString UnicodeString::unescape() const { 539 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity 540 const UChar *array = getBuffer(); 541 int32_t len = length(); 542 int32_t prev = 0; 543 for (int32_t i=0;;) { 544 if (i == len) { 545 result.append(array, prev, len - prev); 546 break; 547 } 548 if (array[i++] == 0x5C /*'\\'*/) { 549 result.append(array, prev, (i - 1) - prev); 550 UChar32 c = unescapeAt(i); // advances i 551 if (c < 0) { 552 result.remove(); // return empty string 553 break; // invalid escape sequence 554 } 555 result.append(c); 556 prev = i; 557 } 558 } 559 return result; 560 } 561 562 UChar32 UnicodeString::unescapeAt(int32_t &offset) const { 563 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this); 564 } 565 566 //======================================== 567 // Read-only implementation 568 //======================================== 569 int8_t 570 UnicodeString::doCompare( int32_t start, 571 int32_t length, 572 const UChar *srcChars, 573 int32_t srcStart, 574 int32_t srcLength) const 575 { 576 // compare illegal string values 577 // treat const UChar *srcChars==NULL as an empty string 578 if(isBogus()) { 579 return -1; 580 } 581 582 // pin indices to legal values 583 pinIndices(start, length); 584 585 if(srcChars == NULL) { 586 srcStart = srcLength = 0; 587 } 588 589 // get the correct pointer 590 const UChar *chars = getArrayStart(); 591 592 chars += start; 593 srcChars += srcStart; 594 595 int32_t minLength; 596 int8_t lengthResult; 597 598 // get the srcLength if necessary 599 if(srcLength < 0) { 600 srcLength = u_strlen(srcChars + srcStart); 601 } 602 603 // are we comparing different lengths? 
604 if(length != srcLength) { 605 if(length < srcLength) { 606 minLength = length; 607 lengthResult = -1; 608 } else { 609 minLength = srcLength; 610 lengthResult = 1; 611 } 612 } else { 613 minLength = length; 614 lengthResult = 0; 615 } 616 617 /* 618 * note that uprv_memcmp() returns an int but we return an int8_t; 619 * we need to take care not to truncate the result - 620 * one way to do this is to right-shift the value to 621 * move the sign bit into the lower 8 bits and making sure that this 622 * does not become 0 itself 623 */ 624 625 if(minLength > 0 && chars != srcChars) { 626 int32_t result; 627 628 # if U_IS_BIG_ENDIAN 629 // big-endian: byte comparison works 630 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); 631 if(result != 0) { 632 return (int8_t)(result >> 15 | 1); 633 } 634 # else 635 // little-endian: compare UChar units 636 do { 637 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); 638 if(result != 0) { 639 return (int8_t)(result >> 15 | 1); 640 } 641 } while(--minLength > 0); 642 # endif 643 } 644 return lengthResult; 645 } 646 647 /* String compare in code point order - doCompare() compares in code unit order. 
*/ 648 int8_t 649 UnicodeString::doCompareCodePointOrder(int32_t start, 650 int32_t length, 651 const UChar *srcChars, 652 int32_t srcStart, 653 int32_t srcLength) const 654 { 655 // compare illegal string values 656 // treat const UChar *srcChars==NULL as an empty string 657 if(isBogus()) { 658 return -1; 659 } 660 661 // pin indices to legal values 662 pinIndices(start, length); 663 664 if(srcChars == NULL) { 665 srcStart = srcLength = 0; 666 } 667 668 int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE); 669 /* translate the 32-bit result into an 8-bit one */ 670 if(diff!=0) { 671 return (int8_t)(diff >> 15 | 1); 672 } else { 673 return 0; 674 } 675 } 676 677 int32_t 678 UnicodeString::getLength() const { 679 return length(); 680 } 681 682 UChar 683 UnicodeString::getCharAt(int32_t offset) const { 684 return charAt(offset); 685 } 686 687 UChar32 688 UnicodeString::getChar32At(int32_t offset) const { 689 return char32At(offset); 690 } 691 692 int32_t 693 UnicodeString::countChar32(int32_t start, int32_t length) const { 694 pinIndices(start, length); 695 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL 696 return u_countChar32(getArrayStart()+start, length); 697 } 698 699 UBool 700 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { 701 pinIndices(start, length); 702 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL 703 return u_strHasMoreChar32Than(getArrayStart()+start, length, number); 704 } 705 706 int32_t 707 UnicodeString::moveIndex32(int32_t index, int32_t delta) const { 708 // pin index 709 int32_t len = length(); 710 if(index<0) { 711 index=0; 712 } else if(index>len) { 713 index=len; 714 } 715 716 const UChar *array = getArrayStart(); 717 if(delta>0) { 718 UTF_FWD_N(array, index, len, delta); 719 } else { 720 UTF_BACK_N(array, 0, index, -delta); 721 } 722 723 return index; 724 } 725 726 void 727 
UnicodeString::doExtract(int32_t start, 728 int32_t length, 729 UChar *dst, 730 int32_t dstStart) const 731 { 732 // pin indices to legal values 733 pinIndices(start, length); 734 735 // do not copy anything if we alias dst itself 736 const UChar *array = getArrayStart(); 737 if(array + start != dst + dstStart) { 738 us_arrayCopy(array, start, dst, dstStart, length); 739 } 740 } 741 742 int32_t 743 UnicodeString::extract(UChar *dest, int32_t destCapacity, 744 UErrorCode &errorCode) const { 745 int32_t len = length(); 746 if(U_SUCCESS(errorCode)) { 747 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { 748 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 749 } else { 750 const UChar *array = getArrayStart(); 751 if(len>0 && len<=destCapacity && array!=dest) { 752 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); 753 } 754 return u_terminateUChars(dest, destCapacity, len, &errorCode); 755 } 756 } 757 758 return len; 759 } 760 761 int32_t 762 UnicodeString::extract(int32_t start, 763 int32_t length, 764 char *target, 765 int32_t targetCapacity, 766 enum EInvariant) const 767 { 768 // if the arguments are illegal, then do nothing 769 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { 770 return 0; 771 } 772 773 // pin the indices to legal values 774 pinIndices(start, length); 775 776 if(length <= targetCapacity) { 777 u_UCharsToChars(getArrayStart() + start, target, length); 778 } 779 UErrorCode status = U_ZERO_ERROR; 780 return u_terminateChars(target, targetCapacity, length, &status); 781 } 782 783 UnicodeString 784 UnicodeString::tempSubString(int32_t start, int32_t len) const { 785 pinIndices(start, len); 786 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer 787 if(array==NULL) { 788 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string 789 len=-2; // bogus result string 790 } 791 return UnicodeString(FALSE, array + start, len); 792 } 793 794 int32_t 795 
UnicodeString::toUTF8(int32_t start, int32_t len, 796 char *target, int32_t capacity) const { 797 pinIndices(start, len); 798 int32_t length8; 799 UErrorCode errorCode = U_ZERO_ERROR; 800 u_strToUTF8WithSub(target, capacity, &length8, 801 getBuffer() + start, len, 802 0xFFFD, // Standard substitution character. 803 NULL, // Don't care about number of substitutions. 804 &errorCode); 805 return length8; 806 } 807 808 #if U_CHARSET_IS_UTF8 809 810 int32_t 811 UnicodeString::extract(int32_t start, int32_t len, 812 char *target, uint32_t dstSize) const { 813 // if the arguments are illegal, then do nothing 814 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { 815 return 0; 816 } 817 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff); 818 } 819 820 // else see unistr_cnv.cpp 821 #endif 822 823 void 824 UnicodeString::extractBetween(int32_t start, 825 int32_t limit, 826 UnicodeString& target) const { 827 pinIndex(start); 828 pinIndex(limit); 829 doExtract(start, limit - start, target); 830 } 831 832 // When converting from UTF-16 to UTF-8, the result will have at most 3 times 833 // as many bytes as the source has UChars. 834 // The "worst cases" are writing systems like Indic, Thai and CJK with 835 // 3:1 bytes:UChars. 836 void 837 UnicodeString::toUTF8(ByteSink &sink) const { 838 int32_t length16 = length(); 839 if(length16 != 0) { 840 char stackBuffer[1024]; 841 int32_t capacity = (int32_t)sizeof(stackBuffer); 842 UBool utf8IsOwned = FALSE; 843 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity, 844 3*length16, 845 stackBuffer, capacity, 846 &capacity); 847 int32_t length8 = 0; 848 UErrorCode errorCode = U_ZERO_ERROR; 849 u_strToUTF8WithSub(utf8, capacity, &length8, 850 getBuffer(), length16, 851 0xFFFD, // Standard substitution character. 852 NULL, // Don't care about number of substitutions. 
853 &errorCode); 854 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 855 utf8 = (char *)uprv_malloc(length8); 856 if(utf8 != NULL) { 857 utf8IsOwned = TRUE; 858 errorCode = U_ZERO_ERROR; 859 u_strToUTF8WithSub(utf8, length8, &length8, 860 getBuffer(), length16, 861 0xFFFD, // Standard substitution character. 862 NULL, // Don't care about number of substitutions. 863 &errorCode); 864 } else { 865 errorCode = U_MEMORY_ALLOCATION_ERROR; 866 } 867 } 868 if(U_SUCCESS(errorCode)) { 869 sink.Append(utf8, length8); 870 } 871 if(utf8IsOwned) { 872 uprv_free(utf8); 873 } 874 } 875 } 876 877 int32_t 878 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const { 879 int32_t length32=0; 880 if(U_SUCCESS(errorCode)) { 881 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments. 882 u_strToUTF32WithSub(utf32, capacity, &length32, 883 getBuffer(), length(), 884 0xfffd, // Substitution character. 885 NULL, // Don't care about number of substitutions. 886 &errorCode); 887 } 888 return length32; 889 } 890 891 int32_t 892 UnicodeString::indexOf(const UChar *srcChars, 893 int32_t srcStart, 894 int32_t srcLength, 895 int32_t start, 896 int32_t length) const 897 { 898 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 899 return -1; 900 } 901 902 // UnicodeString does not find empty substrings 903 if(srcLength < 0 && srcChars[srcStart] == 0) { 904 return -1; 905 } 906 907 // get the indices within bounds 908 pinIndices(start, length); 909 910 // find the first occurrence of the substring 911 const UChar *array = getArrayStart(); 912 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); 913 if(match == NULL) { 914 return -1; 915 } else { 916 return (int32_t)(match - array); 917 } 918 } 919 920 int32_t 921 UnicodeString::doIndexOf(UChar c, 922 int32_t start, 923 int32_t length) const 924 { 925 // pin indices 926 pinIndices(start, length); 927 928 // find the first occurrence of c 929 const UChar 
*array = getArrayStart(); 930 const UChar *match = u_memchr(array + start, c, length); 931 if(match == NULL) { 932 return -1; 933 } else { 934 return (int32_t)(match - array); 935 } 936 } 937 938 int32_t 939 UnicodeString::doIndexOf(UChar32 c, 940 int32_t start, 941 int32_t length) const { 942 // pin indices 943 pinIndices(start, length); 944 945 // find the first occurrence of c 946 const UChar *array = getArrayStart(); 947 const UChar *match = u_memchr32(array + start, c, length); 948 if(match == NULL) { 949 return -1; 950 } else { 951 return (int32_t)(match - array); 952 } 953 } 954 955 int32_t 956 UnicodeString::lastIndexOf(const UChar *srcChars, 957 int32_t srcStart, 958 int32_t srcLength, 959 int32_t start, 960 int32_t length) const 961 { 962 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 963 return -1; 964 } 965 966 // UnicodeString does not find empty substrings 967 if(srcLength < 0 && srcChars[srcStart] == 0) { 968 return -1; 969 } 970 971 // get the indices within bounds 972 pinIndices(start, length); 973 974 // find the last occurrence of the substring 975 const UChar *array = getArrayStart(); 976 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength); 977 if(match == NULL) { 978 return -1; 979 } else { 980 return (int32_t)(match - array); 981 } 982 } 983 984 int32_t 985 UnicodeString::doLastIndexOf(UChar c, 986 int32_t start, 987 int32_t length) const 988 { 989 if(isBogus()) { 990 return -1; 991 } 992 993 // pin indices 994 pinIndices(start, length); 995 996 // find the last occurrence of c 997 const UChar *array = getArrayStart(); 998 const UChar *match = u_memrchr(array + start, c, length); 999 if(match == NULL) { 1000 return -1; 1001 } else { 1002 return (int32_t)(match - array); 1003 } 1004 } 1005 1006 int32_t 1007 UnicodeString::doLastIndexOf(UChar32 c, 1008 int32_t start, 1009 int32_t length) const { 1010 // pin indices 1011 pinIndices(start, length); 1012 1013 // find the last occurrence of c 
1014 const UChar *array = getArrayStart(); 1015 const UChar *match = u_memrchr32(array + start, c, length); 1016 if(match == NULL) { 1017 return -1; 1018 } else { 1019 return (int32_t)(match - array); 1020 } 1021 } 1022 1023 //======================================== 1024 // Write implementation 1025 //======================================== 1026 1027 UnicodeString& 1028 UnicodeString::findAndReplace(int32_t start, 1029 int32_t length, 1030 const UnicodeString& oldText, 1031 int32_t oldStart, 1032 int32_t oldLength, 1033 const UnicodeString& newText, 1034 int32_t newStart, 1035 int32_t newLength) 1036 { 1037 if(isBogus() || oldText.isBogus() || newText.isBogus()) { 1038 return *this; 1039 } 1040 1041 pinIndices(start, length); 1042 oldText.pinIndices(oldStart, oldLength); 1043 newText.pinIndices(newStart, newLength); 1044 1045 if(oldLength == 0) { 1046 return *this; 1047 } 1048 1049 while(length > 0 && length >= oldLength) { 1050 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length); 1051 if(pos < 0) { 1052 // no more oldText's here: done 1053 break; 1054 } else { 1055 // we found oldText, replace it by newText and go beyond it 1056 replace(pos, oldLength, newText, newStart, newLength); 1057 length -= pos + oldLength - start; 1058 start = pos + newLength; 1059 } 1060 } 1061 1062 return *this; 1063 } 1064 1065 1066 void 1067 UnicodeString::setToBogus() 1068 { 1069 releaseArray(); 1070 1071 fShortLength = 0; 1072 fUnion.fFields.fArray = 0; 1073 fUnion.fFields.fCapacity = 0; 1074 fFlags = kIsBogus; 1075 } 1076 1077 // turn a bogus string into an empty one 1078 void 1079 UnicodeString::unBogus() { 1080 if(fFlags & kIsBogus) { 1081 setToEmpty(); 1082 } 1083 } 1084 1085 // setTo() analogous to the readonly-aliasing constructor with the same signature 1086 UnicodeString & 1087 UnicodeString::setTo(UBool isTerminated, 1088 const UChar *text, 1089 int32_t textLength) 1090 { 1091 if(fFlags & kOpenGetBuffer) { 1092 // do not modify a string that has an "open" 
getBuffer(minCapacity) 1093 return *this; 1094 } 1095 1096 if(text == NULL) { 1097 // treat as an empty string, do not alias 1098 releaseArray(); 1099 setToEmpty(); 1100 return *this; 1101 } 1102 1103 if( textLength < -1 || 1104 (textLength == -1 && !isTerminated) || 1105 (textLength >= 0 && isTerminated && text[textLength] != 0) 1106 ) { 1107 setToBogus(); 1108 return *this; 1109 } 1110 1111 releaseArray(); 1112 1113 if(textLength == -1) { 1114 // text is terminated, or else it would have failed the above test 1115 textLength = u_strlen(text); 1116 } 1117 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 1118 1119 fFlags = kReadonlyAlias; 1120 return *this; 1121 } 1122 1123 // setTo() analogous to the writable-aliasing constructor with the same signature 1124 UnicodeString & 1125 UnicodeString::setTo(UChar *buffer, 1126 int32_t buffLength, 1127 int32_t buffCapacity) { 1128 if(fFlags & kOpenGetBuffer) { 1129 // do not modify a string that has an "open" getBuffer(minCapacity) 1130 return *this; 1131 } 1132 1133 if(buffer == NULL) { 1134 // treat as an empty string, do not alias 1135 releaseArray(); 1136 setToEmpty(); 1137 return *this; 1138 } 1139 1140 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 1141 setToBogus(); 1142 return *this; 1143 } else if(buffLength == -1) { 1144 // buffLength = u_strlen(buff); but do not look beyond buffCapacity 1145 const UChar *p = buffer, *limit = buffer + buffCapacity; 1146 while(p != limit && *p != 0) { 1147 ++p; 1148 } 1149 buffLength = (int32_t)(p - buffer); 1150 } 1151 1152 releaseArray(); 1153 1154 setArray(buffer, buffLength, buffCapacity); 1155 fFlags = kWritableAlias; 1156 return *this; 1157 } 1158 1159 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) { 1160 unBogus(); 1161 int32_t length = utf8.length(); 1162 int32_t capacity; 1163 // The UTF-16 string will be at most as long as the UTF-8 string. 
if(length <= US_STACKBUF_SIZE) {
    capacity = US_STACKBUF_SIZE;
  } else {
    capacity = length + 1;  // +1 for the terminating NUL.
  }
  UChar *utf16 = getBuffer(capacity);
  int32_t length16;
  UErrorCode errorCode = U_ZERO_ERROR;
  u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
      utf8.data(), length,
      0xfffd,  // Substitution character.
      NULL,    // Don't care about number of substitutions.
      &errorCode);
  releaseBuffer(length16);
  // any conversion failure leaves the string bogus
  if(U_FAILURE(errorCode)) {
    setToBogus();
  }
  return *this;
}

// Set the code unit at offset to c. The offset is pinned into
// [0, length-1]; nothing is written if the string is empty or
// cloneArrayIfNeeded() fails.
UnicodeString&
UnicodeString::setCharAt(int32_t offset,
             UChar c)
{
  int32_t len = length();
  if(cloneArrayIfNeeded() && len > 0) {
    if(offset < 0) {
      offset = 0;
    } else if(offset >= len) {
      offset = len - 1;
    }

    getArrayStart()[offset] = c;
  }
  return *this;
}

// Replace the range [start, start+length) with a range of src.
// A bogus src only removes the range.
UnicodeString&
UnicodeString::doReplace( int32_t start,
              int32_t length,
              const UnicodeString& src,
              int32_t srcStart,
              int32_t srcLength)
{
  if(!src.isBogus()) {
    // pin the indices to legal values
    src.pinIndices(srcStart, srcLength);

    // get the characters from src
    // and replace the range in ourselves with them
    return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
  } else {
    // remove the range
    return doReplace(start, length, 0, 0, 0);
  }
}

// Replace the range [start, start+length) with srcLength units of
// srcChars starting at srcStart. Does nothing if this string is not writable.
UnicodeString&
UnicodeString::doReplace(int32_t start,
             int32_t length,
             const UChar *srcChars,
             int32_t srcStart,
             int32_t srcLength)
{
  if(!isWritable()) {
    return *this;
  }

  int32_t oldLength = this->length();

  // optimize (read-only alias).remove(0, start) and .remove(start, end)
  if((fFlags&kBufferIsReadonly) && srcLength == 0) {
    if(start == 0) {
      // remove prefix by adjusting the array pointer
      pinIndex(length);
      fUnion.fFields.fArray
+= length; 1240 fUnion.fFields.fCapacity -= length; 1241 setLength(oldLength - length); 1242 return *this; 1243 } else { 1244 pinIndex(start); 1245 if(length >= (oldLength - start)) { 1246 // remove suffix by reducing the length (like truncate()) 1247 setLength(start); 1248 fUnion.fFields.fCapacity = start; // not NUL-terminated any more 1249 return *this; 1250 } 1251 } 1252 } 1253 1254 if(srcChars == 0) { 1255 srcStart = srcLength = 0; 1256 } else if(srcLength < 0) { 1257 // get the srcLength if necessary 1258 srcLength = u_strlen(srcChars + srcStart); 1259 } 1260 1261 // calculate the size of the string after the replace 1262 int32_t newSize; 1263 1264 // optimize append() onto a large-enough, owned string 1265 if(start >= oldLength) { 1266 newSize = oldLength + srcLength; 1267 if(newSize <= getCapacity() && isBufferWritable()) { 1268 us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength); 1269 setLength(newSize); 1270 return *this; 1271 } else { 1272 // pin the indices to legal values 1273 start = oldLength; 1274 length = 0; 1275 } 1276 } else { 1277 // pin the indices to legal values 1278 pinIndices(start, length); 1279 1280 newSize = oldLength - length + srcLength; 1281 } 1282 1283 // the following may change fArray but will not copy the current contents; 1284 // therefore we need to keep the current fArray 1285 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1286 UChar *oldArray; 1287 if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) { 1288 // copy the stack buffer contents because it will be overwritten with 1289 // fUnion.fFields values 1290 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength); 1291 oldArray = oldStackBuffer; 1292 } else { 1293 oldArray = getArrayStart(); 1294 } 1295 1296 // clone our array and allocate a bigger array if needed 1297 int32_t *bufferToDelete = 0; 1298 if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize, 1299 FALSE, &bufferToDelete) 1300 ) { 1301 return *this; 1302 } 1303 1304 // now 
do the replace 1305 1306 UChar *newArray = getArrayStart(); 1307 if(newArray != oldArray) { 1308 // if fArray changed, then we need to copy everything except what will change 1309 us_arrayCopy(oldArray, 0, newArray, 0, start); 1310 us_arrayCopy(oldArray, start + length, 1311 newArray, start + srcLength, 1312 oldLength - (start + length)); 1313 } else if(length != srcLength) { 1314 // fArray did not change; copy only the portion that isn't changing, leaving a hole 1315 us_arrayCopy(oldArray, start + length, 1316 newArray, start + srcLength, 1317 oldLength - (start + length)); 1318 } 1319 1320 // now fill in the hole with the new string 1321 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); 1322 1323 setLength(newSize); 1324 1325 // delayed delete in case srcChars == fArray when we started, and 1326 // to keep oldArray alive for the above operations 1327 if (bufferToDelete) { 1328 uprv_free(bufferToDelete); 1329 } 1330 1331 return *this; 1332 } 1333 1334 /** 1335 * Replaceable API 1336 */ 1337 void 1338 UnicodeString::handleReplaceBetween(int32_t start, 1339 int32_t limit, 1340 const UnicodeString& text) { 1341 replaceBetween(start, limit, text); 1342 } 1343 1344 /** 1345 * Replaceable API 1346 */ 1347 void 1348 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { 1349 if (limit <= start) { 1350 return; // Nothing to do; avoid bogus malloc call 1351 } 1352 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); 1353 // Check to make sure text is not null. 1354 if (text != NULL) { 1355 extractBetween(start, limit, text, 0); 1356 insert(dest, text, 0, limit - start); 1357 uprv_free(text); 1358 } 1359 } 1360 1361 /** 1362 * Replaceable API 1363 * 1364 * NOTE: This is for the Replaceable class. There is no rep.cpp, 1365 * so we implement this function here. 
1366 */ 1367 UBool Replaceable::hasMetaData() const { 1368 return TRUE; 1369 } 1370 1371 /** 1372 * Replaceable API 1373 */ 1374 UBool UnicodeString::hasMetaData() const { 1375 return FALSE; 1376 } 1377 1378 UnicodeString& 1379 UnicodeString::doReverse(int32_t start, 1380 int32_t length) 1381 { 1382 if(this->length() <= 1 || !cloneArrayIfNeeded()) { 1383 return *this; 1384 } 1385 1386 // pin the indices to legal values 1387 pinIndices(start, length); 1388 1389 UChar *left = getArrayStart() + start; 1390 UChar *right = left + length; 1391 UChar swap; 1392 UBool hasSupplementary = FALSE; 1393 1394 while(left < --right) { 1395 hasSupplementary |= (UBool)UTF_IS_LEAD(swap = *left); 1396 hasSupplementary |= (UBool)UTF_IS_LEAD(*left++ = *right); 1397 *right = swap; 1398 } 1399 1400 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ 1401 if(hasSupplementary) { 1402 UChar swap2; 1403 1404 left = getArrayStart() + start; 1405 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right 1406 while(left < right) { 1407 if(UTF_IS_TRAIL(swap = *left) && UTF_IS_LEAD(swap2 = *(left + 1))) { 1408 *left++ = swap2; 1409 *left++ = swap; 1410 } else { 1411 ++left; 1412 } 1413 } 1414 } 1415 1416 return *this; 1417 } 1418 1419 UBool 1420 UnicodeString::padLeading(int32_t targetLength, 1421 UChar padChar) 1422 { 1423 int32_t oldLength = length(); 1424 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1425 return FALSE; 1426 } else { 1427 // move contents up by padding width 1428 UChar *array = getArrayStart(); 1429 int32_t start = targetLength - oldLength; 1430 us_arrayCopy(array, 0, array, start, oldLength); 1431 1432 // fill in padding character 1433 while(--start >= 0) { 1434 array[start] = padChar; 1435 } 1436 setLength(targetLength); 1437 return TRUE; 1438 } 1439 } 1440 1441 UBool 1442 UnicodeString::padTrailing(int32_t targetLength, 1443 UChar padChar) 1444 { 1445 int32_t oldLength = length(); 1446 
if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1447 return FALSE; 1448 } else { 1449 // fill in padding character 1450 UChar *array = getArrayStart(); 1451 int32_t length = targetLength; 1452 while(--length >= oldLength) { 1453 array[length] = padChar; 1454 } 1455 setLength(targetLength); 1456 return TRUE; 1457 } 1458 } 1459 1460 //======================================== 1461 // Hashing 1462 //======================================== 1463 int32_t 1464 UnicodeString::doHashCode() const 1465 { 1466 /* Delegate hash computation to uhash. This makes UnicodeString 1467 * hashing consistent with UChar* hashing. */ 1468 int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length()); 1469 if (hashCode == kInvalidHashCode) { 1470 hashCode = kEmptyHashCode; 1471 } 1472 return hashCode; 1473 } 1474 1475 //======================================== 1476 // External Buffer 1477 //======================================== 1478 1479 UChar * 1480 UnicodeString::getBuffer(int32_t minCapacity) { 1481 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { 1482 fFlags|=kOpenGetBuffer; 1483 fShortLength=0; 1484 return getArrayStart(); 1485 } else { 1486 return 0; 1487 } 1488 } 1489 1490 void 1491 UnicodeString::releaseBuffer(int32_t newLength) { 1492 if(fFlags&kOpenGetBuffer && newLength>=-1) { 1493 // set the new fLength 1494 int32_t capacity=getCapacity(); 1495 if(newLength==-1) { 1496 // the new length is the string length, capped by fCapacity 1497 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; 1498 while(p<limit && *p!=0) { 1499 ++p; 1500 } 1501 newLength=(int32_t)(p-array); 1502 } else if(newLength>capacity) { 1503 newLength=capacity; 1504 } 1505 setLength(newLength); 1506 fFlags&=~kOpenGetBuffer; 1507 } 1508 } 1509 1510 //======================================== 1511 // Miscellaneous 1512 //======================================== 1513 UBool 1514 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, 1515 int32_t growCapacity, 1516 
UBool doCopyArray, 1517 int32_t **pBufferToDelete, 1518 UBool forceClone) { 1519 // default parameters need to be static, therefore 1520 // the defaults are -1 to have convenience defaults 1521 if(newCapacity == -1) { 1522 newCapacity = getCapacity(); 1523 } 1524 1525 // while a getBuffer(minCapacity) is "open", 1526 // prevent any modifications of the string by returning FALSE here 1527 // if the string is bogus, then only an assignment or similar can revive it 1528 if(!isWritable()) { 1529 return FALSE; 1530 } 1531 1532 /* 1533 * We need to make a copy of the array if 1534 * the buffer is read-only, or 1535 * the buffer is refCounted (shared), and refCount>1, or 1536 * the buffer is too small. 1537 * Return FALSE if memory could not be allocated. 1538 */ 1539 if(forceClone || 1540 fFlags & kBufferIsReadonly || 1541 fFlags & kRefCounted && refCount() > 1 || 1542 newCapacity > getCapacity() 1543 ) { 1544 // check growCapacity for default value and use of the stack buffer 1545 if(growCapacity == -1) { 1546 growCapacity = newCapacity; 1547 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { 1548 growCapacity = US_STACKBUF_SIZE; 1549 } 1550 1551 // save old values 1552 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1553 UChar *oldArray; 1554 uint8_t flags = fFlags; 1555 1556 if(flags&kUsingStackBuffer) { 1557 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { 1558 // copy the stack buffer contents because it will be overwritten with 1559 // fUnion.fFields values 1560 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength); 1561 oldArray = oldStackBuffer; 1562 } else { 1563 oldArray = 0; // no need to copy from stack buffer to itself 1564 } 1565 } else { 1566 oldArray = fUnion.fFields.fArray; 1567 } 1568 1569 // allocate a new array 1570 if(allocate(growCapacity) || 1571 newCapacity < growCapacity && allocate(newCapacity) 1572 ) { 1573 if(doCopyArray && oldArray != 0) { 1574 // copy the contents 1575 // do not copy more than what 
fits - it may be smaller than before 1576 int32_t minLength = length(); 1577 newCapacity = getCapacity(); 1578 if(newCapacity < minLength) { 1579 minLength = newCapacity; 1580 setLength(minLength); 1581 } 1582 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); 1583 } else { 1584 fShortLength = 0; 1585 } 1586 1587 // release the old array 1588 if(flags & kRefCounted) { 1589 // the array is refCounted; decrement and release if 0 1590 int32_t *pRefCount = ((int32_t *)oldArray - 1); 1591 if(umtx_atomic_dec(pRefCount) == 0) { 1592 if(pBufferToDelete == 0) { 1593 uprv_free(pRefCount); 1594 } else { 1595 // the caller requested to delete it himself 1596 *pBufferToDelete = pRefCount; 1597 } 1598 } 1599 } 1600 } else { 1601 // not enough memory for growCapacity and not even for the smaller newCapacity 1602 // reset the old values for setToBogus() to release the array 1603 if(!(flags&kUsingStackBuffer)) { 1604 fUnion.fFields.fArray = oldArray; 1605 } 1606 fFlags = flags; 1607 setToBogus(); 1608 return FALSE; 1609 } 1610 } 1611 return TRUE; 1612 } 1613 U_NAMESPACE_END 1614 1615 #ifdef U_STATIC_IMPLEMENTATION 1616 /* 1617 This should never be called. It is defined here to make sure that the 1618 virtual vector deleting destructor is defined within unistr.cpp. 1619 The vector deleting destructor is already a part of UObject, 1620 but defining it here makes sure that it is included with this object file. 1621 This makes sure that static library dependencies are kept to a minimum. 1622 */ 1623 static void uprv_UnicodeStringDummy(void) { 1624 U_NAMESPACE_USE 1625 delete [] (new UnicodeString[2]); 1626 } 1627 #endif 1628