1 /* 2 ****************************************************************************** 3 * Copyright (C) 1999-2011, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ****************************************************************************** 6 * 7 * File unistr.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 09/25/98 stephen Creation. 13 * 04/20/99 stephen Overhauled per 4/16 code review. 14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX 15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from 16 * Replaceable. 17 * 06/25/01 grhoten Removed the dependency on iostream 18 ****************************************************************************** 19 */ 20 21 #include "unicode/utypes.h" 22 #include "unicode/appendable.h" 23 #include "unicode/putil.h" 24 #include "cstring.h" 25 #include "cmemory.h" 26 #include "unicode/ustring.h" 27 #include "unicode/unistr.h" 28 #include "uhash.h" 29 #include "ustr_imp.h" 30 #include "umutex.h" 31 32 #if 0 33 34 #if U_IOSTREAM_SOURCE >= 199711 35 #include <iostream> 36 using namespace std; 37 #elif U_IOSTREAM_SOURCE >= 198506 38 #include <iostream.h> 39 #endif 40 41 //DEBUGGING 42 void 43 print(const UnicodeString& s, 44 const char *name) 45 { 46 UChar c; 47 cout << name << ":|"; 48 for(int i = 0; i < s.length(); ++i) { 49 c = s[i]; 50 if(c>= 0x007E || c < 0x0020) 51 cout << "[0x" << hex << s[i] << "]"; 52 else 53 cout << (char) s[i]; 54 } 55 cout << '|' << endl; 56 } 57 58 void 59 print(const UChar *s, 60 int32_t len, 61 const char *name) 62 { 63 UChar c; 64 cout << name << ":|"; 65 for(int i = 0; i < len; ++i) { 66 c = s[i]; 67 if(c>= 0x007E || c < 0x0020) 68 cout << "[0x" << hex << s[i] << "]"; 69 else 70 cout << (char) s[i]; 71 } 72 cout << '|' << endl; 73 } 74 // END DEBUGGING 75 #endif 76 77 // Local function definitions for now 78 79 // need to copy areas that may overlap 80 static 81 inline void 82 us_arrayCopy(const UChar *src, int32_t srcStart, 83 UChar *dst, int32_t dstStart, int32_t count) 84 { 85 if(count>0) { 86 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); 87 } 88 } 89 90 // u_unescapeAt() callback to get a UChar from a UnicodeString 91 U_CDECL_BEGIN 92 static UChar U_CALLCONV 93 UnicodeString_charAt(int32_t offset, void *context) { 94 return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset); 95 } 96 U_CDECL_END 97 98 U_NAMESPACE_BEGIN 99 100 /* The Replaceable virtual destructor can't be defined in the header 101 due to how AIX works with multiple definitions of virtual functions. 102 */ 103 Replaceable::~Replaceable() {} 104 Replaceable::Replaceable() {} 105 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) 106 107 UnicodeString U_EXPORT2 108 operator+ (const UnicodeString &s1, const UnicodeString &s2) { 109 return 110 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). 111 append(s1). 112 append(s2); 113 } 114 115 //======================================== 116 // Reference Counting functions, put at top of file so that optimizing compilers 117 // have a chance to automatically inline. 118 //======================================== 119 120 void 121 UnicodeString::addRef() 122 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);} 123 124 int32_t 125 UnicodeString::removeRef() 126 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);} 127 128 int32_t 129 UnicodeString::refCount() const 130 { 131 umtx_lock(NULL); 132 // Note: without the lock to force a memory barrier, we might see a very 133 // stale value on some multi-processor systems. 134 int32_t count = *((int32_t *)fUnion.fFields.fArray - 1); 135 umtx_unlock(NULL); 136 return count; 137 } 138 139 void 140 UnicodeString::releaseArray() { 141 if((fFlags & kRefCounted) && removeRef() == 0) { 142 uprv_free((int32_t *)fUnion.fFields.fArray - 1); 143 } 144 } 145 146 147 148 //======================================== 149 // Constructors 150 //======================================== 151 UnicodeString::UnicodeString() 152 : fShortLength(0), 153 fFlags(kShortString) 154 {} 155 156 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) 157 : fShortLength(0), 158 fFlags(0) 159 { 160 if(count <= 0 || (uint32_t)c > 0x10ffff) { 161 // just allocate and do not do anything else 162 allocate(capacity); 163 } else { 164 // count > 0, allocate and fill the new string with count c's 165 int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount; 166 if(capacity < length) { 167 capacity = length; 168 } 169 if(allocate(capacity)) { 170 UChar *array = getArrayStart(); 171 int32_t i = 0; 172 173 // fill the new string with c 174 if(unitCount == 1) { 175 // fill with length UChars 176 while(i < length) { 177 array[i++] = (UChar)c; 178 } 179 } else { 180 // get the code units for c 181 UChar units[UTF_MAX_CHAR_LENGTH]; 182 UTF_APPEND_CHAR_UNSAFE(units, i, c); 183 184 // now it must be i==unitCount 185 i = 0; 186 187 // for Unicode, unitCount can only be 1, 2, 3, or 4 188 // 1 is handled above 189 while(i < length) { 190 int32_t unitIdx = 0; 191 while(unitIdx < unitCount) { 192 array[i++]=units[unitIdx++]; 193 } 194 } 195 } 196 } 197 setLength(length); 198 } 199 } 200 201 UnicodeString::UnicodeString(UChar ch) 202 : fShortLength(1), 203 fFlags(kShortString) 204 { 205 fUnion.fStackBuffer[0] = ch; 206 } 207 208 UnicodeString::UnicodeString(UChar32 ch) 209 : fShortLength(0), 210 fFlags(kShortString) 211 { 212 int32_t i = 0; 213 UBool isError = FALSE; 214 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError); 215 fShortLength = (int8_t)i; 216 } 217 218 UnicodeString::UnicodeString(const UChar *text) 219 : fShortLength(0), 220 fFlags(kShortString) 221 { 222 doReplace(0, 0, text, 0, -1); 223 } 224 225 UnicodeString::UnicodeString(const UChar *text, 226 int32_t textLength) 227 : fShortLength(0), 228 fFlags(kShortString) 229 { 230 doReplace(0, 0, text, 0, textLength); 231 } 232 233 UnicodeString::UnicodeString(UBool isTerminated, 234 const UChar *text, 235 int32_t textLength) 236 : fShortLength(0), 237 fFlags(kReadonlyAlias) 238 { 239 if(text == NULL) { 240 // treat as an empty string, do not alias 241 setToEmpty(); 242 } else if(textLength < -1 || 243 (textLength == -1 && !isTerminated) || 244 (textLength >= 0 && isTerminated && text[textLength] != 0) 245 ) { 246 setToBogus(); 247 } else { 248 if(textLength == -1) { 249 // text is terminated, or else it would have failed the above test 250 textLength = u_strlen(text); 251 } 252 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 253 } 254 } 255 256 UnicodeString::UnicodeString(UChar *buff, 257 int32_t buffLength, 258 int32_t buffCapacity) 259 : fShortLength(0), 260 fFlags(kWritableAlias) 261 { 262 if(buff == NULL) { 263 // treat as an empty string, do not alias 264 setToEmpty(); 265 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 266 setToBogus(); 267 } else { 268 if(buffLength == -1) { 269 // fLength = u_strlen(buff); but do not look beyond buffCapacity 270 const UChar *p = buff, *limit = buff + buffCapacity; 271 while(p != limit && *p != 0) { 272 ++p; 273 } 274 buffLength = (int32_t)(p - buff); 275 } 276 setArray(buff, buffLength, buffCapacity); 277 } 278 } 279 280 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) 281 : fShortLength(0), 282 fFlags(kShortString) 283 { 284 if(src==NULL) { 285 // treat as an empty string 286 } else { 287 if(length<0) { 288 length=(int32_t)uprv_strlen(src); 289 } 290 if(cloneArrayIfNeeded(length, length, FALSE)) { 291 u_charsToUChars(src, getArrayStart(), length); 292 setLength(length); 293 } else { 294 setToBogus(); 295 } 296 } 297 } 298 299 #if U_CHARSET_IS_UTF8 300 301 UnicodeString::UnicodeString(const char *codepageData) 302 : fShortLength(0), 303 fFlags(kShortString) { 304 if(codepageData != 0) { 305 setToUTF8(codepageData); 306 } 307 } 308 309 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) 310 : fShortLength(0), 311 fFlags(kShortString) { 312 // if there's nothing to convert, do nothing 313 if(codepageData == 0 || dataLength == 0 || dataLength < -1) { 314 return; 315 } 316 if(dataLength == -1) { 317 dataLength = (int32_t)uprv_strlen(codepageData); 318 } 319 setToUTF8(StringPiece(codepageData, dataLength)); 320 } 321 322 // else see unistr_cnv.cpp 323 #endif 324 325 UnicodeString::UnicodeString(const UnicodeString& that) 326 : Replaceable(), 327 fShortLength(0), 328 fFlags(kShortString) 329 { 330 copyFrom(that); 331 } 332 333 UnicodeString::UnicodeString(const UnicodeString& that, 334 int32_t srcStart) 335 : Replaceable(), 336 fShortLength(0), 337 fFlags(kShortString) 338 { 339 setTo(that, srcStart); 340 } 341 342 UnicodeString::UnicodeString(const UnicodeString& that, 343 int32_t srcStart, 344 int32_t srcLength) 345 : Replaceable(), 346 fShortLength(0), 347 fFlags(kShortString) 348 { 349 setTo(that, srcStart, srcLength); 350 } 351 352 // Replaceable base class clone() default implementation, does not clone 353 Replaceable * 354 Replaceable::clone() const { 355 return NULL; 356 } 357 358 // UnicodeString overrides clone() with a real implementation 359 Replaceable * 360 UnicodeString::clone() const { 361 return new UnicodeString(*this); 362 } 363 364 //======================================== 365 // array allocation 366 //======================================== 367 368 UBool 369 UnicodeString::allocate(int32_t capacity) { 370 if(capacity <= US_STACKBUF_SIZE) { 371 fFlags = kShortString; 372 } else { 373 // count bytes for the refCounter and the string capacity, and 374 // round up to a multiple of 16; then divide by 4 and allocate int32_t's 375 // to be safely aligned for the refCount 376 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer() 377 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2); 378 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words ); 379 if(array != 0) { 380 // set initial refCount and point behind the refCount 381 *array++ = 1; 382 383 // have fArray point to the first UChar 384 fUnion.fFields.fArray = (UChar *)array; 385 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); 386 fFlags = kLongString; 387 } else { 388 fShortLength = 0; 389 fUnion.fFields.fArray = 0; 390 fUnion.fFields.fCapacity = 0; 391 fFlags = kIsBogus; 392 return FALSE; 393 } 394 } 395 return TRUE; 396 } 397 398 //======================================== 399 // Destructor 400 //======================================== 401 UnicodeString::~UnicodeString() 402 { 403 releaseArray(); 404 } 405 406 //======================================== 407 // Factory methods 408 //======================================== 409 410 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) { 411 UnicodeString result; 412 result.setToUTF8(utf8); 413 return result; 414 } 415 416 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) { 417 UnicodeString result; 418 int32_t capacity; 419 // Most UTF-32 strings will be BMP-only and result in a same-length 420 // UTF-16 string. We overestimate the capacity just slightly, 421 // just in case there are a few supplementary characters. 422 if(length <= US_STACKBUF_SIZE) { 423 capacity = US_STACKBUF_SIZE; 424 } else { 425 capacity = length + (length >> 4) + 4; 426 } 427 do { 428 UChar *utf16 = result.getBuffer(capacity); 429 int32_t length16; 430 UErrorCode errorCode = U_ZERO_ERROR; 431 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16, 432 utf32, length, 433 0xfffd, // Substitution character. 434 NULL, // Don't care about number of substitutions. 435 &errorCode); 436 result.releaseBuffer(length16); 437 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 438 capacity = length16 + 1; // +1 for the terminating NUL. 439 continue; 440 } else if(U_FAILURE(errorCode)) { 441 result.setToBogus(); 442 } 443 break; 444 } while(TRUE); 445 return result; 446 } 447 448 //======================================== 449 // Assignment 450 //======================================== 451 452 UnicodeString & 453 UnicodeString::operator=(const UnicodeString &src) { 454 return copyFrom(src); 455 } 456 457 UnicodeString & 458 UnicodeString::fastCopyFrom(const UnicodeString &src) { 459 return copyFrom(src, TRUE); 460 } 461 462 UnicodeString & 463 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { 464 // if assigning to ourselves, do nothing 465 if(this == 0 || this == &src) { 466 return *this; 467 } 468 469 // is the right side bogus? 470 if(&src == 0 || src.isBogus()) { 471 setToBogus(); 472 return *this; 473 } 474 475 // delete the current contents 476 releaseArray(); 477 478 if(src.isEmpty()) { 479 // empty string - use the stack buffer 480 setToEmpty(); 481 return *this; 482 } 483 484 // we always copy the length 485 int32_t srcLength = src.length(); 486 setLength(srcLength); 487 488 // fLength>0 and not an "open" src.getBuffer(minCapacity) 489 switch(src.fFlags) { 490 case kShortString: 491 // short string using the stack buffer, do the same 492 fFlags = kShortString; 493 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR); 494 break; 495 case kLongString: 496 // src uses a refCounted string buffer, use that buffer with refCount 497 // src is const, use a cast - we don't really change it 498 ((UnicodeString &)src).addRef(); 499 // copy all fields, share the reference-counted buffer 500 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 501 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 502 fFlags = src.fFlags; 503 break; 504 case kReadonlyAlias: 505 if(fastCopy) { 506 // src is a readonly alias, do the same 507 // -> maintain the readonly alias as such 508 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 509 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 510 fFlags = src.fFlags; 511 break; 512 } 513 // else if(!fastCopy) fall through to case kWritableAlias 514 // -> allocate a new buffer and copy the contents 515 case kWritableAlias: 516 // src is a writable alias; we make a copy of that instead 517 if(allocate(srcLength)) { 518 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR); 519 break; 520 } 521 // if there is not enough memory, then fall through to setting to bogus 522 default: 523 // if src is bogus, set ourselves to bogus 524 // do not call setToBogus() here because fArray and fFlags are not consistent here 525 fShortLength = 0; 526 fUnion.fFields.fArray = 0; 527 fUnion.fFields.fCapacity = 0; 528 fFlags = kIsBogus; 529 break; 530 } 531 532 return *this; 533 } 534 535 //======================================== 536 // Miscellaneous operations 537 //======================================== 538 539 UnicodeString UnicodeString::unescape() const { 540 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity 541 const UChar *array = getBuffer(); 542 int32_t len = length(); 543 int32_t prev = 0; 544 for (int32_t i=0;;) { 545 if (i == len) { 546 result.append(array, prev, len - prev); 547 break; 548 } 549 if (array[i++] == 0x5C /*'\\'*/) { 550 result.append(array, prev, (i - 1) - prev); 551 UChar32 c = unescapeAt(i); // advances i 552 if (c < 0) { 553 result.remove(); // return empty string 554 break; // invalid escape sequence 555 } 556 result.append(c); 557 prev = i; 558 } 559 } 560 return result; 561 } 562 563 UChar32 UnicodeString::unescapeAt(int32_t &offset) const { 564 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this); 565 } 566 567 //======================================== 568 // Read-only implementation 569 //======================================== 570 int8_t 571 UnicodeString::doCompare( int32_t start, 572 int32_t length, 573 const UChar *srcChars, 574 int32_t srcStart, 575 int32_t srcLength) const 576 { 577 // compare illegal string values 578 // treat const UChar *srcChars==NULL as an empty string 579 if(isBogus()) { 580 return -1; 581 } 582 583 // pin indices to legal values 584 pinIndices(start, length); 585 586 if(srcChars == NULL) { 587 srcStart = srcLength = 0; 588 } 589 590 // get the correct pointer 591 const UChar *chars = getArrayStart(); 592 593 chars += start; 594 srcChars += srcStart; 595 596 int32_t minLength; 597 int8_t lengthResult; 598 599 // get the srcLength if necessary 600 if(srcLength < 0) { 601 srcLength = u_strlen(srcChars + srcStart); 602 } 603 604 // are we comparing different lengths? 605 if(length != srcLength) { 606 if(length < srcLength) { 607 minLength = length; 608 lengthResult = -1; 609 } else { 610 minLength = srcLength; 611 lengthResult = 1; 612 } 613 } else { 614 minLength = length; 615 lengthResult = 0; 616 } 617 618 /* 619 * note that uprv_memcmp() returns an int but we return an int8_t; 620 * we need to take care not to truncate the result - 621 * one way to do this is to right-shift the value to 622 * move the sign bit into the lower 8 bits and making sure that this 623 * does not become 0 itself 624 */ 625 626 if(minLength > 0 && chars != srcChars) { 627 int32_t result; 628 629 # if U_IS_BIG_ENDIAN 630 // big-endian: byte comparison works 631 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); 632 if(result != 0) { 633 return (int8_t)(result >> 15 | 1); 634 } 635 # else 636 // little-endian: compare UChar units 637 do { 638 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); 639 if(result != 0) { 640 return (int8_t)(result >> 15 | 1); 641 } 642 } while(--minLength > 0); 643 # endif 644 } 645 return lengthResult; 646 } 647 648 /* String compare in code point order - doCompare() compares in code unit order. */ 649 int8_t 650 UnicodeString::doCompareCodePointOrder(int32_t start, 651 int32_t length, 652 const UChar *srcChars, 653 int32_t srcStart, 654 int32_t srcLength) const 655 { 656 // compare illegal string values 657 // treat const UChar *srcChars==NULL as an empty string 658 if(isBogus()) { 659 return -1; 660 } 661 662 // pin indices to legal values 663 pinIndices(start, length); 664 665 if(srcChars == NULL) { 666 srcStart = srcLength = 0; 667 } 668 669 int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE); 670 /* translate the 32-bit result into an 8-bit one */ 671 if(diff!=0) { 672 return (int8_t)(diff >> 15 | 1); 673 } else { 674 return 0; 675 } 676 } 677 678 int32_t 679 UnicodeString::getLength() const { 680 return length(); 681 } 682 683 UChar 684 UnicodeString::getCharAt(int32_t offset) const { 685 return charAt(offset); 686 } 687 688 UChar32 689 UnicodeString::getChar32At(int32_t offset) const { 690 return char32At(offset); 691 } 692 693 int32_t 694 UnicodeString::countChar32(int32_t start, int32_t length) const { 695 pinIndices(start, length); 696 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL 697 return u_countChar32(getArrayStart()+start, length); 698 } 699 700 UBool 701 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { 702 pinIndices(start, length); 703 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL 704 return u_strHasMoreChar32Than(getArrayStart()+start, length, number); 705 } 706 707 int32_t 708 UnicodeString::moveIndex32(int32_t index, int32_t delta) const { 709 // pin index 710 int32_t len = length(); 711 if(index<0) { 712 index=0; 713 } else if(index>len) { 714 index=len; 715 } 716 717 const UChar *array = getArrayStart(); 718 if(delta>0) { 719 UTF_FWD_N(array, index, len, delta); 720 } else { 721 UTF_BACK_N(array, 0, index, -delta); 722 } 723 724 return index; 725 } 726 727 void 728 UnicodeString::doExtract(int32_t start, 729 int32_t length, 730 UChar *dst, 731 int32_t dstStart) const 732 { 733 // pin indices to legal values 734 pinIndices(start, length); 735 736 // do not copy anything if we alias dst itself 737 const UChar *array = getArrayStart(); 738 if(array + start != dst + dstStart) { 739 us_arrayCopy(array, start, dst, dstStart, length); 740 } 741 } 742 743 int32_t 744 UnicodeString::extract(UChar *dest, int32_t destCapacity, 745 UErrorCode &errorCode) const { 746 int32_t len = length(); 747 if(U_SUCCESS(errorCode)) { 748 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { 749 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 750 } else { 751 const UChar *array = getArrayStart(); 752 if(len>0 && len<=destCapacity && array!=dest) { 753 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); 754 } 755 return u_terminateUChars(dest, destCapacity, len, &errorCode); 756 } 757 } 758 759 return len; 760 } 761 762 int32_t 763 UnicodeString::extract(int32_t start, 764 int32_t length, 765 char *target, 766 int32_t targetCapacity, 767 enum EInvariant) const 768 { 769 // if the arguments are illegal, then do nothing 770 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { 771 return 0; 772 } 773 774 // pin the indices to legal values 775 pinIndices(start, length); 776 777 if(length <= targetCapacity) { 778 u_UCharsToChars(getArrayStart() + start, target, length); 779 } 780 UErrorCode status = U_ZERO_ERROR; 781 return u_terminateChars(target, targetCapacity, length, &status); 782 } 783 784 UnicodeString 785 UnicodeString::tempSubString(int32_t start, int32_t len) const { 786 pinIndices(start, len); 787 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer 788 if(array==NULL) { 789 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string 790 len=-2; // bogus result string 791 } 792 return UnicodeString(FALSE, array + start, len); 793 } 794 795 int32_t 796 UnicodeString::toUTF8(int32_t start, int32_t len, 797 char *target, int32_t capacity) const { 798 pinIndices(start, len); 799 int32_t length8; 800 UErrorCode errorCode = U_ZERO_ERROR; 801 u_strToUTF8WithSub(target, capacity, &length8, 802 getBuffer() + start, len, 803 0xFFFD, // Standard substitution character. 804 NULL, // Don't care about number of substitutions. 805 &errorCode); 806 return length8; 807 } 808 809 #if U_CHARSET_IS_UTF8 810 811 int32_t 812 UnicodeString::extract(int32_t start, int32_t len, 813 char *target, uint32_t dstSize) const { 814 // if the arguments are illegal, then do nothing 815 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { 816 return 0; 817 } 818 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff); 819 } 820 821 // else see unistr_cnv.cpp 822 #endif 823 824 void 825 UnicodeString::extractBetween(int32_t start, 826 int32_t limit, 827 UnicodeString& target) const { 828 pinIndex(start); 829 pinIndex(limit); 830 doExtract(start, limit - start, target); 831 } 832 833 // When converting from UTF-16 to UTF-8, the result will have at most 3 times 834 // as many bytes as the source has UChars. 835 // The "worst cases" are writing systems like Indic, Thai and CJK with 836 // 3:1 bytes:UChars. 837 void 838 UnicodeString::toUTF8(ByteSink &sink) const { 839 int32_t length16 = length(); 840 if(length16 != 0) { 841 char stackBuffer[1024]; 842 int32_t capacity = (int32_t)sizeof(stackBuffer); 843 UBool utf8IsOwned = FALSE; 844 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity, 845 3*length16, 846 stackBuffer, capacity, 847 &capacity); 848 int32_t length8 = 0; 849 UErrorCode errorCode = U_ZERO_ERROR; 850 u_strToUTF8WithSub(utf8, capacity, &length8, 851 getBuffer(), length16, 852 0xFFFD, // Standard substitution character. 853 NULL, // Don't care about number of substitutions. 854 &errorCode); 855 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 856 utf8 = (char *)uprv_malloc(length8); 857 if(utf8 != NULL) { 858 utf8IsOwned = TRUE; 859 errorCode = U_ZERO_ERROR; 860 u_strToUTF8WithSub(utf8, length8, &length8, 861 getBuffer(), length16, 862 0xFFFD, // Standard substitution character. 863 NULL, // Don't care about number of substitutions. 864 &errorCode); 865 } else { 866 errorCode = U_MEMORY_ALLOCATION_ERROR; 867 } 868 } 869 if(U_SUCCESS(errorCode)) { 870 sink.Append(utf8, length8); 871 sink.Flush(); 872 } 873 if(utf8IsOwned) { 874 uprv_free(utf8); 875 } 876 } 877 } 878 879 int32_t 880 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const { 881 int32_t length32=0; 882 if(U_SUCCESS(errorCode)) { 883 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments. 884 u_strToUTF32WithSub(utf32, capacity, &length32, 885 getBuffer(), length(), 886 0xfffd, // Substitution character. 887 NULL, // Don't care about number of substitutions. 888 &errorCode); 889 } 890 return length32; 891 } 892 893 int32_t 894 UnicodeString::indexOf(const UChar *srcChars, 895 int32_t srcStart, 896 int32_t srcLength, 897 int32_t start, 898 int32_t length) const 899 { 900 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 901 return -1; 902 } 903 904 // UnicodeString does not find empty substrings 905 if(srcLength < 0 && srcChars[srcStart] == 0) { 906 return -1; 907 } 908 909 // get the indices within bounds 910 pinIndices(start, length); 911 912 // find the first occurrence of the substring 913 const UChar *array = getArrayStart(); 914 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); 915 if(match == NULL) { 916 return -1; 917 } else { 918 return (int32_t)(match - array); 919 } 920 } 921 922 int32_t 923 UnicodeString::doIndexOf(UChar c, 924 int32_t start, 925 int32_t length) const 926 { 927 // pin indices 928 pinIndices(start, length); 929 930 // find the first occurrence of c 931 const UChar *array = getArrayStart(); 932 const UChar *match = u_memchr(array + start, c, length); 933 if(match == NULL) { 934 return -1; 935 } else { 936 return (int32_t)(match - array); 937 } 938 } 939 940 int32_t 941 UnicodeString::doIndexOf(UChar32 c, 942 int32_t start, 943 int32_t length) const { 944 // pin indices 945 pinIndices(start, length); 946 947 // find the first occurrence of c 948 const UChar *array = getArrayStart(); 949 const UChar *match = u_memchr32(array + start, c, length); 950 if(match == NULL) { 951 return -1; 952 } else { 953 return (int32_t)(match - array); 954 } 955 } 956 957 int32_t 958 UnicodeString::lastIndexOf(const UChar *srcChars, 959 int32_t srcStart, 960 int32_t srcLength, 961 int32_t start, 962 int32_t length) const 963 { 964 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 965 return -1; 966 } 967 968 // UnicodeString does not find empty substrings 969 if(srcLength < 0 && srcChars[srcStart] == 0) { 970 return -1; 971 } 972 973 // get the indices within bounds 974 pinIndices(start, length); 975 976 // find the last occurrence of the substring 977 const UChar *array = getArrayStart(); 978 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength); 979 if(match == NULL) { 980 return -1; 981 } else { 982 return (int32_t)(match - array); 983 } 984 } 985 986 int32_t 987 UnicodeString::doLastIndexOf(UChar c, 988 int32_t start, 989 int32_t length) const 990 { 991 if(isBogus()) { 992 return -1; 993 } 994 995 // pin indices 996 pinIndices(start, length); 997 998 // find the last occurrence of c 999 const UChar *array = getArrayStart(); 1000 const UChar *match = u_memrchr(array + start, c, length); 1001 if(match == NULL) { 1002 return -1; 1003 } else { 1004 return (int32_t)(match - array); 1005 } 1006 } 1007 1008 int32_t 1009 UnicodeString::doLastIndexOf(UChar32 c, 1010 int32_t start, 1011 int32_t length) const { 1012 // pin indices 1013 pinIndices(start, length); 1014 1015 // find the last occurrence of c 1016 const UChar *array = getArrayStart(); 1017 const UChar *match = u_memrchr32(array + start, c, length); 1018 if(match == NULL) { 1019 return -1; 1020 } else { 1021 return (int32_t)(match - array); 1022 } 1023 } 1024 1025 //======================================== 1026 // Write implementation 1027 //======================================== 1028 1029 UnicodeString& 1030 UnicodeString::findAndReplace(int32_t start, 1031 int32_t length, 1032 const UnicodeString& oldText, 1033 int32_t oldStart, 1034 int32_t oldLength, 1035 const UnicodeString& newText, 1036 int32_t newStart, 1037 int32_t newLength) 1038 { 1039 if(isBogus() || oldText.isBogus() || newText.isBogus()) { 1040 return *this; 1041 } 1042 1043 pinIndices(start, length); 1044 oldText.pinIndices(oldStart, oldLength); 1045 newText.pinIndices(newStart, newLength); 1046 1047 if(oldLength == 0) { 1048 return *this; 1049 } 1050 1051 while(length > 0 && length >= oldLength) { 1052 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length); 1053 if(pos < 0) { 1054 // no more oldText's here: done 1055 break; 1056 } else { 1057 // we found oldText, replace it by newText and go beyond it 1058 replace(pos, oldLength, newText, newStart, newLength); 1059 length -= pos + oldLength - start; 1060 start = pos + newLength; 1061 } 1062 } 1063 1064 return *this; 1065 } 1066 1067 1068 void 1069 UnicodeString::setToBogus() 1070 { 1071 releaseArray(); 1072 1073 fShortLength = 0; 1074 fUnion.fFields.fArray = 0; 1075 fUnion.fFields.fCapacity = 0; 1076 fFlags = kIsBogus; 1077 } 1078 1079 // turn a bogus string into an empty one 1080 void 1081 UnicodeString::unBogus() { 1082 if(fFlags & kIsBogus) { 1083 setToEmpty(); 1084 } 1085 } 1086 1087 // setTo() analogous to the readonly-aliasing constructor with the same signature 1088 UnicodeString & 1089 UnicodeString::setTo(UBool isTerminated, 1090 const UChar *text, 1091 int32_t textLength) 1092 { 1093 if(fFlags & kOpenGetBuffer) { 1094 // do not modify a string that has an "open" getBuffer(minCapacity) 1095 return *this; 1096 } 1097 1098 if(text == NULL) { 1099 // treat as an empty string, do not alias 1100 releaseArray(); 1101 setToEmpty(); 1102 return *this; 1103 } 1104 1105 if( textLength < -1 || 1106 (textLength == -1 && !isTerminated) || 1107 (textLength >= 0 && isTerminated && text[textLength] != 0) 1108 ) { 1109 setToBogus(); 1110 return *this; 1111 } 1112 1113 releaseArray(); 1114 1115 if(textLength == -1) { 1116 // text is terminated, or else it would have failed the above test 1117 textLength = u_strlen(text); 1118 } 1119 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 1120 1121 fFlags = kReadonlyAlias; 1122 return *this; 1123 } 1124 1125 // setTo() analogous to the writable-aliasing constructor with the same signature 1126 UnicodeString & 1127 UnicodeString::setTo(UChar *buffer, 1128 int32_t buffLength, 1129 int32_t buffCapacity) { 1130 if(fFlags & kOpenGetBuffer) { 1131 // do not modify a string that has an "open" getBuffer(minCapacity) 1132 return *this; 1133 } 1134 1135 if(buffer == NULL) { 1136 // treat as an empty string, do not alias 1137 releaseArray(); 1138 setToEmpty(); 1139 return *this; 1140 } 1141 1142 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 1143 setToBogus(); 1144 return *this; 1145 } else if(buffLength == -1) { 1146 // buffLength = u_strlen(buff); but do not look beyond buffCapacity 1147 const UChar *p = buffer, *limit = buffer + buffCapacity; 1148 while(p != limit && *p != 0) { 1149 ++p; 1150 } 1151 buffLength = (int32_t)(p - buffer); 1152 } 1153 1154 releaseArray(); 1155 1156 setArray(buffer, buffLength, buffCapacity); 1157 fFlags = kWritableAlias; 1158 return *this; 1159 } 1160 1161 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) { 1162 unBogus(); 1163 int32_t length = utf8.length(); 1164 int32_t capacity; 1165 // The UTF-16 string will be at most as long as the UTF-8 string. 1166 if(length <= US_STACKBUF_SIZE) { 1167 capacity = US_STACKBUF_SIZE; 1168 } else { 1169 capacity = length + 1; // +1 for the terminating NUL. 1170 } 1171 UChar *utf16 = getBuffer(capacity); 1172 int32_t length16; 1173 UErrorCode errorCode = U_ZERO_ERROR; 1174 u_strFromUTF8WithSub(utf16, getCapacity(), &length16, 1175 utf8.data(), length, 1176 0xfffd, // Substitution character. 1177 NULL, // Don't care about number of substitutions. 1178 &errorCode); 1179 releaseBuffer(length16); 1180 if(U_FAILURE(errorCode)) { 1181 setToBogus(); 1182 } 1183 return *this; 1184 } 1185 1186 UnicodeString& 1187 UnicodeString::setCharAt(int32_t offset, 1188 UChar c) 1189 { 1190 int32_t len = length(); 1191 if(cloneArrayIfNeeded() && len > 0) { 1192 if(offset < 0) { 1193 offset = 0; 1194 } else if(offset >= len) { 1195 offset = len - 1; 1196 } 1197 1198 getArrayStart()[offset] = c; 1199 } 1200 return *this; 1201 } 1202 1203 UnicodeString& 1204 UnicodeString::doReplace( int32_t start, 1205 int32_t length, 1206 const UnicodeString& src, 1207 int32_t srcStart, 1208 int32_t srcLength) 1209 { 1210 if(!src.isBogus()) { 1211 // pin the indices to legal values 1212 src.pinIndices(srcStart, srcLength); 1213 1214 // get the characters from src 1215 // and replace the range in ourselves with them 1216 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength); 1217 } else { 1218 // remove the range 1219 return doReplace(start, length, 0, 0, 0); 1220 } 1221 } 1222 1223 UnicodeString& 1224 UnicodeString::doReplace(int32_t start, 1225 int32_t length, 1226 const UChar *srcChars, 1227 int32_t srcStart, 1228 int32_t srcLength) 1229 { 1230 if(!isWritable()) { 1231 return *this; 1232 } 1233 1234 int32_t oldLength = this->length(); 1235 1236 // optimize (read-only alias).remove(0, start) and .remove(start, end) 1237 if((fFlags&kBufferIsReadonly) && srcLength == 0) { 1238 if(start == 0) { 1239 // remove prefix by adjusting the array pointer 1240 pinIndex(length); 1241 fUnion.fFields.fArray += length; 1242 fUnion.fFields.fCapacity -= length; 1243 setLength(oldLength - length); 1244 return *this; 1245 } else { 1246 pinIndex(start); 1247 if(length >= (oldLength - start)) { 1248 // remove suffix by reducing the length (like truncate()) 1249 setLength(start); 1250 fUnion.fFields.fCapacity = start; // not NUL-terminated any more 1251 return *this; 1252 } 1253 } 1254 } 1255 1256 if(srcChars == 0) { 1257 srcStart = srcLength = 0; 1258 } else if(srcLength < 0) { 1259 // get the srcLength if necessary 1260 srcLength = u_strlen(srcChars + srcStart); 1261 } 1262 1263 // calculate the size of the string after the replace 1264 int32_t newLength; 1265 1266 // optimize append() onto a large-enough, owned string 1267 if(start >= oldLength) { 1268 newLength = oldLength + srcLength; 1269 if(newLength <= getCapacity() && isBufferWritable()) { 1270 UChar *oldArray = getArrayStart(); 1271 // Do not copy characters when 1272 // UChar *buffer=str.getAppendBuffer(...); 1273 // is followed by 1274 // str.append(buffer, length); 1275 // or 1276 // str.appendString(buffer, length) 1277 // or similar. 1278 if(srcChars + srcStart != oldArray + start || start > oldLength) { 1279 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength); 1280 } 1281 setLength(newLength); 1282 return *this; 1283 } else { 1284 // pin the indices to legal values 1285 start = oldLength; 1286 length = 0; 1287 } 1288 } else { 1289 // pin the indices to legal values 1290 pinIndices(start, length); 1291 1292 newLength = oldLength - length + srcLength; 1293 } 1294 1295 // the following may change fArray but will not copy the current contents; 1296 // therefore we need to keep the current fArray 1297 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1298 UChar *oldArray; 1299 if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) { 1300 // copy the stack buffer contents because it will be overwritten with 1301 // fUnion.fFields values 1302 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength); 1303 oldArray = oldStackBuffer; 1304 } else { 1305 oldArray = getArrayStart(); 1306 } 1307 1308 // clone our array and allocate a bigger array if needed 1309 int32_t *bufferToDelete = 0; 1310 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize, 1311 FALSE, &bufferToDelete) 1312 ) { 1313 return *this; 1314 } 1315 1316 // now do the replace 1317 1318 UChar *newArray = getArrayStart(); 1319 if(newArray != oldArray) { 1320 // if fArray changed, then we need to copy everything except what will change 1321 us_arrayCopy(oldArray, 0, newArray, 0, start); 1322 us_arrayCopy(oldArray, start + length, 1323 newArray, start + srcLength, 1324 oldLength - (start + length)); 1325 } else if(length != srcLength) { 1326 // fArray did not change; copy only the portion that isn't changing, leaving a hole 1327 us_arrayCopy(oldArray, start + length, 1328 newArray, start + srcLength, 1329 oldLength - (start + length)); 1330 } 1331 1332 // now fill in the hole with the new string 1333 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); 1334 1335 setLength(newLength); 1336 1337 // delayed delete in case srcChars == fArray when we started, and 1338 // to keep oldArray alive for the above operations 1339 if (bufferToDelete) { 1340 uprv_free(bufferToDelete); 1341 } 1342 1343 return *this; 1344 } 1345 1346 /** 1347 * Replaceable API 1348 */ 1349 void 1350 UnicodeString::handleReplaceBetween(int32_t start, 1351 int32_t limit, 1352 const UnicodeString& text) { 1353 replaceBetween(start, limit, text); 1354 } 1355 1356 /** 1357 * Replaceable API 1358 */ 1359 void 1360 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { 1361 if (limit <= start) { 1362 return; // Nothing to do; avoid bogus malloc call 1363 } 1364 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); 1365 // Check to make sure text is not null. 1366 if (text != NULL) { 1367 extractBetween(start, limit, text, 0); 1368 insert(dest, text, 0, limit - start); 1369 uprv_free(text); 1370 } 1371 } 1372 1373 /** 1374 * Replaceable API 1375 * 1376 * NOTE: This is for the Replaceable class. There is no rep.cpp, 1377 * so we implement this function here. 1378 */ 1379 UBool Replaceable::hasMetaData() const { 1380 return TRUE; 1381 } 1382 1383 /** 1384 * Replaceable API 1385 */ 1386 UBool UnicodeString::hasMetaData() const { 1387 return FALSE; 1388 } 1389 1390 UnicodeString& 1391 UnicodeString::doReverse(int32_t start, int32_t length) { 1392 if(length <= 1 || !cloneArrayIfNeeded()) { 1393 return *this; 1394 } 1395 1396 // pin the indices to legal values 1397 pinIndices(start, length); 1398 if(length <= 1) { // pinIndices() might have shrunk the length 1399 return *this; 1400 } 1401 1402 UChar *left = getArrayStart() + start; 1403 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2) 1404 UChar swap; 1405 UBool hasSupplementary = FALSE; 1406 1407 // Before the loop we know left<right because length>=2. 1408 do { 1409 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left); 1410 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right); 1411 *right-- = swap; 1412 } while(left < right); 1413 // Make sure to test the middle code unit of an odd-length string. 1414 // Redundant if the length is even. 1415 hasSupplementary |= (UBool)U16_IS_LEAD(*left); 1416 1417 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ 1418 if(hasSupplementary) { 1419 UChar swap2; 1420 1421 left = getArrayStart() + start; 1422 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right 1423 while(left < right) { 1424 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) { 1425 *left++ = swap2; 1426 *left++ = swap; 1427 } else { 1428 ++left; 1429 } 1430 } 1431 } 1432 1433 return *this; 1434 } 1435 1436 UBool 1437 UnicodeString::padLeading(int32_t targetLength, 1438 UChar padChar) 1439 { 1440 int32_t oldLength = length(); 1441 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1442 return FALSE; 1443 } else { 1444 // move contents up by padding width 1445 UChar *array = getArrayStart(); 1446 int32_t start = targetLength - oldLength; 1447 us_arrayCopy(array, 0, array, start, oldLength); 1448 1449 // fill in padding character 1450 while(--start >= 0) { 1451 array[start] = padChar; 1452 } 1453 setLength(targetLength); 1454 return TRUE; 1455 } 1456 } 1457 1458 UBool 1459 UnicodeString::padTrailing(int32_t targetLength, 1460 UChar padChar) 1461 { 1462 int32_t oldLength = length(); 1463 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1464 return FALSE; 1465 } else { 1466 // fill in padding character 1467 UChar *array = getArrayStart(); 1468 int32_t length = targetLength; 1469 while(--length >= oldLength) { 1470 array[length] = padChar; 1471 } 1472 setLength(targetLength); 1473 return TRUE; 1474 } 1475 } 1476 1477 //======================================== 1478 // Hashing 1479 //======================================== 1480 int32_t 1481 UnicodeString::doHashCode() const 1482 { 1483 /* Delegate hash computation to uhash. This makes UnicodeString 1484 * hashing consistent with UChar* hashing. */ 1485 int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length()); 1486 if (hashCode == kInvalidHashCode) { 1487 hashCode = kEmptyHashCode; 1488 } 1489 return hashCode; 1490 } 1491 1492 //======================================== 1493 // External Buffer 1494 //======================================== 1495 1496 UChar * 1497 UnicodeString::getBuffer(int32_t minCapacity) { 1498 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { 1499 fFlags|=kOpenGetBuffer; 1500 fShortLength=0; 1501 return getArrayStart(); 1502 } else { 1503 return 0; 1504 } 1505 } 1506 1507 void 1508 UnicodeString::releaseBuffer(int32_t newLength) { 1509 if(fFlags&kOpenGetBuffer && newLength>=-1) { 1510 // set the new fLength 1511 int32_t capacity=getCapacity(); 1512 if(newLength==-1) { 1513 // the new length is the string length, capped by fCapacity 1514 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; 1515 while(p<limit && *p!=0) { 1516 ++p; 1517 } 1518 newLength=(int32_t)(p-array); 1519 } else if(newLength>capacity) { 1520 newLength=capacity; 1521 } 1522 setLength(newLength); 1523 fFlags&=~kOpenGetBuffer; 1524 } 1525 } 1526 1527 //======================================== 1528 // Miscellaneous 1529 //======================================== 1530 UBool 1531 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, 1532 int32_t growCapacity, 1533 UBool doCopyArray, 1534 int32_t **pBufferToDelete, 1535 UBool forceClone) { 1536 // default parameters need to be static, therefore 1537 // the defaults are -1 to have convenience defaults 1538 if(newCapacity == -1) { 1539 newCapacity = getCapacity(); 1540 } 1541 1542 // while a getBuffer(minCapacity) is "open", 1543 // prevent any modifications of the string by returning FALSE here 1544 // if the string is bogus, then only an assignment or similar can revive it 1545 if(!isWritable()) { 1546 return FALSE; 1547 } 1548 1549 /* 1550 * We need to make a copy of the array if 1551 * the buffer is read-only, or 1552 * the buffer is refCounted (shared), and refCount>1, or 1553 * the buffer is too small. 1554 * Return FALSE if memory could not be allocated. 1555 */ 1556 if(forceClone || 1557 fFlags & kBufferIsReadonly || 1558 (fFlags & kRefCounted && refCount() > 1) || 1559 newCapacity > getCapacity() 1560 ) { 1561 // check growCapacity for default value and use of the stack buffer 1562 if(growCapacity == -1) { 1563 growCapacity = newCapacity; 1564 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { 1565 growCapacity = US_STACKBUF_SIZE; 1566 } 1567 1568 // save old values 1569 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1570 UChar *oldArray; 1571 uint8_t flags = fFlags; 1572 1573 if(flags&kUsingStackBuffer) { 1574 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { 1575 // copy the stack buffer contents because it will be overwritten with 1576 // fUnion.fFields values 1577 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength); 1578 oldArray = oldStackBuffer; 1579 } else { 1580 oldArray = 0; // no need to copy from stack buffer to itself 1581 } 1582 } else { 1583 oldArray = fUnion.fFields.fArray; 1584 } 1585 1586 // allocate a new array 1587 if(allocate(growCapacity) || 1588 (newCapacity < growCapacity && allocate(newCapacity)) 1589 ) { 1590 if(doCopyArray && oldArray != 0) { 1591 // copy the contents 1592 // do not copy more than what fits - it may be smaller than before 1593 int32_t minLength = length(); 1594 newCapacity = getCapacity(); 1595 if(newCapacity < minLength) { 1596 minLength = newCapacity; 1597 setLength(minLength); 1598 } 1599 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); 1600 } else { 1601 fShortLength = 0; 1602 } 1603 1604 // release the old array 1605 if(flags & kRefCounted) { 1606 // the array is refCounted; decrement and release if 0 1607 int32_t *pRefCount = ((int32_t *)oldArray - 1); 1608 if(umtx_atomic_dec(pRefCount) == 0) { 1609 if(pBufferToDelete == 0) { 1610 uprv_free(pRefCount); 1611 } else { 1612 // the caller requested to delete it himself 1613 *pBufferToDelete = pRefCount; 1614 } 1615 } 1616 } 1617 } else { 1618 // not enough memory for growCapacity and not even for the smaller newCapacity 1619 // reset the old values for setToBogus() to release the array 1620 if(!(flags&kUsingStackBuffer)) { 1621 fUnion.fFields.fArray = oldArray; 1622 } 1623 fFlags = flags; 1624 setToBogus(); 1625 return FALSE; 1626 } 1627 } 1628 return TRUE; 1629 } 1630 1631 // UnicodeStringAppendable ------------------------------------------------- *** 1632 1633 UBool 1634 UnicodeStringAppendable::appendCodeUnit(UChar c) { 1635 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable(); 1636 } 1637 1638 UBool 1639 UnicodeStringAppendable::appendCodePoint(UChar32 c) { 1640 UChar buffer[U16_MAX_LENGTH]; 1641 int32_t cLength = 0; 1642 UBool isError = FALSE; 1643 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError); 1644 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable(); 1645 } 1646 1647 UBool 1648 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) { 1649 return str.doReplace(str.length(), 0, s, 0, length).isWritable(); 1650 } 1651 1652 UBool 1653 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) { 1654 return str.cloneArrayIfNeeded(str.length() + appendCapacity); 1655 } 1656 1657 UChar * 1658 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity, 1659 int32_t desiredCapacityHint, 1660 UChar *scratch, int32_t scratchCapacity, 1661 int32_t *resultCapacity) { 1662 if(minCapacity < 1 || scratchCapacity < minCapacity) { 1663 *resultCapacity = 0; 1664 return NULL; 1665 } 1666 int32_t oldLength = str.length(); 1667 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { 1668 *resultCapacity = str.getCapacity() - oldLength; 1669 return str.getArrayStart() + oldLength; 1670 } 1671 *resultCapacity = scratchCapacity; 1672 return scratch; 1673 } 1674 1675 U_NAMESPACE_END 1676 1677 #ifdef U_STATIC_IMPLEMENTATION 1678 /* 1679 This should never be called. It is defined here to make sure that the 1680 virtual vector deleting destructor is defined within unistr.cpp. 1681 The vector deleting destructor is already a part of UObject, 1682 but defining it here makes sure that it is included with this object file. 1683 This makes sure that static library dependencies are kept to a minimum. 1684 */ 1685 static void uprv_UnicodeStringDummy(void) { 1686 U_NAMESPACE_USE 1687 delete [] (new UnicodeString[2]); 1688 } 1689 #endif 1690