1 /* 2 ****************************************************************************** 3 * Copyright (C) 1999-2013, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ****************************************************************************** 6 * 7 * File unistr.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 09/25/98 stephen Creation. 13 * 04/20/99 stephen Overhauled per 4/16 code review. 14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX 15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from 16 * Replaceable. 17 * 06/25/01 grhoten Removed the dependency on iostream 18 ****************************************************************************** 19 */ 20 21 #include "unicode/utypes.h" 22 #include "unicode/appendable.h" 23 #include "unicode/putil.h" 24 #include "cstring.h" 25 #include "cmemory.h" 26 #include "unicode/ustring.h" 27 #include "unicode/unistr.h" 28 #include "unicode/utf.h" 29 #include "unicode/utf16.h" 30 #include "uelement.h" 31 #include "ustr_imp.h" 32 #include "umutex.h" 33 #include "uassert.h" 34 35 #if 0 36 37 #include <iostream> 38 using namespace std; 39 40 //DEBUGGING 41 void 42 print(const UnicodeString& s, 43 const char *name) 44 { 45 UChar c; 46 cout << name << ":|"; 47 for(int i = 0; i < s.length(); ++i) { 48 c = s[i]; 49 if(c>= 0x007E || c < 0x0020) 50 cout << "[0x" << hex << s[i] << "]"; 51 else 52 cout << (char) s[i]; 53 } 54 cout << '|' << endl; 55 } 56 57 void 58 print(const UChar *s, 59 int32_t len, 60 const char *name) 61 { 62 UChar c; 63 cout << name << ":|"; 64 for(int i = 0; i < len; ++i) { 65 c = s[i]; 66 if(c>= 0x007E || c < 0x0020) 67 cout << "[0x" << hex << s[i] << "]"; 68 else 69 cout << (char) s[i]; 70 } 71 cout << '|' << endl; 72 } 73 // END DEBUGGING 74 #endif 75 76 // Local function definitions for now 77 78 // need to copy areas that may overlap 79 static 80 inline void 81 us_arrayCopy(const UChar *src, int32_t srcStart, 82 UChar *dst, int32_t dstStart, int32_t count) 83 { 84 if(count>0) { 85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); 86 } 87 } 88 89 // u_unescapeAt() callback to get a UChar from a UnicodeString 90 U_CDECL_BEGIN 91 static UChar U_CALLCONV 92 UnicodeString_charAt(int32_t offset, void *context) { 93 return ((icu::UnicodeString*) context)->charAt(offset); 94 } 95 U_CDECL_END 96 97 U_NAMESPACE_BEGIN 98 99 /* The Replaceable virtual destructor can't be defined in the header 100 due to how AIX works with multiple definitions of virtual functions. 101 */ 102 Replaceable::~Replaceable() {} 103 104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) 105 106 UnicodeString U_EXPORT2 107 operator+ (const UnicodeString &s1, const UnicodeString &s2) { 108 return 109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). 110 append(s1). 111 append(s2); 112 } 113 114 //======================================== 115 // Reference Counting functions, put at top of file so that optimizing compilers 116 // have a chance to automatically inline. 117 //======================================== 118 119 void 120 UnicodeString::addRef() { 121 umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1); 122 } 123 124 int32_t 125 UnicodeString::removeRef() { 126 return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1); 127 } 128 129 int32_t 130 UnicodeString::refCount() const { 131 return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1)); 132 } 133 134 void 135 UnicodeString::releaseArray() { 136 if((fFlags & kRefCounted) && removeRef() == 0) { 137 uprv_free((int32_t *)fUnion.fFields.fArray - 1); 138 } 139 } 140 141 142 143 //======================================== 144 // Constructors 145 //======================================== 146 147 // The default constructor is inline in unistr.h. 148 149 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) 150 : fShortLength(0), 151 fFlags(0) 152 { 153 if(count <= 0 || (uint32_t)c > 0x10ffff) { 154 // just allocate and do not do anything else 155 allocate(capacity); 156 } else { 157 // count > 0, allocate and fill the new string with count c's 158 int32_t unitCount = U16_LENGTH(c), length = count * unitCount; 159 if(capacity < length) { 160 capacity = length; 161 } 162 if(allocate(capacity)) { 163 UChar *array = getArrayStart(); 164 int32_t i = 0; 165 166 // fill the new string with c 167 if(unitCount == 1) { 168 // fill with length UChars 169 while(i < length) { 170 array[i++] = (UChar)c; 171 } 172 } else { 173 // get the code units for c 174 UChar units[U16_MAX_LENGTH]; 175 U16_APPEND_UNSAFE(units, i, c); 176 177 // now it must be i==unitCount 178 i = 0; 179 180 // for Unicode, unitCount can only be 1, 2, 3, or 4 181 // 1 is handled above 182 while(i < length) { 183 int32_t unitIdx = 0; 184 while(unitIdx < unitCount) { 185 array[i++]=units[unitIdx++]; 186 } 187 } 188 } 189 } 190 setLength(length); 191 } 192 } 193 194 UnicodeString::UnicodeString(UChar ch) 195 : fShortLength(1), 196 fFlags(kShortString) 197 { 198 fUnion.fStackBuffer[0] = ch; 199 } 200 201 UnicodeString::UnicodeString(UChar32 ch) 202 : fShortLength(0), 203 fFlags(kShortString) 204 { 205 int32_t i = 0; 206 UBool isError = FALSE; 207 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError); 208 // We test isError so that the compiler does not complain that we don't. 209 // If isError then i==0 which is what we want anyway. 210 if(!isError) { 211 fShortLength = (int8_t)i; 212 } 213 } 214 215 UnicodeString::UnicodeString(const UChar *text) 216 : fShortLength(0), 217 fFlags(kShortString) 218 { 219 doReplace(0, 0, text, 0, -1); 220 } 221 222 UnicodeString::UnicodeString(const UChar *text, 223 int32_t textLength) 224 : fShortLength(0), 225 fFlags(kShortString) 226 { 227 doReplace(0, 0, text, 0, textLength); 228 } 229 230 UnicodeString::UnicodeString(UBool isTerminated, 231 const UChar *text, 232 int32_t textLength) 233 : fShortLength(0), 234 fFlags(kReadonlyAlias) 235 { 236 if(text == NULL) { 237 // treat as an empty string, do not alias 238 setToEmpty(); 239 } else if(textLength < -1 || 240 (textLength == -1 && !isTerminated) || 241 (textLength >= 0 && isTerminated && text[textLength] != 0) 242 ) { 243 setToBogus(); 244 } else { 245 if(textLength == -1) { 246 // text is terminated, or else it would have failed the above test 247 textLength = u_strlen(text); 248 } 249 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 250 } 251 } 252 253 UnicodeString::UnicodeString(UChar *buff, 254 int32_t buffLength, 255 int32_t buffCapacity) 256 : fShortLength(0), 257 fFlags(kWritableAlias) 258 { 259 if(buff == NULL) { 260 // treat as an empty string, do not alias 261 setToEmpty(); 262 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 263 setToBogus(); 264 } else { 265 if(buffLength == -1) { 266 // fLength = u_strlen(buff); but do not look beyond buffCapacity 267 const UChar *p = buff, *limit = buff + buffCapacity; 268 while(p != limit && *p != 0) { 269 ++p; 270 } 271 buffLength = (int32_t)(p - buff); 272 } 273 setArray(buff, buffLength, buffCapacity); 274 } 275 } 276 277 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) 278 : fShortLength(0), 279 fFlags(kShortString) 280 { 281 if(src==NULL) { 282 // treat as an empty string 283 } else { 284 if(length<0) { 285 length=(int32_t)uprv_strlen(src); 286 } 287 if(cloneArrayIfNeeded(length, length, FALSE)) { 288 u_charsToUChars(src, getArrayStart(), length); 289 setLength(length); 290 } else { 291 setToBogus(); 292 } 293 } 294 } 295 296 #if U_CHARSET_IS_UTF8 297 298 UnicodeString::UnicodeString(const char *codepageData) 299 : fShortLength(0), 300 fFlags(kShortString) { 301 if(codepageData != 0) { 302 setToUTF8(codepageData); 303 } 304 } 305 306 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) 307 : fShortLength(0), 308 fFlags(kShortString) { 309 // if there's nothing to convert, do nothing 310 if(codepageData == 0 || dataLength == 0 || dataLength < -1) { 311 return; 312 } 313 if(dataLength == -1) { 314 dataLength = (int32_t)uprv_strlen(codepageData); 315 } 316 setToUTF8(StringPiece(codepageData, dataLength)); 317 } 318 319 // else see unistr_cnv.cpp 320 #endif 321 322 UnicodeString::UnicodeString(const UnicodeString& that) 323 : Replaceable(), 324 fShortLength(0), 325 fFlags(kShortString) 326 { 327 copyFrom(that); 328 } 329 330 UnicodeString::UnicodeString(const UnicodeString& that, 331 int32_t srcStart) 332 : Replaceable(), 333 fShortLength(0), 334 fFlags(kShortString) 335 { 336 setTo(that, srcStart); 337 } 338 339 UnicodeString::UnicodeString(const UnicodeString& that, 340 int32_t srcStart, 341 int32_t srcLength) 342 : Replaceable(), 343 fShortLength(0), 344 fFlags(kShortString) 345 { 346 setTo(that, srcStart, srcLength); 347 } 348 349 // Replaceable base class clone() default implementation, does not clone 350 Replaceable * 351 Replaceable::clone() const { 352 return NULL; 353 } 354 355 // UnicodeString overrides clone() with a real implementation 356 Replaceable * 357 UnicodeString::clone() const { 358 return new UnicodeString(*this); 359 } 360 361 //======================================== 362 // array allocation 363 //======================================== 364 365 UBool 366 UnicodeString::allocate(int32_t capacity) { 367 if(capacity <= US_STACKBUF_SIZE) { 368 fFlags = kShortString; 369 } else { 370 // count bytes for the refCounter and the string capacity, and 371 // round up to a multiple of 16; then divide by 4 and allocate int32_t's 372 // to be safely aligned for the refCount 373 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer() 374 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2); 375 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words ); 376 if(array != 0) { 377 // set initial refCount and point behind the refCount 378 *array++ = 1; 379 380 // have fArray point to the first UChar 381 fUnion.fFields.fArray = (UChar *)array; 382 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); 383 fFlags = kLongString; 384 } else { 385 fShortLength = 0; 386 fUnion.fFields.fArray = 0; 387 fUnion.fFields.fCapacity = 0; 388 fFlags = kIsBogus; 389 return FALSE; 390 } 391 } 392 return TRUE; 393 } 394 395 //======================================== 396 // Destructor 397 //======================================== 398 UnicodeString::~UnicodeString() 399 { 400 releaseArray(); 401 } 402 403 //======================================== 404 // Factory methods 405 //======================================== 406 407 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) { 408 UnicodeString result; 409 result.setToUTF8(utf8); 410 return result; 411 } 412 413 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) { 414 UnicodeString result; 415 int32_t capacity; 416 // Most UTF-32 strings will be BMP-only and result in a same-length 417 // UTF-16 string. We overestimate the capacity just slightly, 418 // just in case there are a few supplementary characters. 419 if(length <= US_STACKBUF_SIZE) { 420 capacity = US_STACKBUF_SIZE; 421 } else { 422 capacity = length + (length >> 4) + 4; 423 } 424 do { 425 UChar *utf16 = result.getBuffer(capacity); 426 int32_t length16; 427 UErrorCode errorCode = U_ZERO_ERROR; 428 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16, 429 utf32, length, 430 0xfffd, // Substitution character. 431 NULL, // Don't care about number of substitutions. 432 &errorCode); 433 result.releaseBuffer(length16); 434 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 435 capacity = length16 + 1; // +1 for the terminating NUL. 436 continue; 437 } else if(U_FAILURE(errorCode)) { 438 result.setToBogus(); 439 } 440 break; 441 } while(TRUE); 442 return result; 443 } 444 445 //======================================== 446 // Assignment 447 //======================================== 448 449 UnicodeString & 450 UnicodeString::operator=(const UnicodeString &src) { 451 return copyFrom(src); 452 } 453 454 UnicodeString & 455 UnicodeString::fastCopyFrom(const UnicodeString &src) { 456 return copyFrom(src, TRUE); 457 } 458 459 UnicodeString & 460 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { 461 // if assigning to ourselves, do nothing 462 if(this == 0 || this == &src) { 463 return *this; 464 } 465 466 // is the right side bogus? 467 if(&src == 0 || src.isBogus()) { 468 setToBogus(); 469 return *this; 470 } 471 472 // delete the current contents 473 releaseArray(); 474 475 if(src.isEmpty()) { 476 // empty string - use the stack buffer 477 setToEmpty(); 478 return *this; 479 } 480 481 // we always copy the length 482 int32_t srcLength = src.length(); 483 setLength(srcLength); 484 485 // fLength>0 and not an "open" src.getBuffer(minCapacity) 486 switch(src.fFlags) { 487 case kShortString: 488 // short string using the stack buffer, do the same 489 fFlags = kShortString; 490 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR); 491 break; 492 case kLongString: 493 // src uses a refCounted string buffer, use that buffer with refCount 494 // src is const, use a cast - we don't really change it 495 ((UnicodeString &)src).addRef(); 496 // copy all fields, share the reference-counted buffer 497 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 498 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 499 fFlags = src.fFlags; 500 break; 501 case kReadonlyAlias: 502 if(fastCopy) { 503 // src is a readonly alias, do the same 504 // -> maintain the readonly alias as such 505 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 506 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 507 fFlags = src.fFlags; 508 break; 509 } 510 // else if(!fastCopy) fall through to case kWritableAlias 511 // -> allocate a new buffer and copy the contents 512 case kWritableAlias: 513 // src is a writable alias; we make a copy of that instead 514 if(allocate(srcLength)) { 515 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR); 516 break; 517 } 518 // if there is not enough memory, then fall through to setting to bogus 519 default: 520 // if src is bogus, set ourselves to bogus 521 // do not call setToBogus() here because fArray and fFlags are not consistent here 522 fShortLength = 0; 523 fUnion.fFields.fArray = 0; 524 fUnion.fFields.fCapacity = 0; 525 fFlags = kIsBogus; 526 break; 527 } 528 529 return *this; 530 } 531 532 //======================================== 533 // Miscellaneous operations 534 //======================================== 535 536 UnicodeString UnicodeString::unescape() const { 537 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity 538 const UChar *array = getBuffer(); 539 int32_t len = length(); 540 int32_t prev = 0; 541 for (int32_t i=0;;) { 542 if (i == len) { 543 result.append(array, prev, len - prev); 544 break; 545 } 546 if (array[i++] == 0x5C /*'\\'*/) { 547 result.append(array, prev, (i - 1) - prev); 548 UChar32 c = unescapeAt(i); // advances i 549 if (c < 0) { 550 result.remove(); // return empty string 551 break; // invalid escape sequence 552 } 553 result.append(c); 554 prev = i; 555 } 556 } 557 return result; 558 } 559 560 UChar32 UnicodeString::unescapeAt(int32_t &offset) const { 561 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this); 562 } 563 564 //======================================== 565 // Read-only implementation 566 //======================================== 567 UBool 568 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const { 569 // Requires: this & text not bogus and have same lengths. 570 // Byte-wise comparison works for equality regardless of endianness. 571 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0; 572 } 573 574 int8_t 575 UnicodeString::doCompare( int32_t start, 576 int32_t length, 577 const UChar *srcChars, 578 int32_t srcStart, 579 int32_t srcLength) const 580 { 581 // compare illegal string values 582 if(isBogus()) { 583 return -1; 584 } 585 586 // pin indices to legal values 587 pinIndices(start, length); 588 589 if(srcChars == NULL) { 590 // treat const UChar *srcChars==NULL as an empty string 591 return length == 0 ? 0 : 1; 592 } 593 594 // get the correct pointer 595 const UChar *chars = getArrayStart(); 596 597 chars += start; 598 srcChars += srcStart; 599 600 int32_t minLength; 601 int8_t lengthResult; 602 603 // get the srcLength if necessary 604 if(srcLength < 0) { 605 srcLength = u_strlen(srcChars + srcStart); 606 } 607 608 // are we comparing different lengths? 609 if(length != srcLength) { 610 if(length < srcLength) { 611 minLength = length; 612 lengthResult = -1; 613 } else { 614 minLength = srcLength; 615 lengthResult = 1; 616 } 617 } else { 618 minLength = length; 619 lengthResult = 0; 620 } 621 622 /* 623 * note that uprv_memcmp() returns an int but we return an int8_t; 624 * we need to take care not to truncate the result - 625 * one way to do this is to right-shift the value to 626 * move the sign bit into the lower 8 bits and making sure that this 627 * does not become 0 itself 628 */ 629 630 if(minLength > 0 && chars != srcChars) { 631 int32_t result; 632 633 # if U_IS_BIG_ENDIAN 634 // big-endian: byte comparison works 635 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); 636 if(result != 0) { 637 return (int8_t)(result >> 15 | 1); 638 } 639 # else 640 // little-endian: compare UChar units 641 do { 642 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); 643 if(result != 0) { 644 return (int8_t)(result >> 15 | 1); 645 } 646 } while(--minLength > 0); 647 # endif 648 } 649 return lengthResult; 650 } 651 652 /* String compare in code point order - doCompare() compares in code unit order. */ 653 int8_t 654 UnicodeString::doCompareCodePointOrder(int32_t start, 655 int32_t length, 656 const UChar *srcChars, 657 int32_t srcStart, 658 int32_t srcLength) const 659 { 660 // compare illegal string values 661 // treat const UChar *srcChars==NULL as an empty string 662 if(isBogus()) { 663 return -1; 664 } 665 666 // pin indices to legal values 667 pinIndices(start, length); 668 669 if(srcChars == NULL) { 670 srcStart = srcLength = 0; 671 } 672 673 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE); 674 /* translate the 32-bit result into an 8-bit one */ 675 if(diff!=0) { 676 return (int8_t)(diff >> 15 | 1); 677 } else { 678 return 0; 679 } 680 } 681 682 int32_t 683 UnicodeString::getLength() const { 684 return length(); 685 } 686 687 UChar 688 UnicodeString::getCharAt(int32_t offset) const { 689 return charAt(offset); 690 } 691 692 UChar32 693 UnicodeString::getChar32At(int32_t offset) const { 694 return char32At(offset); 695 } 696 697 UChar32 698 UnicodeString::char32At(int32_t offset) const 699 { 700 int32_t len = length(); 701 if((uint32_t)offset < (uint32_t)len) { 702 const UChar *array = getArrayStart(); 703 UChar32 c; 704 U16_GET(array, 0, offset, len, c); 705 return c; 706 } else { 707 return kInvalidUChar; 708 } 709 } 710 711 int32_t 712 UnicodeString::getChar32Start(int32_t offset) const { 713 if((uint32_t)offset < (uint32_t)length()) { 714 const UChar *array = getArrayStart(); 715 U16_SET_CP_START(array, 0, offset); 716 return offset; 717 } else { 718 return 0; 719 } 720 } 721 722 int32_t 723 UnicodeString::getChar32Limit(int32_t offset) const { 724 int32_t len = length(); 725 if((uint32_t)offset < (uint32_t)len) { 726 const UChar *array = getArrayStart(); 727 U16_SET_CP_LIMIT(array, 0, offset, len); 728 return offset; 729 } else { 730 return len; 731 } 732 } 733 734 int32_t 735 UnicodeString::countChar32(int32_t start, int32_t length) const { 736 pinIndices(start, length); 737 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL 738 return u_countChar32(getArrayStart()+start, length); 739 } 740 741 UBool 742 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { 743 pinIndices(start, length); 744 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL 745 return u_strHasMoreChar32Than(getArrayStart()+start, length, number); 746 } 747 748 int32_t 749 UnicodeString::moveIndex32(int32_t index, int32_t delta) const { 750 // pin index 751 int32_t len = length(); 752 if(index<0) { 753 index=0; 754 } else if(index>len) { 755 index=len; 756 } 757 758 const UChar *array = getArrayStart(); 759 if(delta>0) { 760 U16_FWD_N(array, index, len, delta); 761 } else { 762 U16_BACK_N(array, 0, index, -delta); 763 } 764 765 return index; 766 } 767 768 void 769 UnicodeString::doExtract(int32_t start, 770 int32_t length, 771 UChar *dst, 772 int32_t dstStart) const 773 { 774 // pin indices to legal values 775 pinIndices(start, length); 776 777 // do not copy anything if we alias dst itself 778 const UChar *array = getArrayStart(); 779 if(array + start != dst + dstStart) { 780 us_arrayCopy(array, start, dst, dstStart, length); 781 } 782 } 783 784 int32_t 785 UnicodeString::extract(UChar *dest, int32_t destCapacity, 786 UErrorCode &errorCode) const { 787 int32_t len = length(); 788 if(U_SUCCESS(errorCode)) { 789 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { 790 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 791 } else { 792 const UChar *array = getArrayStart(); 793 if(len>0 && len<=destCapacity && array!=dest) { 794 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); 795 } 796 return u_terminateUChars(dest, destCapacity, len, &errorCode); 797 } 798 } 799 800 return len; 801 } 802 803 int32_t 804 UnicodeString::extract(int32_t start, 805 int32_t length, 806 char *target, 807 int32_t targetCapacity, 808 enum EInvariant) const 809 { 810 // if the arguments are illegal, then do nothing 811 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { 812 return 0; 813 } 814 815 // pin the indices to legal values 816 pinIndices(start, length); 817 818 if(length <= targetCapacity) { 819 u_UCharsToChars(getArrayStart() + start, target, length); 820 } 821 UErrorCode status = U_ZERO_ERROR; 822 return u_terminateChars(target, targetCapacity, length, &status); 823 } 824 825 UnicodeString 826 UnicodeString::tempSubString(int32_t start, int32_t len) const { 827 pinIndices(start, len); 828 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer 829 if(array==NULL) { 830 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string 831 len=-2; // bogus result string 832 } 833 return UnicodeString(FALSE, array + start, len); 834 } 835 836 int32_t 837 UnicodeString::toUTF8(int32_t start, int32_t len, 838 char *target, int32_t capacity) const { 839 pinIndices(start, len); 840 int32_t length8; 841 UErrorCode errorCode = U_ZERO_ERROR; 842 u_strToUTF8WithSub(target, capacity, &length8, 843 getBuffer() + start, len, 844 0xFFFD, // Standard substitution character. 845 NULL, // Don't care about number of substitutions. 846 &errorCode); 847 return length8; 848 } 849 850 #if U_CHARSET_IS_UTF8 851 852 int32_t 853 UnicodeString::extract(int32_t start, int32_t len, 854 char *target, uint32_t dstSize) const { 855 // if the arguments are illegal, then do nothing 856 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { 857 return 0; 858 } 859 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff); 860 } 861 862 // else see unistr_cnv.cpp 863 #endif 864 865 void 866 UnicodeString::extractBetween(int32_t start, 867 int32_t limit, 868 UnicodeString& target) const { 869 pinIndex(start); 870 pinIndex(limit); 871 doExtract(start, limit - start, target); 872 } 873 874 // When converting from UTF-16 to UTF-8, the result will have at most 3 times 875 // as many bytes as the source has UChars. 876 // The "worst cases" are writing systems like Indic, Thai and CJK with 877 // 3:1 bytes:UChars. 878 void 879 UnicodeString::toUTF8(ByteSink &sink) const { 880 int32_t length16 = length(); 881 if(length16 != 0) { 882 char stackBuffer[1024]; 883 int32_t capacity = (int32_t)sizeof(stackBuffer); 884 UBool utf8IsOwned = FALSE; 885 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity, 886 3*length16, 887 stackBuffer, capacity, 888 &capacity); 889 int32_t length8 = 0; 890 UErrorCode errorCode = U_ZERO_ERROR; 891 u_strToUTF8WithSub(utf8, capacity, &length8, 892 getBuffer(), length16, 893 0xFFFD, // Standard substitution character. 894 NULL, // Don't care about number of substitutions. 895 &errorCode); 896 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 897 utf8 = (char *)uprv_malloc(length8); 898 if(utf8 != NULL) { 899 utf8IsOwned = TRUE; 900 errorCode = U_ZERO_ERROR; 901 u_strToUTF8WithSub(utf8, length8, &length8, 902 getBuffer(), length16, 903 0xFFFD, // Standard substitution character. 904 NULL, // Don't care about number of substitutions. 905 &errorCode); 906 } else { 907 errorCode = U_MEMORY_ALLOCATION_ERROR; 908 } 909 } 910 if(U_SUCCESS(errorCode)) { 911 sink.Append(utf8, length8); 912 sink.Flush(); 913 } 914 if(utf8IsOwned) { 915 uprv_free(utf8); 916 } 917 } 918 } 919 920 int32_t 921 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const { 922 int32_t length32=0; 923 if(U_SUCCESS(errorCode)) { 924 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments. 925 u_strToUTF32WithSub(utf32, capacity, &length32, 926 getBuffer(), length(), 927 0xfffd, // Substitution character. 928 NULL, // Don't care about number of substitutions. 929 &errorCode); 930 } 931 return length32; 932 } 933 934 int32_t 935 UnicodeString::indexOf(const UChar *srcChars, 936 int32_t srcStart, 937 int32_t srcLength, 938 int32_t start, 939 int32_t length) const 940 { 941 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 942 return -1; 943 } 944 945 // UnicodeString does not find empty substrings 946 if(srcLength < 0 && srcChars[srcStart] == 0) { 947 return -1; 948 } 949 950 // get the indices within bounds 951 pinIndices(start, length); 952 953 // find the first occurrence of the substring 954 const UChar *array = getArrayStart(); 955 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); 956 if(match == NULL) { 957 return -1; 958 } else { 959 return (int32_t)(match - array); 960 } 961 } 962 963 int32_t 964 UnicodeString::doIndexOf(UChar c, 965 int32_t start, 966 int32_t length) const 967 { 968 // pin indices 969 pinIndices(start, length); 970 971 // find the first occurrence of c 972 const UChar *array = getArrayStart(); 973 const UChar *match = u_memchr(array + start, c, length); 974 if(match == NULL) { 975 return -1; 976 } else { 977 return (int32_t)(match - array); 978 } 979 } 980 981 int32_t 982 UnicodeString::doIndexOf(UChar32 c, 983 int32_t start, 984 int32_t length) const { 985 // pin indices 986 pinIndices(start, length); 987 988 // find the first occurrence of c 989 const UChar *array = getArrayStart(); 990 const UChar *match = u_memchr32(array + start, c, length); 991 if(match == NULL) { 992 return -1; 993 } else { 994 return (int32_t)(match - array); 995 } 996 } 997 998 int32_t 999 UnicodeString::lastIndexOf(const UChar *srcChars, 1000 int32_t srcStart, 1001 int32_t srcLength, 1002 int32_t start, 1003 int32_t length) const 1004 { 1005 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 1006 return -1; 1007 } 1008 1009 // UnicodeString does not find empty substrings 1010 if(srcLength < 0 && srcChars[srcStart] == 0) { 1011 return -1; 1012 } 1013 1014 // get the indices within bounds 1015 pinIndices(start, length); 1016 1017 // find the last occurrence of the substring 1018 const UChar *array = getArrayStart(); 1019 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength); 1020 if(match == NULL) { 1021 return -1; 1022 } else { 1023 return (int32_t)(match - array); 1024 } 1025 } 1026 1027 int32_t 1028 UnicodeString::doLastIndexOf(UChar c, 1029 int32_t start, 1030 int32_t length) const 1031 { 1032 if(isBogus()) { 1033 return -1; 1034 } 1035 1036 // pin indices 1037 pinIndices(start, length); 1038 1039 // find the last occurrence of c 1040 const UChar *array = getArrayStart(); 1041 const UChar *match = u_memrchr(array + start, c, length); 1042 if(match == NULL) { 1043 return -1; 1044 } else { 1045 return (int32_t)(match - array); 1046 } 1047 } 1048 1049 int32_t 1050 UnicodeString::doLastIndexOf(UChar32 c, 1051 int32_t start, 1052 int32_t length) const { 1053 // pin indices 1054 pinIndices(start, length); 1055 1056 // find the last occurrence of c 1057 const UChar *array = getArrayStart(); 1058 const UChar *match = u_memrchr32(array + start, c, length); 1059 if(match == NULL) { 1060 return -1; 1061 } else { 1062 return (int32_t)(match - array); 1063 } 1064 } 1065 1066 //======================================== 1067 // Write implementation 1068 //======================================== 1069 1070 UnicodeString& 1071 UnicodeString::findAndReplace(int32_t start, 1072 int32_t length, 1073 const UnicodeString& oldText, 1074 int32_t oldStart, 1075 int32_t oldLength, 1076 const UnicodeString& newText, 1077 int32_t newStart, 1078 int32_t newLength) 1079 { 1080 if(isBogus() || oldText.isBogus() || newText.isBogus()) { 1081 return *this; 1082 } 1083 1084 pinIndices(start, length); 1085 oldText.pinIndices(oldStart, oldLength); 1086 newText.pinIndices(newStart, newLength); 1087 1088 if(oldLength == 0) { 1089 return *this; 1090 } 1091 1092 while(length > 0 && length >= oldLength) { 1093 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length); 1094 if(pos < 0) { 1095 // no more oldText's here: done 1096 break; 1097 } else { 1098 // we found oldText, replace it by newText and go beyond it 1099 replace(pos, oldLength, newText, newStart, newLength); 1100 length -= pos + oldLength - start; 1101 start = pos + newLength; 1102 } 1103 } 1104 1105 return *this; 1106 } 1107 1108 1109 void 1110 UnicodeString::setToBogus() 1111 { 1112 releaseArray(); 1113 1114 fShortLength = 0; 1115 fUnion.fFields.fArray = 0; 1116 fUnion.fFields.fCapacity = 0; 1117 fFlags = kIsBogus; 1118 } 1119 1120 // turn a bogus string into an empty one 1121 void 1122 UnicodeString::unBogus() { 1123 if(fFlags & kIsBogus) { 1124 setToEmpty(); 1125 } 1126 } 1127 1128 const UChar * 1129 UnicodeString::getTerminatedBuffer() { 1130 if(!isWritable()) { 1131 return 0; 1132 } 1133 UChar *array = getArrayStart(); 1134 int32_t len = length(); 1135 if(len < getCapacity()) { 1136 if(fFlags & kBufferIsReadonly) { 1137 // If len<capacity on a read-only alias, then array[len] is 1138 // either the original NUL (if constructed with (TRUE, s, length)) 1139 // or one of the original string contents characters (if later truncated), 1140 // therefore we can assume that array[len] is initialized memory. 1141 if(array[len] == 0) { 1142 return array; 1143 } 1144 } else if(((fFlags & kRefCounted) == 0 || refCount() == 1)) { 1145 // kRefCounted: Do not write the NUL if the buffer is shared. 1146 // That is mostly safe, except when the length of one copy was modified 1147 // without copy-on-write, e.g., via truncate(newLength) or remove(void). 1148 // Then the NUL would be written into the middle of another copy's string. 1149 1150 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL. 1151 // Do not test if there is a NUL already because it might be uninitialized memory. 1152 // (That would be safe, but tools like valgrind & Purify would complain.) 1153 array[len] = 0; 1154 return array; 1155 } 1156 } 1157 if(cloneArrayIfNeeded(len+1)) { 1158 array = getArrayStart(); 1159 array[len] = 0; 1160 return array; 1161 } else { 1162 return NULL; 1163 } 1164 } 1165 1166 // setTo() analogous to the readonly-aliasing constructor with the same signature 1167 UnicodeString & 1168 UnicodeString::setTo(UBool isTerminated, 1169 const UChar *text, 1170 int32_t textLength) 1171 { 1172 if(fFlags & kOpenGetBuffer) { 1173 // do not modify a string that has an "open" getBuffer(minCapacity) 1174 return *this; 1175 } 1176 1177 if(text == NULL) { 1178 // treat as an empty string, do not alias 1179 releaseArray(); 1180 setToEmpty(); 1181 return *this; 1182 } 1183 1184 if( textLength < -1 || 1185 (textLength == -1 && !isTerminated) || 1186 (textLength >= 0 && isTerminated && text[textLength] != 0) 1187 ) { 1188 setToBogus(); 1189 return *this; 1190 } 1191 1192 releaseArray(); 1193 1194 if(textLength == -1) { 1195 // text is terminated, or else it would have failed the above test 1196 textLength = u_strlen(text); 1197 } 1198 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 1199 1200 fFlags = kReadonlyAlias; 1201 return *this; 1202 } 1203 1204 // setTo() analogous to the writable-aliasing constructor with the same signature 1205 UnicodeString & 1206 UnicodeString::setTo(UChar *buffer, 1207 int32_t buffLength, 1208 int32_t buffCapacity) { 1209 if(fFlags & kOpenGetBuffer) { 1210 // do not modify a string that has an "open" getBuffer(minCapacity) 1211 return *this; 1212 } 1213 1214 if(buffer == NULL) { 1215 // treat as an empty string, do not alias 1216 releaseArray(); 1217 setToEmpty(); 1218 return *this; 1219 } 1220 1221 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 1222 setToBogus(); 1223 return *this; 1224 } else if(buffLength == -1) { 1225 // buffLength = u_strlen(buff); but do not look beyond buffCapacity 1226 const UChar *p = buffer, *limit = buffer + buffCapacity; 1227 while(p != limit && *p != 0) { 1228 ++p; 1229 } 1230 buffLength = (int32_t)(p - buffer); 1231 } 1232 1233 releaseArray(); 1234 1235 setArray(buffer, buffLength, buffCapacity); 1236 fFlags = kWritableAlias; 1237 return *this; 1238 } 1239 1240 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) { 1241 unBogus(); 1242 int32_t length = utf8.length(); 1243 int32_t capacity; 1244 // The UTF-16 string will be at most as long as the UTF-8 string. 1245 if(length <= US_STACKBUF_SIZE) { 1246 capacity = US_STACKBUF_SIZE; 1247 } else { 1248 capacity = length + 1; // +1 for the terminating NUL. 1249 } 1250 UChar *utf16 = getBuffer(capacity); 1251 int32_t length16; 1252 UErrorCode errorCode = U_ZERO_ERROR; 1253 u_strFromUTF8WithSub(utf16, getCapacity(), &length16, 1254 utf8.data(), length, 1255 0xfffd, // Substitution character. 1256 NULL, // Don't care about number of substitutions. 1257 &errorCode); 1258 releaseBuffer(length16); 1259 if(U_FAILURE(errorCode)) { 1260 setToBogus(); 1261 } 1262 return *this; 1263 } 1264 1265 UnicodeString& 1266 UnicodeString::setCharAt(int32_t offset, 1267 UChar c) 1268 { 1269 int32_t len = length(); 1270 if(cloneArrayIfNeeded() && len > 0) { 1271 if(offset < 0) { 1272 offset = 0; 1273 } else if(offset >= len) { 1274 offset = len - 1; 1275 } 1276 1277 getArrayStart()[offset] = c; 1278 } 1279 return *this; 1280 } 1281 1282 UnicodeString& 1283 UnicodeString::replace(int32_t start, 1284 int32_t _length, 1285 UChar32 srcChar) { 1286 UChar buffer[U16_MAX_LENGTH]; 1287 int32_t count = 0; 1288 UBool isError = FALSE; 1289 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); 1290 // We test isError so that the compiler does not complain that we don't. 1291 // If isError (srcChar is not a valid code point) then count==0 which means 1292 // we remove the source segment rather than replacing it with srcChar. 1293 return doReplace(start, _length, buffer, 0, isError ? 0 : count); 1294 } 1295 1296 UnicodeString& 1297 UnicodeString::append(UChar32 srcChar) { 1298 UChar buffer[U16_MAX_LENGTH]; 1299 int32_t _length = 0; 1300 UBool isError = FALSE; 1301 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); 1302 // We test isError so that the compiler does not complain that we don't. 1303 // If isError then _length==0 which turns the doReplace() into a no-op anyway. 1304 return isError ? *this : doReplace(length(), 0, buffer, 0, _length); 1305 } 1306 1307 UnicodeString& 1308 UnicodeString::doReplace( int32_t start, 1309 int32_t length, 1310 const UnicodeString& src, 1311 int32_t srcStart, 1312 int32_t srcLength) 1313 { 1314 if(!src.isBogus()) { 1315 // pin the indices to legal values 1316 src.pinIndices(srcStart, srcLength); 1317 1318 // get the characters from src 1319 // and replace the range in ourselves with them 1320 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength); 1321 } else { 1322 // remove the range 1323 return doReplace(start, length, 0, 0, 0); 1324 } 1325 } 1326 1327 UnicodeString& 1328 UnicodeString::doReplace(int32_t start, 1329 int32_t length, 1330 const UChar *srcChars, 1331 int32_t srcStart, 1332 int32_t srcLength) 1333 { 1334 if(!isWritable()) { 1335 return *this; 1336 } 1337 1338 int32_t oldLength = this->length(); 1339 1340 // optimize (read-only alias).remove(0, start) and .remove(start, end) 1341 if((fFlags&kBufferIsReadonly) && srcLength == 0) { 1342 if(start == 0) { 1343 // remove prefix by adjusting the array pointer 1344 pinIndex(length); 1345 fUnion.fFields.fArray += length; 1346 fUnion.fFields.fCapacity -= length; 1347 setLength(oldLength - length); 1348 return *this; 1349 } else { 1350 pinIndex(start); 1351 if(length >= (oldLength - start)) { 1352 // remove suffix by reducing the length (like truncate()) 1353 setLength(start); 1354 fUnion.fFields.fCapacity = start; // not NUL-terminated any more 1355 return *this; 1356 } 1357 } 1358 } 1359 1360 if(srcChars == 0) { 1361 srcStart = srcLength = 0; 1362 } else if(srcLength < 0) { 1363 // get the srcLength if necessary 1364 srcLength = u_strlen(srcChars + srcStart); 1365 } 1366 1367 // calculate the size of the string after the replace 1368 int32_t newLength; 1369 1370 // optimize append() onto a large-enough, owned string 1371 if(start >= oldLength) { 1372 if(srcLength == 0) { 1373 return *this; 1374 } 1375 newLength = oldLength + srcLength; 1376 if(newLength <= getCapacity() && isBufferWritable()) { 1377 UChar *oldArray = getArrayStart(); 1378 // Do not copy characters when 1379 // UChar *buffer=str.getAppendBuffer(...); 1380 // is followed by 1381 // str.append(buffer, length); 1382 // or 1383 // str.appendString(buffer, length) 1384 // or similar. 1385 if(srcChars + srcStart != oldArray + start || start > oldLength) { 1386 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength); 1387 } 1388 setLength(newLength); 1389 return *this; 1390 } else { 1391 // pin the indices to legal values 1392 start = oldLength; 1393 length = 0; 1394 } 1395 } else { 1396 // pin the indices to legal values 1397 pinIndices(start, length); 1398 1399 newLength = oldLength - length + srcLength; 1400 } 1401 1402 // the following may change fArray but will not copy the current contents; 1403 // therefore we need to keep the current fArray 1404 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1405 UChar *oldArray; 1406 if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) { 1407 // copy the stack buffer contents because it will be overwritten with 1408 // fUnion.fFields values 1409 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength); 1410 oldArray = oldStackBuffer; 1411 } else { 1412 oldArray = getArrayStart(); 1413 } 1414 1415 // clone our array and allocate a bigger array if needed 1416 int32_t *bufferToDelete = 0; 1417 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize, 1418 FALSE, &bufferToDelete) 1419 ) { 1420 return *this; 1421 } 1422 1423 // now do the replace 1424 1425 UChar *newArray = getArrayStart(); 1426 if(newArray != oldArray) { 1427 // if fArray changed, then we need to copy everything except what will change 1428 us_arrayCopy(oldArray, 0, newArray, 0, start); 1429 us_arrayCopy(oldArray, start + length, 1430 newArray, start + srcLength, 1431 oldLength - (start + length)); 1432 } else if(length != srcLength) { 1433 // fArray did not change; copy only the portion that isn't changing, leaving a hole 1434 us_arrayCopy(oldArray, start + length, 1435 newArray, start + srcLength, 1436 oldLength - (start + length)); 1437 } 1438 1439 // now fill in the hole with the new string 1440 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); 1441 1442 setLength(newLength); 1443 1444 // delayed delete in case srcChars == fArray when we started, and 1445 // to keep oldArray alive for the above operations 1446 if (bufferToDelete) { 1447 uprv_free(bufferToDelete); 1448 } 1449 1450 return *this; 1451 } 1452 1453 /** 1454 * Replaceable API 1455 */ 1456 void 1457 UnicodeString::handleReplaceBetween(int32_t start, 1458 int32_t limit, 1459 const UnicodeString& text) { 1460 replaceBetween(start, limit, text); 1461 } 1462 1463 /** 1464 * Replaceable API 1465 */ 1466 void 1467 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { 1468 if (limit <= start) { 1469 return; // Nothing to do; avoid bogus malloc call 1470 } 1471 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); 1472 // Check to make sure text is not null. 1473 if (text != NULL) { 1474 extractBetween(start, limit, text, 0); 1475 insert(dest, text, 0, limit - start); 1476 uprv_free(text); 1477 } 1478 } 1479 1480 /** 1481 * Replaceable API 1482 * 1483 * NOTE: This is for the Replaceable class. There is no rep.cpp, 1484 * so we implement this function here. 1485 */ 1486 UBool Replaceable::hasMetaData() const { 1487 return TRUE; 1488 } 1489 1490 /** 1491 * Replaceable API 1492 */ 1493 UBool UnicodeString::hasMetaData() const { 1494 return FALSE; 1495 } 1496 1497 UnicodeString& 1498 UnicodeString::doReverse(int32_t start, int32_t length) { 1499 if(length <= 1 || !cloneArrayIfNeeded()) { 1500 return *this; 1501 } 1502 1503 // pin the indices to legal values 1504 pinIndices(start, length); 1505 if(length <= 1) { // pinIndices() might have shrunk the length 1506 return *this; 1507 } 1508 1509 UChar *left = getArrayStart() + start; 1510 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2) 1511 UChar swap; 1512 UBool hasSupplementary = FALSE; 1513 1514 // Before the loop we know left<right because length>=2. 1515 do { 1516 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left); 1517 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right); 1518 *right-- = swap; 1519 } while(left < right); 1520 // Make sure to test the middle code unit of an odd-length string. 1521 // Redundant if the length is even. 1522 hasSupplementary |= (UBool)U16_IS_LEAD(*left); 1523 1524 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ 1525 if(hasSupplementary) { 1526 UChar swap2; 1527 1528 left = getArrayStart() + start; 1529 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right 1530 while(left < right) { 1531 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) { 1532 *left++ = swap2; 1533 *left++ = swap; 1534 } else { 1535 ++left; 1536 } 1537 } 1538 } 1539 1540 return *this; 1541 } 1542 1543 UBool 1544 UnicodeString::padLeading(int32_t targetLength, 1545 UChar padChar) 1546 { 1547 int32_t oldLength = length(); 1548 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1549 return FALSE; 1550 } else { 1551 // move contents up by padding width 1552 UChar *array = getArrayStart(); 1553 int32_t start = targetLength - oldLength; 1554 us_arrayCopy(array, 0, array, start, oldLength); 1555 1556 // fill in padding character 1557 while(--start >= 0) { 1558 array[start] = padChar; 1559 } 1560 setLength(targetLength); 1561 return TRUE; 1562 } 1563 } 1564 1565 UBool 1566 UnicodeString::padTrailing(int32_t targetLength, 1567 UChar padChar) 1568 { 1569 int32_t oldLength = length(); 1570 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1571 return FALSE; 1572 } else { 1573 // fill in padding character 1574 UChar *array = getArrayStart(); 1575 int32_t length = targetLength; 1576 while(--length >= oldLength) { 1577 array[length] = padChar; 1578 } 1579 setLength(targetLength); 1580 return TRUE; 1581 } 1582 } 1583 1584 //======================================== 1585 // Hashing 1586 //======================================== 1587 int32_t 1588 UnicodeString::doHashCode() const 1589 { 1590 /* Delegate hash computation to uhash. This makes UnicodeString 1591 * hashing consistent with UChar* hashing. */ 1592 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length()); 1593 if (hashCode == kInvalidHashCode) { 1594 hashCode = kEmptyHashCode; 1595 } 1596 return hashCode; 1597 } 1598 1599 //======================================== 1600 // External Buffer 1601 //======================================== 1602 1603 UChar * 1604 UnicodeString::getBuffer(int32_t minCapacity) { 1605 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { 1606 fFlags|=kOpenGetBuffer; 1607 fShortLength=0; 1608 return getArrayStart(); 1609 } else { 1610 return 0; 1611 } 1612 } 1613 1614 void 1615 UnicodeString::releaseBuffer(int32_t newLength) { 1616 if(fFlags&kOpenGetBuffer && newLength>=-1) { 1617 // set the new fLength 1618 int32_t capacity=getCapacity(); 1619 if(newLength==-1) { 1620 // the new length is the string length, capped by fCapacity 1621 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; 1622 while(p<limit && *p!=0) { 1623 ++p; 1624 } 1625 newLength=(int32_t)(p-array); 1626 } else if(newLength>capacity) { 1627 newLength=capacity; 1628 } 1629 setLength(newLength); 1630 fFlags&=~kOpenGetBuffer; 1631 } 1632 } 1633 1634 //======================================== 1635 // Miscellaneous 1636 //======================================== 1637 UBool 1638 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, 1639 int32_t growCapacity, 1640 UBool doCopyArray, 1641 int32_t **pBufferToDelete, 1642 UBool forceClone) { 1643 // default parameters need to be static, therefore 1644 // the defaults are -1 to have convenience defaults 1645 if(newCapacity == -1) { 1646 newCapacity = getCapacity(); 1647 } 1648 1649 // while a getBuffer(minCapacity) is "open", 1650 // prevent any modifications of the string by returning FALSE here 1651 // if the string is bogus, then only an assignment or similar can revive it 1652 if(!isWritable()) { 1653 return FALSE; 1654 } 1655 1656 /* 1657 * We need to make a copy of the array if 1658 * the buffer is read-only, or 1659 * the buffer is refCounted (shared), and refCount>1, or 1660 * the buffer is too small. 1661 * Return FALSE if memory could not be allocated. 1662 */ 1663 if(forceClone || 1664 fFlags & kBufferIsReadonly || 1665 (fFlags & kRefCounted && refCount() > 1) || 1666 newCapacity > getCapacity() 1667 ) { 1668 // check growCapacity for default value and use of the stack buffer 1669 if(growCapacity < 0) { 1670 growCapacity = newCapacity; 1671 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { 1672 growCapacity = US_STACKBUF_SIZE; 1673 } 1674 1675 // save old values 1676 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1677 UChar *oldArray; 1678 uint8_t flags = fFlags; 1679 1680 if(flags&kUsingStackBuffer) { 1681 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */ 1682 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { 1683 // copy the stack buffer contents because it will be overwritten with 1684 // fUnion.fFields values 1685 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength); 1686 oldArray = oldStackBuffer; 1687 } else { 1688 oldArray = 0; // no need to copy from stack buffer to itself 1689 } 1690 } else { 1691 oldArray = fUnion.fFields.fArray; 1692 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */ 1693 } 1694 1695 // allocate a new array 1696 if(allocate(growCapacity) || 1697 (newCapacity < growCapacity && allocate(newCapacity)) 1698 ) { 1699 if(doCopyArray && oldArray != 0) { 1700 // copy the contents 1701 // do not copy more than what fits - it may be smaller than before 1702 int32_t minLength = length(); 1703 newCapacity = getCapacity(); 1704 if(newCapacity < minLength) { 1705 minLength = newCapacity; 1706 setLength(minLength); 1707 } 1708 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); 1709 } else { 1710 fShortLength = 0; 1711 } 1712 1713 // release the old array 1714 if(flags & kRefCounted) { 1715 // the array is refCounted; decrement and release if 0 1716 u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1); 1717 if(umtx_atomic_dec(pRefCount) == 0) { 1718 if(pBufferToDelete == 0) { 1719 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t 1720 // is defined as volatile. (Volatile has useful non-standard behavior 1721 // with this compiler.) 1722 uprv_free((void *)pRefCount); 1723 } else { 1724 // the caller requested to delete it himself 1725 *pBufferToDelete = (int32_t *)pRefCount; 1726 } 1727 } 1728 } 1729 } else { 1730 // not enough memory for growCapacity and not even for the smaller newCapacity 1731 // reset the old values for setToBogus() to release the array 1732 if(!(flags&kUsingStackBuffer)) { 1733 fUnion.fFields.fArray = oldArray; 1734 } 1735 fFlags = flags; 1736 setToBogus(); 1737 return FALSE; 1738 } 1739 } 1740 return TRUE; 1741 } 1742 1743 // UnicodeStringAppendable ------------------------------------------------- *** 1744 1745 UnicodeStringAppendable::~UnicodeStringAppendable() {} 1746 1747 UBool 1748 UnicodeStringAppendable::appendCodeUnit(UChar c) { 1749 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable(); 1750 } 1751 1752 UBool 1753 UnicodeStringAppendable::appendCodePoint(UChar32 c) { 1754 UChar buffer[U16_MAX_LENGTH]; 1755 int32_t cLength = 0; 1756 UBool isError = FALSE; 1757 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError); 1758 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable(); 1759 } 1760 1761 UBool 1762 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) { 1763 return str.doReplace(str.length(), 0, s, 0, length).isWritable(); 1764 } 1765 1766 UBool 1767 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) { 1768 return str.cloneArrayIfNeeded(str.length() + appendCapacity); 1769 } 1770 1771 UChar * 1772 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity, 1773 int32_t desiredCapacityHint, 1774 UChar *scratch, int32_t scratchCapacity, 1775 int32_t *resultCapacity) { 1776 if(minCapacity < 1 || scratchCapacity < minCapacity) { 1777 *resultCapacity = 0; 1778 return NULL; 1779 } 1780 int32_t oldLength = str.length(); 1781 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { 1782 *resultCapacity = str.getCapacity() - oldLength; 1783 return str.getArrayStart() + oldLength; 1784 } 1785 *resultCapacity = scratchCapacity; 1786 return scratch; 1787 } 1788 1789 U_NAMESPACE_END 1790 1791 U_NAMESPACE_USE 1792 1793 U_CAPI int32_t U_EXPORT2 1794 uhash_hashUnicodeString(const UElement key) { 1795 const UnicodeString *str = (const UnicodeString*) key.pointer; 1796 return (str == NULL) ? 0 : str->hashCode(); 1797 } 1798 1799 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString* 1800 // does not depend on hashtable code. 1801 U_CAPI UBool U_EXPORT2 1802 uhash_compareUnicodeString(const UElement key1, const UElement key2) { 1803 const UnicodeString *str1 = (const UnicodeString*) key1.pointer; 1804 const UnicodeString *str2 = (const UnicodeString*) key2.pointer; 1805 if (str1 == str2) { 1806 return TRUE; 1807 } 1808 if (str1 == NULL || str2 == NULL) { 1809 return FALSE; 1810 } 1811 return *str1 == *str2; 1812 } 1813 1814 #ifdef U_STATIC_IMPLEMENTATION 1815 /* 1816 This should never be called. It is defined here to make sure that the 1817 virtual vector deleting destructor is defined within unistr.cpp. 1818 The vector deleting destructor is already a part of UObject, 1819 but defining it here makes sure that it is included with this object file. 1820 This makes sure that static library dependencies are kept to a minimum. 1821 */ 1822 static void uprv_UnicodeStringDummy(void) { 1823 delete [] (new UnicodeString[2]); 1824 } 1825 #endif 1826