1 /* 2 ****************************************************************************** 3 * Copyright (C) 1999-2014, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ****************************************************************************** 6 * 7 * File unistr.cpp 8 * 9 * Modification History: 10 * 11 * Date Name Description 12 * 09/25/98 stephen Creation. 13 * 04/20/99 stephen Overhauled per 4/16 code review. 14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX 15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from 16 * Replaceable. 17 * 06/25/01 grhoten Removed the dependency on iostream 18 ****************************************************************************** 19 */ 20 21 #include "unicode/utypes.h" 22 #include "unicode/appendable.h" 23 #include "unicode/putil.h" 24 #include "cstring.h" 25 #include "cmemory.h" 26 #include "unicode/ustring.h" 27 #include "unicode/unistr.h" 28 #include "unicode/utf.h" 29 #include "unicode/utf16.h" 30 #include "uelement.h" 31 #include "ustr_imp.h" 32 #include "umutex.h" 33 #include "uassert.h" 34 35 #if 0 36 37 #include <iostream> 38 using namespace std; 39 40 //DEBUGGING 41 void 42 print(const UnicodeString& s, 43 const char *name) 44 { 45 UChar c; 46 cout << name << ":|"; 47 for(int i = 0; i < s.length(); ++i) { 48 c = s[i]; 49 if(c>= 0x007E || c < 0x0020) 50 cout << "[0x" << hex << s[i] << "]"; 51 else 52 cout << (char) s[i]; 53 } 54 cout << '|' << endl; 55 } 56 57 void 58 print(const UChar *s, 59 int32_t len, 60 const char *name) 61 { 62 UChar c; 63 cout << name << ":|"; 64 for(int i = 0; i < len; ++i) { 65 c = s[i]; 66 if(c>= 0x007E || c < 0x0020) 67 cout << "[0x" << hex << s[i] << "]"; 68 else 69 cout << (char) s[i]; 70 } 71 cout << '|' << endl; 72 } 73 // END DEBUGGING 74 #endif 75 76 // Local function definitions for now 77 78 // need to copy areas that may overlap 79 static 80 inline void 81 us_arrayCopy(const UChar *src, int32_t srcStart, 82 UChar *dst, int32_t dstStart, int32_t count) 83 { 84 if(count>0) { 85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src))); 86 } 87 } 88 89 // u_unescapeAt() callback to get a UChar from a UnicodeString 90 U_CDECL_BEGIN 91 static UChar U_CALLCONV 92 UnicodeString_charAt(int32_t offset, void *context) { 93 return ((icu::UnicodeString*) context)->charAt(offset); 94 } 95 U_CDECL_END 96 97 U_NAMESPACE_BEGIN 98 99 /* The Replaceable virtual destructor can't be defined in the header 100 due to how AIX works with multiple definitions of virtual functions. 101 */ 102 Replaceable::~Replaceable() {} 103 104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString) 105 106 UnicodeString U_EXPORT2 107 operator+ (const UnicodeString &s1, const UnicodeString &s2) { 108 return 109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0). 110 append(s1). 111 append(s2); 112 } 113 114 //======================================== 115 // Reference Counting functions, put at top of file so that optimizing compilers 116 // have a chance to automatically inline. 117 //======================================== 118 119 void 120 UnicodeString::addRef() { 121 umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1); 122 } 123 124 int32_t 125 UnicodeString::removeRef() { 126 return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1); 127 } 128 129 int32_t 130 UnicodeString::refCount() const { 131 return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1)); 132 } 133 134 void 135 UnicodeString::releaseArray() { 136 if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) { 137 uprv_free((int32_t *)fUnion.fFields.fArray - 1); 138 } 139 } 140 141 142 143 //======================================== 144 // Constructors 145 //======================================== 146 147 // The default constructor is inline in unistr.h. 148 149 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) { 150 fUnion.fFields.fLengthAndFlags = 0; 151 if(count <= 0 || (uint32_t)c > 0x10ffff) { 152 // just allocate and do not do anything else 153 allocate(capacity); 154 } else { 155 // count > 0, allocate and fill the new string with count c's 156 int32_t unitCount = U16_LENGTH(c), length = count * unitCount; 157 if(capacity < length) { 158 capacity = length; 159 } 160 if(allocate(capacity)) { 161 UChar *array = getArrayStart(); 162 int32_t i = 0; 163 164 // fill the new string with c 165 if(unitCount == 1) { 166 // fill with length UChars 167 while(i < length) { 168 array[i++] = (UChar)c; 169 } 170 } else { 171 // get the code units for c 172 UChar units[U16_MAX_LENGTH]; 173 U16_APPEND_UNSAFE(units, i, c); 174 175 // now it must be i==unitCount 176 i = 0; 177 178 // for Unicode, unitCount can only be 1, 2, 3, or 4 179 // 1 is handled above 180 while(i < length) { 181 int32_t unitIdx = 0; 182 while(unitIdx < unitCount) { 183 array[i++]=units[unitIdx++]; 184 } 185 } 186 } 187 } 188 setLength(length); 189 } 190 } 191 192 UnicodeString::UnicodeString(UChar ch) { 193 fUnion.fFields.fLengthAndFlags = kLength1 | kShortString; 194 fUnion.fStackFields.fBuffer[0] = ch; 195 } 196 197 UnicodeString::UnicodeString(UChar32 ch) { 198 fUnion.fFields.fLengthAndFlags = kShortString; 199 int32_t i = 0; 200 UBool isError = FALSE; 201 U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError); 202 // We test isError so that the compiler does not complain that we don't. 203 // If isError then i==0 which is what we want anyway. 204 if(!isError) { 205 setShortLength(i); 206 } 207 } 208 209 UnicodeString::UnicodeString(const UChar *text) { 210 fUnion.fFields.fLengthAndFlags = kShortString; 211 doReplace(0, 0, text, 0, -1); 212 } 213 214 UnicodeString::UnicodeString(const UChar *text, 215 int32_t textLength) { 216 fUnion.fFields.fLengthAndFlags = kShortString; 217 doReplace(0, 0, text, 0, textLength); 218 } 219 220 UnicodeString::UnicodeString(UBool isTerminated, 221 const UChar *text, 222 int32_t textLength) { 223 fUnion.fFields.fLengthAndFlags = kReadonlyAlias; 224 if(text == NULL) { 225 // treat as an empty string, do not alias 226 setToEmpty(); 227 } else if(textLength < -1 || 228 (textLength == -1 && !isTerminated) || 229 (textLength >= 0 && isTerminated && text[textLength] != 0) 230 ) { 231 setToBogus(); 232 } else { 233 if(textLength == -1) { 234 // text is terminated, or else it would have failed the above test 235 textLength = u_strlen(text); 236 } 237 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 238 } 239 } 240 241 UnicodeString::UnicodeString(UChar *buff, 242 int32_t buffLength, 243 int32_t buffCapacity) { 244 fUnion.fFields.fLengthAndFlags = kWritableAlias; 245 if(buff == NULL) { 246 // treat as an empty string, do not alias 247 setToEmpty(); 248 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 249 setToBogus(); 250 } else { 251 if(buffLength == -1) { 252 // fLength = u_strlen(buff); but do not look beyond buffCapacity 253 const UChar *p = buff, *limit = buff + buffCapacity; 254 while(p != limit && *p != 0) { 255 ++p; 256 } 257 buffLength = (int32_t)(p - buff); 258 } 259 setArray(buff, buffLength, buffCapacity); 260 } 261 } 262 263 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) { 264 fUnion.fFields.fLengthAndFlags = kShortString; 265 if(src==NULL) { 266 // treat as an empty string 267 } else { 268 if(length<0) { 269 length=(int32_t)uprv_strlen(src); 270 } 271 if(cloneArrayIfNeeded(length, length, FALSE)) { 272 u_charsToUChars(src, getArrayStart(), length); 273 setLength(length); 274 } else { 275 setToBogus(); 276 } 277 } 278 } 279 280 #if U_CHARSET_IS_UTF8 281 282 UnicodeString::UnicodeString(const char *codepageData) { 283 fUnion.fFields.fLengthAndFlags = kShortString; 284 if(codepageData != 0) { 285 setToUTF8(codepageData); 286 } 287 } 288 289 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) { 290 fUnion.fFields.fLengthAndFlags = kShortString; 291 // if there's nothing to convert, do nothing 292 if(codepageData == 0 || dataLength == 0 || dataLength < -1) { 293 return; 294 } 295 if(dataLength == -1) { 296 dataLength = (int32_t)uprv_strlen(codepageData); 297 } 298 setToUTF8(StringPiece(codepageData, dataLength)); 299 } 300 301 // else see unistr_cnv.cpp 302 #endif 303 304 UnicodeString::UnicodeString(const UnicodeString& that) { 305 fUnion.fFields.fLengthAndFlags = kShortString; 306 copyFrom(that); 307 } 308 309 UnicodeString::UnicodeString(const UnicodeString& that, 310 int32_t srcStart) { 311 fUnion.fFields.fLengthAndFlags = kShortString; 312 setTo(that, srcStart); 313 } 314 315 UnicodeString::UnicodeString(const UnicodeString& that, 316 int32_t srcStart, 317 int32_t srcLength) { 318 fUnion.fFields.fLengthAndFlags = kShortString; 319 setTo(that, srcStart, srcLength); 320 } 321 322 // Replaceable base class clone() default implementation, does not clone 323 Replaceable * 324 Replaceable::clone() const { 325 return NULL; 326 } 327 328 // UnicodeString overrides clone() with a real implementation 329 Replaceable * 330 UnicodeString::clone() const { 331 return new UnicodeString(*this); 332 } 333 334 //======================================== 335 // array allocation 336 //======================================== 337 338 UBool 339 UnicodeString::allocate(int32_t capacity) { 340 if(capacity <= US_STACKBUF_SIZE) { 341 fUnion.fFields.fLengthAndFlags = kShortString; 342 } else { 343 // count bytes for the refCounter and the string capacity, and 344 // round up to a multiple of 16; then divide by 4 and allocate int32_t's 345 // to be safely aligned for the refCount 346 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer() 347 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2); 348 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words ); 349 if(array != 0) { 350 // set initial refCount and point behind the refCount 351 *array++ = 1; 352 353 // have fArray point to the first UChar 354 fUnion.fFields.fArray = (UChar *)array; 355 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR)); 356 fUnion.fFields.fLengthAndFlags = kLongString; 357 } else { 358 fUnion.fFields.fLengthAndFlags = kIsBogus; 359 fUnion.fFields.fArray = 0; 360 fUnion.fFields.fCapacity = 0; 361 return FALSE; 362 } 363 } 364 return TRUE; 365 } 366 367 //======================================== 368 // Destructor 369 //======================================== 370 UnicodeString::~UnicodeString() 371 { 372 releaseArray(); 373 } 374 375 //======================================== 376 // Factory methods 377 //======================================== 378 379 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) { 380 UnicodeString result; 381 result.setToUTF8(utf8); 382 return result; 383 } 384 385 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) { 386 UnicodeString result; 387 int32_t capacity; 388 // Most UTF-32 strings will be BMP-only and result in a same-length 389 // UTF-16 string. We overestimate the capacity just slightly, 390 // just in case there are a few supplementary characters. 391 if(length <= US_STACKBUF_SIZE) { 392 capacity = US_STACKBUF_SIZE; 393 } else { 394 capacity = length + (length >> 4) + 4; 395 } 396 do { 397 UChar *utf16 = result.getBuffer(capacity); 398 int32_t length16; 399 UErrorCode errorCode = U_ZERO_ERROR; 400 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16, 401 utf32, length, 402 0xfffd, // Substitution character. 403 NULL, // Don't care about number of substitutions. 404 &errorCode); 405 result.releaseBuffer(length16); 406 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 407 capacity = length16 + 1; // +1 for the terminating NUL. 408 continue; 409 } else if(U_FAILURE(errorCode)) { 410 result.setToBogus(); 411 } 412 break; 413 } while(TRUE); 414 return result; 415 } 416 417 //======================================== 418 // Assignment 419 //======================================== 420 421 UnicodeString & 422 UnicodeString::operator=(const UnicodeString &src) { 423 return copyFrom(src); 424 } 425 426 UnicodeString & 427 UnicodeString::fastCopyFrom(const UnicodeString &src) { 428 return copyFrom(src, TRUE); 429 } 430 431 UnicodeString & 432 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) { 433 // if assigning to ourselves, do nothing 434 if(this == &src) { 435 return *this; 436 } 437 438 // is the right side bogus? 439 if(src.isBogus()) { 440 setToBogus(); 441 return *this; 442 } 443 444 // delete the current contents 445 releaseArray(); 446 447 if(src.isEmpty()) { 448 // empty string - use the stack buffer 449 setToEmpty(); 450 return *this; 451 } 452 453 // fLength>0 and not an "open" src.getBuffer(minCapacity) 454 fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags; 455 switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) { 456 case kShortString: 457 // short string using the stack buffer, do the same 458 uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer, 459 getShortLength() * U_SIZEOF_UCHAR); 460 break; 461 case kLongString: 462 // src uses a refCounted string buffer, use that buffer with refCount 463 // src is const, use a cast - we don't actually change it 464 ((UnicodeString &)src).addRef(); 465 // copy all fields, share the reference-counted buffer 466 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 467 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 468 if(!hasShortLength()) { 469 fUnion.fFields.fLength = src.fUnion.fFields.fLength; 470 } 471 break; 472 case kReadonlyAlias: 473 if(fastCopy) { 474 // src is a readonly alias, do the same 475 // -> maintain the readonly alias as such 476 fUnion.fFields.fArray = src.fUnion.fFields.fArray; 477 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity; 478 if(!hasShortLength()) { 479 fUnion.fFields.fLength = src.fUnion.fFields.fLength; 480 } 481 break; 482 } 483 // else if(!fastCopy) fall through to case kWritableAlias 484 // -> allocate a new buffer and copy the contents 485 case kWritableAlias: { 486 // src is a writable alias; we make a copy of that instead 487 int32_t srcLength = src.length(); 488 if(allocate(srcLength)) { 489 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR); 490 setLength(srcLength); 491 break; 492 } 493 // if there is not enough memory, then fall through to setting to bogus 494 } 495 default: 496 // if src is bogus, set ourselves to bogus 497 // do not call setToBogus() here because fArray and flags are not consistent here 498 fUnion.fFields.fLengthAndFlags = kIsBogus; 499 fUnion.fFields.fArray = 0; 500 fUnion.fFields.fCapacity = 0; 501 break; 502 } 503 504 return *this; 505 } 506 507 //======================================== 508 // Miscellaneous operations 509 //======================================== 510 511 UnicodeString UnicodeString::unescape() const { 512 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity 513 const UChar *array = getBuffer(); 514 int32_t len = length(); 515 int32_t prev = 0; 516 for (int32_t i=0;;) { 517 if (i == len) { 518 result.append(array, prev, len - prev); 519 break; 520 } 521 if (array[i++] == 0x5C /*'\\'*/) { 522 result.append(array, prev, (i - 1) - prev); 523 UChar32 c = unescapeAt(i); // advances i 524 if (c < 0) { 525 result.remove(); // return empty string 526 break; // invalid escape sequence 527 } 528 result.append(c); 529 prev = i; 530 } 531 } 532 return result; 533 } 534 535 UChar32 UnicodeString::unescapeAt(int32_t &offset) const { 536 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this); 537 } 538 539 //======================================== 540 // Read-only implementation 541 //======================================== 542 UBool 543 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const { 544 // Requires: this & text not bogus and have same lengths. 545 // Byte-wise comparison works for equality regardless of endianness. 546 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0; 547 } 548 549 int8_t 550 UnicodeString::doCompare( int32_t start, 551 int32_t length, 552 const UChar *srcChars, 553 int32_t srcStart, 554 int32_t srcLength) const 555 { 556 // compare illegal string values 557 if(isBogus()) { 558 return -1; 559 } 560 561 // pin indices to legal values 562 pinIndices(start, length); 563 564 if(srcChars == NULL) { 565 // treat const UChar *srcChars==NULL as an empty string 566 return length == 0 ? 0 : 1; 567 } 568 569 // get the correct pointer 570 const UChar *chars = getArrayStart(); 571 572 chars += start; 573 srcChars += srcStart; 574 575 int32_t minLength; 576 int8_t lengthResult; 577 578 // get the srcLength if necessary 579 if(srcLength < 0) { 580 srcLength = u_strlen(srcChars + srcStart); 581 } 582 583 // are we comparing different lengths? 584 if(length != srcLength) { 585 if(length < srcLength) { 586 minLength = length; 587 lengthResult = -1; 588 } else { 589 minLength = srcLength; 590 lengthResult = 1; 591 } 592 } else { 593 minLength = length; 594 lengthResult = 0; 595 } 596 597 /* 598 * note that uprv_memcmp() returns an int but we return an int8_t; 599 * we need to take care not to truncate the result - 600 * one way to do this is to right-shift the value to 601 * move the sign bit into the lower 8 bits and making sure that this 602 * does not become 0 itself 603 */ 604 605 if(minLength > 0 && chars != srcChars) { 606 int32_t result; 607 608 # if U_IS_BIG_ENDIAN 609 // big-endian: byte comparison works 610 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar)); 611 if(result != 0) { 612 return (int8_t)(result >> 15 | 1); 613 } 614 # else 615 // little-endian: compare UChar units 616 do { 617 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++)); 618 if(result != 0) { 619 return (int8_t)(result >> 15 | 1); 620 } 621 } while(--minLength > 0); 622 # endif 623 } 624 return lengthResult; 625 } 626 627 /* String compare in code point order - doCompare() compares in code unit order. */ 628 int8_t 629 UnicodeString::doCompareCodePointOrder(int32_t start, 630 int32_t length, 631 const UChar *srcChars, 632 int32_t srcStart, 633 int32_t srcLength) const 634 { 635 // compare illegal string values 636 // treat const UChar *srcChars==NULL as an empty string 637 if(isBogus()) { 638 return -1; 639 } 640 641 // pin indices to legal values 642 pinIndices(start, length); 643 644 if(srcChars == NULL) { 645 srcStart = srcLength = 0; 646 } 647 648 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE); 649 /* translate the 32-bit result into an 8-bit one */ 650 if(diff!=0) { 651 return (int8_t)(diff >> 15 | 1); 652 } else { 653 return 0; 654 } 655 } 656 657 int32_t 658 UnicodeString::getLength() const { 659 return length(); 660 } 661 662 UChar 663 UnicodeString::getCharAt(int32_t offset) const { 664 return charAt(offset); 665 } 666 667 UChar32 668 UnicodeString::getChar32At(int32_t offset) const { 669 return char32At(offset); 670 } 671 672 UChar32 673 UnicodeString::char32At(int32_t offset) const 674 { 675 int32_t len = length(); 676 if((uint32_t)offset < (uint32_t)len) { 677 const UChar *array = getArrayStart(); 678 UChar32 c; 679 U16_GET(array, 0, offset, len, c); 680 return c; 681 } else { 682 return kInvalidUChar; 683 } 684 } 685 686 int32_t 687 UnicodeString::getChar32Start(int32_t offset) const { 688 if((uint32_t)offset < (uint32_t)length()) { 689 const UChar *array = getArrayStart(); 690 U16_SET_CP_START(array, 0, offset); 691 return offset; 692 } else { 693 return 0; 694 } 695 } 696 697 int32_t 698 UnicodeString::getChar32Limit(int32_t offset) const { 699 int32_t len = length(); 700 if((uint32_t)offset < (uint32_t)len) { 701 const UChar *array = getArrayStart(); 702 U16_SET_CP_LIMIT(array, 0, offset, len); 703 return offset; 704 } else { 705 return len; 706 } 707 } 708 709 int32_t 710 UnicodeString::countChar32(int32_t start, int32_t length) const { 711 pinIndices(start, length); 712 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL 713 return u_countChar32(getArrayStart()+start, length); 714 } 715 716 UBool 717 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const { 718 pinIndices(start, length); 719 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL 720 return u_strHasMoreChar32Than(getArrayStart()+start, length, number); 721 } 722 723 int32_t 724 UnicodeString::moveIndex32(int32_t index, int32_t delta) const { 725 // pin index 726 int32_t len = length(); 727 if(index<0) { 728 index=0; 729 } else if(index>len) { 730 index=len; 731 } 732 733 const UChar *array = getArrayStart(); 734 if(delta>0) { 735 U16_FWD_N(array, index, len, delta); 736 } else { 737 U16_BACK_N(array, 0, index, -delta); 738 } 739 740 return index; 741 } 742 743 void 744 UnicodeString::doExtract(int32_t start, 745 int32_t length, 746 UChar *dst, 747 int32_t dstStart) const 748 { 749 // pin indices to legal values 750 pinIndices(start, length); 751 752 // do not copy anything if we alias dst itself 753 const UChar *array = getArrayStart(); 754 if(array + start != dst + dstStart) { 755 us_arrayCopy(array, start, dst, dstStart, length); 756 } 757 } 758 759 int32_t 760 UnicodeString::extract(UChar *dest, int32_t destCapacity, 761 UErrorCode &errorCode) const { 762 int32_t len = length(); 763 if(U_SUCCESS(errorCode)) { 764 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) { 765 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 766 } else { 767 const UChar *array = getArrayStart(); 768 if(len>0 && len<=destCapacity && array!=dest) { 769 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR); 770 } 771 return u_terminateUChars(dest, destCapacity, len, &errorCode); 772 } 773 } 774 775 return len; 776 } 777 778 int32_t 779 UnicodeString::extract(int32_t start, 780 int32_t length, 781 char *target, 782 int32_t targetCapacity, 783 enum EInvariant) const 784 { 785 // if the arguments are illegal, then do nothing 786 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) { 787 return 0; 788 } 789 790 // pin the indices to legal values 791 pinIndices(start, length); 792 793 if(length <= targetCapacity) { 794 u_UCharsToChars(getArrayStart() + start, target, length); 795 } 796 UErrorCode status = U_ZERO_ERROR; 797 return u_terminateChars(target, targetCapacity, length, &status); 798 } 799 800 UnicodeString 801 UnicodeString::tempSubString(int32_t start, int32_t len) const { 802 pinIndices(start, len); 803 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer 804 if(array==NULL) { 805 array=fUnion.fStackFields.fBuffer; // anything not NULL because that would make an empty string 806 len=-2; // bogus result string 807 } 808 return UnicodeString(FALSE, array + start, len); 809 } 810 811 int32_t 812 UnicodeString::toUTF8(int32_t start, int32_t len, 813 char *target, int32_t capacity) const { 814 pinIndices(start, len); 815 int32_t length8; 816 UErrorCode errorCode = U_ZERO_ERROR; 817 u_strToUTF8WithSub(target, capacity, &length8, 818 getBuffer() + start, len, 819 0xFFFD, // Standard substitution character. 820 NULL, // Don't care about number of substitutions. 821 &errorCode); 822 return length8; 823 } 824 825 #if U_CHARSET_IS_UTF8 826 827 int32_t 828 UnicodeString::extract(int32_t start, int32_t len, 829 char *target, uint32_t dstSize) const { 830 // if the arguments are illegal, then do nothing 831 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) { 832 return 0; 833 } 834 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff); 835 } 836 837 // else see unistr_cnv.cpp 838 #endif 839 840 void 841 UnicodeString::extractBetween(int32_t start, 842 int32_t limit, 843 UnicodeString& target) const { 844 pinIndex(start); 845 pinIndex(limit); 846 doExtract(start, limit - start, target); 847 } 848 849 // When converting from UTF-16 to UTF-8, the result will have at most 3 times 850 // as many bytes as the source has UChars. 851 // The "worst cases" are writing systems like Indic, Thai and CJK with 852 // 3:1 bytes:UChars. 853 void 854 UnicodeString::toUTF8(ByteSink &sink) const { 855 int32_t length16 = length(); 856 if(length16 != 0) { 857 char stackBuffer[1024]; 858 int32_t capacity = (int32_t)sizeof(stackBuffer); 859 UBool utf8IsOwned = FALSE; 860 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity, 861 3*length16, 862 stackBuffer, capacity, 863 &capacity); 864 int32_t length8 = 0; 865 UErrorCode errorCode = U_ZERO_ERROR; 866 u_strToUTF8WithSub(utf8, capacity, &length8, 867 getBuffer(), length16, 868 0xFFFD, // Standard substitution character. 869 NULL, // Don't care about number of substitutions. 870 &errorCode); 871 if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 872 utf8 = (char *)uprv_malloc(length8); 873 if(utf8 != NULL) { 874 utf8IsOwned = TRUE; 875 errorCode = U_ZERO_ERROR; 876 u_strToUTF8WithSub(utf8, length8, &length8, 877 getBuffer(), length16, 878 0xFFFD, // Standard substitution character. 879 NULL, // Don't care about number of substitutions. 880 &errorCode); 881 } else { 882 errorCode = U_MEMORY_ALLOCATION_ERROR; 883 } 884 } 885 if(U_SUCCESS(errorCode)) { 886 sink.Append(utf8, length8); 887 sink.Flush(); 888 } 889 if(utf8IsOwned) { 890 uprv_free(utf8); 891 } 892 } 893 } 894 895 int32_t 896 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const { 897 int32_t length32=0; 898 if(U_SUCCESS(errorCode)) { 899 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments. 900 u_strToUTF32WithSub(utf32, capacity, &length32, 901 getBuffer(), length(), 902 0xfffd, // Substitution character. 903 NULL, // Don't care about number of substitutions. 904 &errorCode); 905 } 906 return length32; 907 } 908 909 int32_t 910 UnicodeString::indexOf(const UChar *srcChars, 911 int32_t srcStart, 912 int32_t srcLength, 913 int32_t start, 914 int32_t length) const 915 { 916 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 917 return -1; 918 } 919 920 // UnicodeString does not find empty substrings 921 if(srcLength < 0 && srcChars[srcStart] == 0) { 922 return -1; 923 } 924 925 // get the indices within bounds 926 pinIndices(start, length); 927 928 // find the first occurrence of the substring 929 const UChar *array = getArrayStart(); 930 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength); 931 if(match == NULL) { 932 return -1; 933 } else { 934 return (int32_t)(match - array); 935 } 936 } 937 938 int32_t 939 UnicodeString::doIndexOf(UChar c, 940 int32_t start, 941 int32_t length) const 942 { 943 // pin indices 944 pinIndices(start, length); 945 946 // find the first occurrence of c 947 const UChar *array = getArrayStart(); 948 const UChar *match = u_memchr(array + start, c, length); 949 if(match == NULL) { 950 return -1; 951 } else { 952 return (int32_t)(match - array); 953 } 954 } 955 956 int32_t 957 UnicodeString::doIndexOf(UChar32 c, 958 int32_t start, 959 int32_t length) const { 960 // pin indices 961 pinIndices(start, length); 962 963 // find the first occurrence of c 964 const UChar *array = getArrayStart(); 965 const UChar *match = u_memchr32(array + start, c, length); 966 if(match == NULL) { 967 return -1; 968 } else { 969 return (int32_t)(match - array); 970 } 971 } 972 973 int32_t 974 UnicodeString::lastIndexOf(const UChar *srcChars, 975 int32_t srcStart, 976 int32_t srcLength, 977 int32_t start, 978 int32_t length) const 979 { 980 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) { 981 return -1; 982 } 983 984 // UnicodeString does not find empty substrings 985 if(srcLength < 0 && srcChars[srcStart] == 0) { 986 return -1; 987 } 988 989 // get the indices within bounds 990 pinIndices(start, length); 991 992 // find the last occurrence of the substring 993 const UChar *array = getArrayStart(); 994 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength); 995 if(match == NULL) { 996 return -1; 997 } else { 998 return (int32_t)(match - array); 999 } 1000 } 1001 1002 int32_t 1003 UnicodeString::doLastIndexOf(UChar c, 1004 int32_t start, 1005 int32_t length) const 1006 { 1007 if(isBogus()) { 1008 return -1; 1009 } 1010 1011 // pin indices 1012 pinIndices(start, length); 1013 1014 // find the last occurrence of c 1015 const UChar *array = getArrayStart(); 1016 const UChar *match = u_memrchr(array + start, c, length); 1017 if(match == NULL) { 1018 return -1; 1019 } else { 1020 return (int32_t)(match - array); 1021 } 1022 } 1023 1024 int32_t 1025 UnicodeString::doLastIndexOf(UChar32 c, 1026 int32_t start, 1027 int32_t length) const { 1028 // pin indices 1029 pinIndices(start, length); 1030 1031 // find the last occurrence of c 1032 const UChar *array = getArrayStart(); 1033 const UChar *match = u_memrchr32(array + start, c, length); 1034 if(match == NULL) { 1035 return -1; 1036 } else { 1037 return (int32_t)(match - array); 1038 } 1039 } 1040 1041 //======================================== 1042 // Write implementation 1043 //======================================== 1044 1045 UnicodeString& 1046 UnicodeString::findAndReplace(int32_t start, 1047 int32_t length, 1048 const UnicodeString& oldText, 1049 int32_t oldStart, 1050 int32_t oldLength, 1051 const UnicodeString& newText, 1052 int32_t newStart, 1053 int32_t newLength) 1054 { 1055 if(isBogus() || oldText.isBogus() || newText.isBogus()) { 1056 return *this; 1057 } 1058 1059 pinIndices(start, length); 1060 oldText.pinIndices(oldStart, oldLength); 1061 newText.pinIndices(newStart, newLength); 1062 1063 if(oldLength == 0) { 1064 return *this; 1065 } 1066 1067 while(length > 0 && length >= oldLength) { 1068 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length); 1069 if(pos < 0) { 1070 // no more oldText's here: done 1071 break; 1072 } else { 1073 // we found oldText, replace it by newText and go beyond it 1074 replace(pos, oldLength, newText, newStart, newLength); 1075 length -= pos + oldLength - start; 1076 start = pos + newLength; 1077 } 1078 } 1079 1080 return *this; 1081 } 1082 1083 1084 void 1085 UnicodeString::setToBogus() 1086 { 1087 releaseArray(); 1088 1089 fUnion.fFields.fLengthAndFlags = kIsBogus; 1090 fUnion.fFields.fArray = 0; 1091 fUnion.fFields.fCapacity = 0; 1092 } 1093 1094 // turn a bogus string into an empty one 1095 void 1096 UnicodeString::unBogus() { 1097 if(fUnion.fFields.fLengthAndFlags & kIsBogus) { 1098 setToEmpty(); 1099 } 1100 } 1101 1102 const UChar * 1103 UnicodeString::getTerminatedBuffer() { 1104 if(!isWritable()) { 1105 return 0; 1106 } 1107 UChar *array = getArrayStart(); 1108 int32_t len = length(); 1109 if(len < getCapacity()) { 1110 if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) { 1111 // If len<capacity on a read-only alias, then array[len] is 1112 // either the original NUL (if constructed with (TRUE, s, length)) 1113 // or one of the original string contents characters (if later truncated), 1114 // therefore we can assume that array[len] is initialized memory. 1115 if(array[len] == 0) { 1116 return array; 1117 } 1118 } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) { 1119 // kRefCounted: Do not write the NUL if the buffer is shared. 1120 // That is mostly safe, except when the length of one copy was modified 1121 // without copy-on-write, e.g., via truncate(newLength) or remove(void). 1122 // Then the NUL would be written into the middle of another copy's string. 1123 1124 // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL. 1125 // Do not test if there is a NUL already because it might be uninitialized memory. 1126 // (That would be safe, but tools like valgrind & Purify would complain.) 1127 array[len] = 0; 1128 return array; 1129 } 1130 } 1131 if(cloneArrayIfNeeded(len+1)) { 1132 array = getArrayStart(); 1133 array[len] = 0; 1134 return array; 1135 } else { 1136 return NULL; 1137 } 1138 } 1139 1140 // setTo() analogous to the readonly-aliasing constructor with the same signature 1141 UnicodeString & 1142 UnicodeString::setTo(UBool isTerminated, 1143 const UChar *text, 1144 int32_t textLength) 1145 { 1146 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { 1147 // do not modify a string that has an "open" getBuffer(minCapacity) 1148 return *this; 1149 } 1150 1151 if(text == NULL) { 1152 // treat as an empty string, do not alias 1153 releaseArray(); 1154 setToEmpty(); 1155 return *this; 1156 } 1157 1158 if( textLength < -1 || 1159 (textLength == -1 && !isTerminated) || 1160 (textLength >= 0 && isTerminated && text[textLength] != 0) 1161 ) { 1162 setToBogus(); 1163 return *this; 1164 } 1165 1166 releaseArray(); 1167 1168 if(textLength == -1) { 1169 // text is terminated, or else it would have failed the above test 1170 textLength = u_strlen(text); 1171 } 1172 fUnion.fFields.fLengthAndFlags = kReadonlyAlias; 1173 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength); 1174 return *this; 1175 } 1176 1177 // setTo() analogous to the writable-aliasing constructor with the same signature 1178 UnicodeString & 1179 UnicodeString::setTo(UChar *buffer, 1180 int32_t buffLength, 1181 int32_t buffCapacity) { 1182 if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) { 1183 // do not modify a string that has an "open" getBuffer(minCapacity) 1184 return *this; 1185 } 1186 1187 if(buffer == NULL) { 1188 // treat as an empty string, do not alias 1189 releaseArray(); 1190 setToEmpty(); 1191 return *this; 1192 } 1193 1194 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) { 1195 setToBogus(); 1196 return *this; 1197 } else if(buffLength == -1) { 1198 // buffLength = u_strlen(buff); but do not look beyond buffCapacity 1199 const UChar *p = buffer, *limit = buffer + buffCapacity; 1200 while(p != limit && *p != 0) { 1201 ++p; 1202 } 1203 buffLength = (int32_t)(p - buffer); 1204 } 1205 1206 releaseArray(); 1207 1208 fUnion.fFields.fLengthAndFlags = kWritableAlias; 1209 setArray(buffer, buffLength, buffCapacity); 1210 return *this; 1211 } 1212 1213 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) { 1214 unBogus(); 1215 int32_t length = utf8.length(); 1216 int32_t capacity; 1217 // The UTF-16 string will be at most as long as the UTF-8 string. 1218 if(length <= US_STACKBUF_SIZE) { 1219 capacity = US_STACKBUF_SIZE; 1220 } else { 1221 capacity = length + 1; // +1 for the terminating NUL. 1222 } 1223 UChar *utf16 = getBuffer(capacity); 1224 int32_t length16; 1225 UErrorCode errorCode = U_ZERO_ERROR; 1226 u_strFromUTF8WithSub(utf16, getCapacity(), &length16, 1227 utf8.data(), length, 1228 0xfffd, // Substitution character. 1229 NULL, // Don't care about number of substitutions. 1230 &errorCode); 1231 releaseBuffer(length16); 1232 if(U_FAILURE(errorCode)) { 1233 setToBogus(); 1234 } 1235 return *this; 1236 } 1237 1238 UnicodeString& 1239 UnicodeString::setCharAt(int32_t offset, 1240 UChar c) 1241 { 1242 int32_t len = length(); 1243 if(cloneArrayIfNeeded() && len > 0) { 1244 if(offset < 0) { 1245 offset = 0; 1246 } else if(offset >= len) { 1247 offset = len - 1; 1248 } 1249 1250 getArrayStart()[offset] = c; 1251 } 1252 return *this; 1253 } 1254 1255 UnicodeString& 1256 UnicodeString::replace(int32_t start, 1257 int32_t _length, 1258 UChar32 srcChar) { 1259 UChar buffer[U16_MAX_LENGTH]; 1260 int32_t count = 0; 1261 UBool isError = FALSE; 1262 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); 1263 // We test isError so that the compiler does not complain that we don't. 1264 // If isError (srcChar is not a valid code point) then count==0 which means 1265 // we remove the source segment rather than replacing it with srcChar. 1266 return doReplace(start, _length, buffer, 0, isError ? 0 : count); 1267 } 1268 1269 UnicodeString& 1270 UnicodeString::append(UChar32 srcChar) { 1271 UChar buffer[U16_MAX_LENGTH]; 1272 int32_t _length = 0; 1273 UBool isError = FALSE; 1274 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); 1275 // We test isError so that the compiler does not complain that we don't. 1276 // If isError then _length==0 which turns the doReplace() into a no-op anyway. 1277 return isError ? *this : doReplace(length(), 0, buffer, 0, _length); 1278 } 1279 1280 UnicodeString& 1281 UnicodeString::doReplace( int32_t start, 1282 int32_t length, 1283 const UnicodeString& src, 1284 int32_t srcStart, 1285 int32_t srcLength) 1286 { 1287 if(!src.isBogus()) { 1288 // pin the indices to legal values 1289 src.pinIndices(srcStart, srcLength); 1290 1291 // get the characters from src 1292 // and replace the range in ourselves with them 1293 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength); 1294 } else { 1295 // remove the range 1296 return doReplace(start, length, 0, 0, 0); 1297 } 1298 } 1299 1300 UnicodeString& 1301 UnicodeString::doReplace(int32_t start, 1302 int32_t length, 1303 const UChar *srcChars, 1304 int32_t srcStart, 1305 int32_t srcLength) 1306 { 1307 if(!isWritable()) { 1308 return *this; 1309 } 1310 1311 int32_t oldLength = this->length(); 1312 1313 // optimize (read-only alias).remove(0, start) and .remove(start, end) 1314 if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) { 1315 if(start == 0) { 1316 // remove prefix by adjusting the array pointer 1317 pinIndex(length); 1318 fUnion.fFields.fArray += length; 1319 fUnion.fFields.fCapacity -= length; 1320 setLength(oldLength - length); 1321 return *this; 1322 } else { 1323 pinIndex(start); 1324 if(length >= (oldLength - start)) { 1325 // remove suffix by reducing the length (like truncate()) 1326 setLength(start); 1327 fUnion.fFields.fCapacity = start; // not NUL-terminated any more 1328 return *this; 1329 } 1330 } 1331 } 1332 1333 if(srcChars == 0) { 1334 srcStart = srcLength = 0; 1335 } else if(srcLength < 0) { 1336 // get the srcLength if necessary 1337 srcLength = u_strlen(srcChars + srcStart); 1338 } 1339 1340 // calculate the size of the string after the replace 1341 int32_t newLength; 1342 1343 // optimize append() onto a large-enough, owned string 1344 if(start >= oldLength) { 1345 if(srcLength == 0) { 1346 return *this; 1347 } 1348 newLength = oldLength + srcLength; 1349 if(newLength <= getCapacity() && isBufferWritable()) { 1350 UChar *oldArray = getArrayStart(); 1351 // Do not copy characters when 1352 // UChar *buffer=str.getAppendBuffer(...); 1353 // is followed by 1354 // str.append(buffer, length); 1355 // or 1356 // str.appendString(buffer, length) 1357 // or similar. 1358 if(srcChars + srcStart != oldArray + start || start > oldLength) { 1359 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength); 1360 } 1361 setLength(newLength); 1362 return *this; 1363 } else { 1364 // pin the indices to legal values 1365 start = oldLength; 1366 length = 0; 1367 } 1368 } else { 1369 // pin the indices to legal values 1370 pinIndices(start, length); 1371 1372 newLength = oldLength - length + srcLength; 1373 } 1374 1375 // the following may change fArray but will not copy the current contents; 1376 // therefore we need to keep the current fArray 1377 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1378 UChar *oldArray; 1379 if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) { 1380 // copy the stack buffer contents because it will be overwritten with 1381 // fUnion.fFields values 1382 u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength); 1383 oldArray = oldStackBuffer; 1384 } else { 1385 oldArray = getArrayStart(); 1386 } 1387 1388 // clone our array and allocate a bigger array if needed 1389 int32_t *bufferToDelete = 0; 1390 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize, 1391 FALSE, &bufferToDelete) 1392 ) { 1393 return *this; 1394 } 1395 1396 // now do the replace 1397 1398 UChar *newArray = getArrayStart(); 1399 if(newArray != oldArray) { 1400 // if fArray changed, then we need to copy everything except what will change 1401 us_arrayCopy(oldArray, 0, newArray, 0, start); 1402 us_arrayCopy(oldArray, start + length, 1403 newArray, start + srcLength, 1404 oldLength - (start + length)); 1405 } else if(length != srcLength) { 1406 // fArray did not change; copy only the portion that isn't changing, leaving a hole 1407 us_arrayCopy(oldArray, start + length, 1408 newArray, start + srcLength, 1409 oldLength - (start + length)); 1410 } 1411 1412 // now fill in the hole with the new string 1413 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); 1414 1415 setLength(newLength); 1416 1417 // delayed delete in case srcChars == fArray when we started, and 1418 // to keep oldArray alive for the above operations 1419 if (bufferToDelete) { 1420 uprv_free(bufferToDelete); 1421 } 1422 1423 return *this; 1424 } 1425 1426 /** 1427 * Replaceable API 1428 */ 1429 void 1430 UnicodeString::handleReplaceBetween(int32_t start, 1431 int32_t limit, 1432 const UnicodeString& text) { 1433 replaceBetween(start, limit, text); 1434 } 1435 1436 /** 1437 * Replaceable API 1438 */ 1439 void 1440 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) { 1441 if (limit <= start) { 1442 return; // Nothing to do; avoid bogus malloc call 1443 } 1444 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) ); 1445 // Check to make sure text is not null. 1446 if (text != NULL) { 1447 extractBetween(start, limit, text, 0); 1448 insert(dest, text, 0, limit - start); 1449 uprv_free(text); 1450 } 1451 } 1452 1453 /** 1454 * Replaceable API 1455 * 1456 * NOTE: This is for the Replaceable class. There is no rep.cpp, 1457 * so we implement this function here. 1458 */ 1459 UBool Replaceable::hasMetaData() const { 1460 return TRUE; 1461 } 1462 1463 /** 1464 * Replaceable API 1465 */ 1466 UBool UnicodeString::hasMetaData() const { 1467 return FALSE; 1468 } 1469 1470 UnicodeString& 1471 UnicodeString::doReverse(int32_t start, int32_t length) { 1472 if(length <= 1 || !cloneArrayIfNeeded()) { 1473 return *this; 1474 } 1475 1476 // pin the indices to legal values 1477 pinIndices(start, length); 1478 if(length <= 1) { // pinIndices() might have shrunk the length 1479 return *this; 1480 } 1481 1482 UChar *left = getArrayStart() + start; 1483 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2) 1484 UChar swap; 1485 UBool hasSupplementary = FALSE; 1486 1487 // Before the loop we know left<right because length>=2. 1488 do { 1489 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left); 1490 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right); 1491 *right-- = swap; 1492 } while(left < right); 1493 // Make sure to test the middle code unit of an odd-length string. 1494 // Redundant if the length is even. 1495 hasSupplementary |= (UBool)U16_IS_LEAD(*left); 1496 1497 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */ 1498 if(hasSupplementary) { 1499 UChar swap2; 1500 1501 left = getArrayStart() + start; 1502 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right 1503 while(left < right) { 1504 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) { 1505 *left++ = swap2; 1506 *left++ = swap; 1507 } else { 1508 ++left; 1509 } 1510 } 1511 } 1512 1513 return *this; 1514 } 1515 1516 UBool 1517 UnicodeString::padLeading(int32_t targetLength, 1518 UChar padChar) 1519 { 1520 int32_t oldLength = length(); 1521 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1522 return FALSE; 1523 } else { 1524 // move contents up by padding width 1525 UChar *array = getArrayStart(); 1526 int32_t start = targetLength - oldLength; 1527 us_arrayCopy(array, 0, array, start, oldLength); 1528 1529 // fill in padding character 1530 while(--start >= 0) { 1531 array[start] = padChar; 1532 } 1533 setLength(targetLength); 1534 return TRUE; 1535 } 1536 } 1537 1538 UBool 1539 UnicodeString::padTrailing(int32_t targetLength, 1540 UChar padChar) 1541 { 1542 int32_t oldLength = length(); 1543 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) { 1544 return FALSE; 1545 } else { 1546 // fill in padding character 1547 UChar *array = getArrayStart(); 1548 int32_t length = targetLength; 1549 while(--length >= oldLength) { 1550 array[length] = padChar; 1551 } 1552 setLength(targetLength); 1553 return TRUE; 1554 } 1555 } 1556 1557 //======================================== 1558 // Hashing 1559 //======================================== 1560 int32_t 1561 UnicodeString::doHashCode() const 1562 { 1563 /* Delegate hash computation to uhash. This makes UnicodeString 1564 * hashing consistent with UChar* hashing. */ 1565 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length()); 1566 if (hashCode == kInvalidHashCode) { 1567 hashCode = kEmptyHashCode; 1568 } 1569 return hashCode; 1570 } 1571 1572 //======================================== 1573 // External Buffer 1574 //======================================== 1575 1576 UChar * 1577 UnicodeString::getBuffer(int32_t minCapacity) { 1578 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) { 1579 fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer; 1580 setZeroLength(); 1581 return getArrayStart(); 1582 } else { 1583 return 0; 1584 } 1585 } 1586 1587 void 1588 UnicodeString::releaseBuffer(int32_t newLength) { 1589 if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) { 1590 // set the new fLength 1591 int32_t capacity=getCapacity(); 1592 if(newLength==-1) { 1593 // the new length is the string length, capped by fCapacity 1594 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity; 1595 while(p<limit && *p!=0) { 1596 ++p; 1597 } 1598 newLength=(int32_t)(p-array); 1599 } else if(newLength>capacity) { 1600 newLength=capacity; 1601 } 1602 setLength(newLength); 1603 fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer; 1604 } 1605 } 1606 1607 //======================================== 1608 // Miscellaneous 1609 //======================================== 1610 UBool 1611 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, 1612 int32_t growCapacity, 1613 UBool doCopyArray, 1614 int32_t **pBufferToDelete, 1615 UBool forceClone) { 1616 // default parameters need to be static, therefore 1617 // the defaults are -1 to have convenience defaults 1618 if(newCapacity == -1) { 1619 newCapacity = getCapacity(); 1620 } 1621 1622 // while a getBuffer(minCapacity) is "open", 1623 // prevent any modifications of the string by returning FALSE here 1624 // if the string is bogus, then only an assignment or similar can revive it 1625 if(!isWritable()) { 1626 return FALSE; 1627 } 1628 1629 /* 1630 * We need to make a copy of the array if 1631 * the buffer is read-only, or 1632 * the buffer is refCounted (shared), and refCount>1, or 1633 * the buffer is too small. 1634 * Return FALSE if memory could not be allocated. 1635 */ 1636 if(forceClone || 1637 fUnion.fFields.fLengthAndFlags & kBufferIsReadonly || 1638 (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) || 1639 newCapacity > getCapacity() 1640 ) { 1641 // check growCapacity for default value and use of the stack buffer 1642 if(growCapacity < 0) { 1643 growCapacity = newCapacity; 1644 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { 1645 growCapacity = US_STACKBUF_SIZE; 1646 } 1647 1648 // save old values 1649 UChar oldStackBuffer[US_STACKBUF_SIZE]; 1650 UChar *oldArray; 1651 int32_t oldLength = length(); 1652 int16_t flags = fUnion.fFields.fLengthAndFlags; 1653 1654 if(flags&kUsingStackBuffer) { 1655 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */ 1656 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) { 1657 // copy the stack buffer contents because it will be overwritten with 1658 // fUnion.fFields values 1659 us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength); 1660 oldArray = oldStackBuffer; 1661 } else { 1662 oldArray = NULL; // no need to copy from the stack buffer to itself 1663 } 1664 } else { 1665 oldArray = fUnion.fFields.fArray; 1666 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */ 1667 } 1668 1669 // allocate a new array 1670 if(allocate(growCapacity) || 1671 (newCapacity < growCapacity && allocate(newCapacity)) 1672 ) { 1673 if(doCopyArray) { 1674 // copy the contents 1675 // do not copy more than what fits - it may be smaller than before 1676 int32_t minLength = oldLength; 1677 newCapacity = getCapacity(); 1678 if(newCapacity < minLength) { 1679 minLength = newCapacity; 1680 } 1681 if(oldArray != NULL) { 1682 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength); 1683 } 1684 setLength(minLength); 1685 } else { 1686 setZeroLength(); 1687 } 1688 1689 // release the old array 1690 if(flags & kRefCounted) { 1691 // the array is refCounted; decrement and release if 0 1692 u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1); 1693 if(umtx_atomic_dec(pRefCount) == 0) { 1694 if(pBufferToDelete == 0) { 1695 // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t 1696 // is defined as volatile. (Volatile has useful non-standard behavior 1697 // with this compiler.) 1698 uprv_free((void *)pRefCount); 1699 } else { 1700 // the caller requested to delete it himself 1701 *pBufferToDelete = (int32_t *)pRefCount; 1702 } 1703 } 1704 } 1705 } else { 1706 // not enough memory for growCapacity and not even for the smaller newCapacity 1707 // reset the old values for setToBogus() to release the array 1708 if(!(flags&kUsingStackBuffer)) { 1709 fUnion.fFields.fArray = oldArray; 1710 } 1711 fUnion.fFields.fLengthAndFlags = flags; 1712 setToBogus(); 1713 return FALSE; 1714 } 1715 } 1716 return TRUE; 1717 } 1718 1719 // UnicodeStringAppendable ------------------------------------------------- *** 1720 1721 UnicodeStringAppendable::~UnicodeStringAppendable() {} 1722 1723 UBool 1724 UnicodeStringAppendable::appendCodeUnit(UChar c) { 1725 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable(); 1726 } 1727 1728 UBool 1729 UnicodeStringAppendable::appendCodePoint(UChar32 c) { 1730 UChar buffer[U16_MAX_LENGTH]; 1731 int32_t cLength = 0; 1732 UBool isError = FALSE; 1733 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError); 1734 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable(); 1735 } 1736 1737 UBool 1738 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) { 1739 return str.doReplace(str.length(), 0, s, 0, length).isWritable(); 1740 } 1741 1742 UBool 1743 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) { 1744 return str.cloneArrayIfNeeded(str.length() + appendCapacity); 1745 } 1746 1747 UChar * 1748 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity, 1749 int32_t desiredCapacityHint, 1750 UChar *scratch, int32_t scratchCapacity, 1751 int32_t *resultCapacity) { 1752 if(minCapacity < 1 || scratchCapacity < minCapacity) { 1753 *resultCapacity = 0; 1754 return NULL; 1755 } 1756 int32_t oldLength = str.length(); 1757 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) { 1758 *resultCapacity = str.getCapacity() - oldLength; 1759 return str.getArrayStart() + oldLength; 1760 } 1761 *resultCapacity = scratchCapacity; 1762 return scratch; 1763 } 1764 1765 U_NAMESPACE_END 1766 1767 U_NAMESPACE_USE 1768 1769 U_CAPI int32_t U_EXPORT2 1770 uhash_hashUnicodeString(const UElement key) { 1771 const UnicodeString *str = (const UnicodeString*) key.pointer; 1772 return (str == NULL) ? 0 : str->hashCode(); 1773 } 1774 1775 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString* 1776 // does not depend on hashtable code. 1777 U_CAPI UBool U_EXPORT2 1778 uhash_compareUnicodeString(const UElement key1, const UElement key2) { 1779 const UnicodeString *str1 = (const UnicodeString*) key1.pointer; 1780 const UnicodeString *str2 = (const UnicodeString*) key2.pointer; 1781 if (str1 == str2) { 1782 return TRUE; 1783 } 1784 if (str1 == NULL || str2 == NULL) { 1785 return FALSE; 1786 } 1787 return *str1 == *str2; 1788 } 1789 1790 #ifdef U_STATIC_IMPLEMENTATION 1791 /* 1792 This should never be called. It is defined here to make sure that the 1793 virtual vector deleting destructor is defined within unistr.cpp. 1794 The vector deleting destructor is already a part of UObject, 1795 but defining it here makes sure that it is included with this object file. 1796 This makes sure that static library dependencies are kept to a minimum. 1797 */ 1798 static void uprv_UnicodeStringDummy(void) { 1799 delete [] (new UnicodeString[2]); 1800 } 1801 #endif 1802