1 /* 2 * (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved. 4 * Copyright (C) 2007-2009 Torch Mobile, Inc. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 */ 21 22 #include "config.h" 23 #include "WTFString.h" 24 25 #include <stdarg.h> 26 #include <wtf/ASCIICType.h> 27 #include <wtf/text/CString.h> 28 #include <wtf/StringExtras.h> 29 #include <wtf/Vector.h> 30 #include <wtf/dtoa.h> 31 #include <wtf/unicode/UTF8.h> 32 #include <wtf/unicode/Unicode.h> 33 34 using namespace std; 35 36 namespace WTF { 37 38 using namespace Unicode; 39 using namespace std; 40 41 // Construct a string with UTF-16 data. 42 String::String(const UChar* characters, unsigned length) 43 : m_impl(characters ? StringImpl::create(characters, length) : 0) 44 { 45 } 46 47 // Construct a string with UTF-16 data, from a null-terminated source. 48 String::String(const UChar* str) 49 { 50 if (!str) 51 return; 52 53 size_t len = 0; 54 while (str[len] != UChar(0)) 55 len++; 56 57 if (len > numeric_limits<unsigned>::max()) 58 CRASH(); 59 60 m_impl = StringImpl::create(str, len); 61 } 62 63 // Construct a string with latin1 data. 64 String::String(const char* characters, unsigned length) 65 : m_impl(characters ? StringImpl::create(characters, length) : 0) 66 { 67 } 68 69 // Construct a string with latin1 data, from a null-terminated source. 70 String::String(const char* characters) 71 : m_impl(characters ? StringImpl::create(characters) : 0) 72 { 73 } 74 75 void String::append(const String& str) 76 { 77 if (str.isEmpty()) 78 return; 79 80 // FIXME: This is extremely inefficient. So much so that we might want to take this 81 // out of String's API. We can make it better by optimizing the case where exactly 82 // one String is pointing at this StringImpl, but even then it's going to require a 83 // call to fastMalloc every single time. 84 if (str.m_impl) { 85 if (m_impl) { 86 UChar* data; 87 if (str.length() > numeric_limits<unsigned>::max() - m_impl->length()) 88 CRASH(); 89 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data); 90 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); 91 memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar)); 92 m_impl = newImpl.release(); 93 } else 94 m_impl = str.m_impl; 95 } 96 } 97 98 void String::append(char c) 99 { 100 // FIXME: This is extremely inefficient. So much so that we might want to take this 101 // out of String's API. We can make it better by optimizing the case where exactly 102 // one String is pointing at this StringImpl, but even then it's going to require a 103 // call to fastMalloc every single time. 104 if (m_impl) { 105 UChar* data; 106 if (m_impl->length() >= numeric_limits<unsigned>::max()) 107 CRASH(); 108 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); 109 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); 110 data[m_impl->length()] = c; 111 m_impl = newImpl.release(); 112 } else 113 m_impl = StringImpl::create(&c, 1); 114 } 115 116 void String::append(UChar c) 117 { 118 // FIXME: This is extremely inefficient. So much so that we might want to take this 119 // out of String's API. We can make it better by optimizing the case where exactly 120 // one String is pointing at this StringImpl, but even then it's going to require a 121 // call to fastMalloc every single time. 122 if (m_impl) { 123 UChar* data; 124 if (m_impl->length() >= numeric_limits<unsigned>::max()) 125 CRASH(); 126 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data); 127 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar)); 128 data[m_impl->length()] = c; 129 m_impl = newImpl.release(); 130 } else 131 m_impl = StringImpl::create(&c, 1); 132 } 133 134 String operator+(const String& a, const String& b) 135 { 136 if (a.isEmpty()) 137 return b; 138 if (b.isEmpty()) 139 return a; 140 String c = a; 141 c += b; 142 return c; 143 } 144 145 String operator+(const String& s, const char* cs) 146 { 147 return s + String(cs); 148 } 149 150 String operator+(const char* cs, const String& s) 151 { 152 return String(cs) + s; 153 } 154 155 int codePointCompare(const String& a, const String& b) 156 { 157 return codePointCompare(a.impl(), b.impl()); 158 } 159 160 void String::insert(const String& str, unsigned pos) 161 { 162 if (str.isEmpty()) { 163 if (str.isNull()) 164 return; 165 if (isNull()) 166 m_impl = str.impl(); 167 return; 168 } 169 insert(str.characters(), str.length(), pos); 170 } 171 172 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend) 173 { 174 if (!m_impl) { 175 if (!charactersToAppend) 176 return; 177 m_impl = StringImpl::create(charactersToAppend, lengthToAppend); 178 return; 179 } 180 181 if (!lengthToAppend) 182 return; 183 184 ASSERT(charactersToAppend); 185 UChar* data; 186 if (lengthToAppend > numeric_limits<unsigned>::max() - length()) 187 CRASH(); 188 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data); 189 memcpy(data, characters(), length() * sizeof(UChar)); 190 memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar)); 191 m_impl = newImpl.release(); 192 } 193 194 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position) 195 { 196 if (position >= length()) { 197 append(charactersToInsert, lengthToInsert); 198 return; 199 } 200 201 ASSERT(m_impl); 202 203 if (!lengthToInsert) 204 return; 205 206 ASSERT(charactersToInsert); 207 UChar* data; 208 if (lengthToInsert > numeric_limits<unsigned>::max() - length()) 209 CRASH(); 210 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data); 211 memcpy(data, characters(), position * sizeof(UChar)); 212 memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar)); 213 memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar)); 214 m_impl = newImpl.release(); 215 } 216 217 UChar32 String::characterStartingAt(unsigned i) const 218 { 219 if (!m_impl || i >= m_impl->length()) 220 return 0; 221 return m_impl->characterStartingAt(i); 222 } 223 224 void String::truncate(unsigned position) 225 { 226 if (position >= length()) 227 return; 228 UChar* data; 229 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data); 230 memcpy(data, characters(), position * sizeof(UChar)); 231 m_impl = newImpl.release(); 232 } 233 234 void String::remove(unsigned position, int lengthToRemove) 235 { 236 if (lengthToRemove <= 0) 237 return; 238 if (position >= length()) 239 return; 240 if (static_cast<unsigned>(lengthToRemove) > length() - position) 241 lengthToRemove = length() - position; 242 UChar* data; 243 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data); 244 memcpy(data, characters(), position * sizeof(UChar)); 245 memcpy(data + position, characters() + position + lengthToRemove, 246 (length() - lengthToRemove - position) * sizeof(UChar)); 247 m_impl = newImpl.release(); 248 } 249 250 String String::substring(unsigned pos, unsigned len) const 251 { 252 if (!m_impl) 253 return String(); 254 return m_impl->substring(pos, len); 255 } 256 257 String String::substringSharingImpl(unsigned offset, unsigned length) const 258 { 259 // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar). 260 261 unsigned stringLength = this->length(); 262 offset = min(offset, stringLength); 263 length = min(length, stringLength - offset); 264 265 if (!offset && length == stringLength) 266 return *this; 267 return String(StringImpl::create(m_impl, offset, length)); 268 } 269 270 String String::lower() const 271 { 272 if (!m_impl) 273 return String(); 274 return m_impl->lower(); 275 } 276 277 String String::upper() const 278 { 279 if (!m_impl) 280 return String(); 281 return m_impl->upper(); 282 } 283 284 String String::stripWhiteSpace() const 285 { 286 if (!m_impl) 287 return String(); 288 return m_impl->stripWhiteSpace(); 289 } 290 291 String String::simplifyWhiteSpace() const 292 { 293 if (!m_impl) 294 return String(); 295 return m_impl->simplifyWhiteSpace(); 296 } 297 298 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const 299 { 300 if (!m_impl) 301 return String(); 302 return m_impl->removeCharacters(findMatch); 303 } 304 305 String String::foldCase() const 306 { 307 if (!m_impl) 308 return String(); 309 return m_impl->foldCase(); 310 } 311 312 bool String::percentage(int& result) const 313 { 314 if (!m_impl || !m_impl->length()) 315 return false; 316 317 if ((*m_impl)[m_impl->length() - 1] != '%') 318 return false; 319 320 result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1); 321 return true; 322 } 323 324 const UChar* String::charactersWithNullTermination() 325 { 326 if (!m_impl) 327 return 0; 328 if (m_impl->hasTerminatingNullCharacter()) 329 return m_impl->characters(); 330 m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl); 331 return m_impl->characters(); 332 } 333 334 String String::format(const char *format, ...) 335 { 336 #if PLATFORM(QT) 337 // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf. 338 // https://bugs.webkit.org/show_bug.cgi?id=18994 339 va_list args; 340 va_start(args, format); 341 342 QString buffer; 343 buffer.vsprintf(format, args); 344 345 va_end(args); 346 347 QByteArray ba = buffer.toUtf8(); 348 return StringImpl::create(ba.constData(), ba.length()); 349 350 #elif OS(WINCE) 351 va_list args; 352 va_start(args, format); 353 354 Vector<char, 256> buffer; 355 356 int bufferSize = 256; 357 buffer.resize(bufferSize); 358 for (;;) { 359 int written = vsnprintf(buffer.data(), bufferSize, format, args); 360 va_end(args); 361 362 if (written == 0) 363 return String(""); 364 if (written > 0) 365 return StringImpl::create(buffer.data(), written); 366 367 bufferSize <<= 1; 368 buffer.resize(bufferSize); 369 va_start(args, format); 370 } 371 372 #else 373 va_list args; 374 va_start(args, format); 375 376 Vector<char, 256> buffer; 377 378 // Do the format once to get the length. 379 #if COMPILER(MSVC) 380 int result = _vscprintf(format, args); 381 #else 382 char ch; 383 int result = vsnprintf(&ch, 1, format, args); 384 // We need to call va_end() and then va_start() again here, as the 385 // contents of args is undefined after the call to vsnprintf 386 // according to http://man.cx/snprintf(3) 387 // 388 // Not calling va_end/va_start here happens to work on lots of 389 // systems, but fails e.g. on 64bit Linux. 390 va_end(args); 391 va_start(args, format); 392 #endif 393 394 if (result == 0) 395 return String(""); 396 if (result < 0) 397 return String(); 398 unsigned len = result; 399 buffer.grow(len + 1); 400 401 // Now do the formatting again, guaranteed to fit. 402 vsnprintf(buffer.data(), buffer.size(), format, args); 403 404 va_end(args); 405 406 return StringImpl::create(buffer.data(), len); 407 #endif 408 } 409 410 String String::number(short n) 411 { 412 return String::format("%hd", n); 413 } 414 415 String String::number(unsigned short n) 416 { 417 return String::format("%hu", n); 418 } 419 420 String String::number(int n) 421 { 422 return String::format("%d", n); 423 } 424 425 String String::number(unsigned n) 426 { 427 return String::format("%u", n); 428 } 429 430 String String::number(long n) 431 { 432 return String::format("%ld", n); 433 } 434 435 String String::number(unsigned long n) 436 { 437 return String::format("%lu", n); 438 } 439 440 String String::number(long long n) 441 { 442 #if OS(WINDOWS) && !PLATFORM(QT) 443 return String::format("%I64i", n); 444 #else 445 return String::format("%lli", n); 446 #endif 447 } 448 449 String String::number(unsigned long long n) 450 { 451 #if OS(WINDOWS) && !PLATFORM(QT) 452 return String::format("%I64u", n); 453 #else 454 return String::format("%llu", n); 455 #endif 456 } 457 458 String String::number(double n) 459 { 460 return String::format("%.6lg", n); 461 } 462 463 int String::toIntStrict(bool* ok, int base) const 464 { 465 if (!m_impl) { 466 if (ok) 467 *ok = false; 468 return 0; 469 } 470 return m_impl->toIntStrict(ok, base); 471 } 472 473 unsigned String::toUIntStrict(bool* ok, int base) const 474 { 475 if (!m_impl) { 476 if (ok) 477 *ok = false; 478 return 0; 479 } 480 return m_impl->toUIntStrict(ok, base); 481 } 482 483 int64_t String::toInt64Strict(bool* ok, int base) const 484 { 485 if (!m_impl) { 486 if (ok) 487 *ok = false; 488 return 0; 489 } 490 return m_impl->toInt64Strict(ok, base); 491 } 492 493 uint64_t String::toUInt64Strict(bool* ok, int base) const 494 { 495 if (!m_impl) { 496 if (ok) 497 *ok = false; 498 return 0; 499 } 500 return m_impl->toUInt64Strict(ok, base); 501 } 502 503 intptr_t String::toIntPtrStrict(bool* ok, int base) const 504 { 505 if (!m_impl) { 506 if (ok) 507 *ok = false; 508 return 0; 509 } 510 return m_impl->toIntPtrStrict(ok, base); 511 } 512 513 514 int String::toInt(bool* ok) const 515 { 516 if (!m_impl) { 517 if (ok) 518 *ok = false; 519 return 0; 520 } 521 return m_impl->toInt(ok); 522 } 523 524 unsigned String::toUInt(bool* ok) const 525 { 526 if (!m_impl) { 527 if (ok) 528 *ok = false; 529 return 0; 530 } 531 return m_impl->toUInt(ok); 532 } 533 534 int64_t String::toInt64(bool* ok) const 535 { 536 if (!m_impl) { 537 if (ok) 538 *ok = false; 539 return 0; 540 } 541 return m_impl->toInt64(ok); 542 } 543 544 uint64_t String::toUInt64(bool* ok) const 545 { 546 if (!m_impl) { 547 if (ok) 548 *ok = false; 549 return 0; 550 } 551 return m_impl->toUInt64(ok); 552 } 553 554 intptr_t String::toIntPtr(bool* ok) const 555 { 556 if (!m_impl) { 557 if (ok) 558 *ok = false; 559 return 0; 560 } 561 return m_impl->toIntPtr(ok); 562 } 563 564 double String::toDouble(bool* ok, bool* didReadNumber) const 565 { 566 if (!m_impl) { 567 if (ok) 568 *ok = false; 569 if (didReadNumber) 570 *didReadNumber = false; 571 return 0.0; 572 } 573 return m_impl->toDouble(ok, didReadNumber); 574 } 575 576 float String::toFloat(bool* ok, bool* didReadNumber) const 577 { 578 if (!m_impl) { 579 if (ok) 580 *ok = false; 581 if (didReadNumber) 582 *didReadNumber = false; 583 return 0.0f; 584 } 585 return m_impl->toFloat(ok, didReadNumber); 586 } 587 588 String String::threadsafeCopy() const 589 { 590 if (!m_impl) 591 return String(); 592 return m_impl->threadsafeCopy(); 593 } 594 595 String String::crossThreadString() const 596 { 597 if (!m_impl) 598 return String(); 599 return m_impl->crossThreadString(); 600 } 601 602 void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const 603 { 604 result.clear(); 605 606 unsigned startPos = 0; 607 size_t endPos; 608 while ((endPos = find(separator, startPos)) != notFound) { 609 if (allowEmptyEntries || startPos != endPos) 610 result.append(substring(startPos, endPos - startPos)); 611 startPos = endPos + separator.length(); 612 } 613 if (allowEmptyEntries || startPos != length()) 614 result.append(substring(startPos)); 615 } 616 617 void String::split(const String& separator, Vector<String>& result) const 618 { 619 split(separator, false, result); 620 } 621 622 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const 623 { 624 result.clear(); 625 626 unsigned startPos = 0; 627 size_t endPos; 628 while ((endPos = find(separator, startPos)) != notFound) { 629 if (allowEmptyEntries || startPos != endPos) 630 result.append(substring(startPos, endPos - startPos)); 631 startPos = endPos + 1; 632 } 633 if (allowEmptyEntries || startPos != length()) 634 result.append(substring(startPos)); 635 } 636 637 void String::split(UChar separator, Vector<String>& result) const 638 { 639 split(String(&separator, 1), false, result); 640 } 641 642 CString String::ascii() const 643 { 644 // Printable ASCII characters 32..127 and the null character are 645 // preserved, characters outside of this range are converted to '?'. 646 647 unsigned length = this->length(); 648 const UChar* characters = this->characters(); 649 650 char* characterBuffer; 651 CString result = CString::newUninitialized(length, characterBuffer); 652 653 for (unsigned i = 0; i < length; ++i) { 654 UChar ch = characters[i]; 655 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; 656 } 657 658 return result; 659 } 660 661 CString String::latin1() const 662 { 663 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are 664 // preserved, characters outside of this range are converted to '?'. 665 666 unsigned length = this->length(); 667 const UChar* characters = this->characters(); 668 669 char* characterBuffer; 670 CString result = CString::newUninitialized(length, characterBuffer); 671 672 for (unsigned i = 0; i < length; ++i) { 673 UChar ch = characters[i]; 674 characterBuffer[i] = ch > 0xff ? '?' : ch; 675 } 676 677 return result; 678 } 679 680 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available. 681 static inline void putUTF8Triple(char*& buffer, UChar ch) 682 { 683 ASSERT(ch >= 0x0800); 684 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0); 685 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80); 686 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80); 687 } 688 689 CString String::utf8(bool strict) const 690 { 691 unsigned length = this->length(); 692 const UChar* characters = this->characters(); 693 694 // Allocate a buffer big enough to hold all the characters 695 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes). 696 // Optimization ideas, if we find this function is hot: 697 // * We could speculatively create a CStringBuffer to contain 'length' 698 // characters, and resize if necessary (i.e. if the buffer contains 699 // non-ascii characters). (Alternatively, scan the buffer first for 700 // ascii characters, so we know this will be sufficient). 701 // * We could allocate a CStringBuffer with an appropriate size to 702 // have a good chance of being able to write the string into the 703 // buffer without reallocing (say, 1.5 x length). 704 if (length > numeric_limits<unsigned>::max() / 3) 705 return CString(); 706 Vector<char, 1024> bufferVector(length * 3); 707 708 char* buffer = bufferVector.data(); 709 ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict); 710 ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion 711 712 // Only produced from strict conversion. 713 if (result == sourceIllegal) 714 return CString(); 715 716 // Check for an unconverted high surrogate. 717 if (result == sourceExhausted) { 718 if (strict) 719 return CString(); 720 // This should be one unpaired high surrogate. Treat it the same 721 // was as an unpaired high surrogate would have been handled in 722 // the middle of a string with non-strict conversion - which is 723 // to say, simply encode it to UTF-8. 724 ASSERT((characters + 1) == (this->characters() + length)); 725 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF)); 726 // There should be room left, since one UChar hasn't been converted. 727 ASSERT((buffer + 3) <= (buffer + bufferVector.size())); 728 putUTF8Triple(buffer, *characters); 729 } 730 731 return CString(bufferVector.data(), buffer - bufferVector.data()); 732 } 733 734 String String::fromUTF8(const char* stringStart, size_t length) 735 { 736 if (length > numeric_limits<unsigned>::max()) 737 CRASH(); 738 739 if (!stringStart) 740 return String(); 741 742 // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be 743 // the right length, if there are any multi-byte sequences this buffer will be too large. 744 UChar* buffer; 745 String stringBuffer(StringImpl::createUninitialized(length, buffer)); 746 UChar* bufferEnd = buffer + length; 747 748 // Try converting into the buffer. 749 const char* stringCurrent = stringStart; 750 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK) 751 return String(); 752 753 // stringBuffer is full (the input must have been all ascii) so just return it! 754 if (buffer == bufferEnd) 755 return stringBuffer; 756 757 // stringBuffer served its purpose as a buffer, copy the contents out into a new string. 758 unsigned utf16Length = buffer - stringBuffer.characters(); 759 ASSERT(utf16Length < length); 760 return String(stringBuffer.characters(), utf16Length); 761 } 762 763 String String::fromUTF8(const char* string) 764 { 765 if (!string) 766 return String(); 767 return fromUTF8(string, strlen(string)); 768 } 769 770 String String::fromUTF8WithLatin1Fallback(const char* string, size_t size) 771 { 772 String utf8 = fromUTF8(string, size); 773 if (!utf8) 774 return String(string, size); 775 return utf8; 776 } 777 778 // String Operations 779 780 static bool isCharacterAllowedInBase(UChar c, int base) 781 { 782 if (c > 0x7F) 783 return false; 784 if (isASCIIDigit(c)) 785 return c - '0' < base; 786 if (isASCIIAlpha(c)) { 787 if (base > 36) 788 base = 36; 789 return (c >= 'a' && c < 'a' + base - 10) 790 || (c >= 'A' && c < 'A' + base - 10); 791 } 792 return false; 793 } 794 795 template <typename IntegralType> 796 static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base) 797 { 798 static const IntegralType integralMax = numeric_limits<IntegralType>::max(); 799 static const bool isSigned = numeric_limits<IntegralType>::is_signed; 800 const IntegralType maxMultiplier = integralMax / base; 801 802 IntegralType value = 0; 803 bool isOk = false; 804 bool isNegative = false; 805 806 if (!data) 807 goto bye; 808 809 // skip leading whitespace 810 while (length && isSpaceOrNewline(*data)) { 811 length--; 812 data++; 813 } 814 815 if (isSigned && length && *data == '-') { 816 length--; 817 data++; 818 isNegative = true; 819 } else if (length && *data == '+') { 820 length--; 821 data++; 822 } 823 824 if (!length || !isCharacterAllowedInBase(*data, base)) 825 goto bye; 826 827 while (length && isCharacterAllowedInBase(*data, base)) { 828 length--; 829 IntegralType digitValue; 830 UChar c = *data; 831 if (isASCIIDigit(c)) 832 digitValue = c - '0'; 833 else if (c >= 'a') 834 digitValue = c - 'a' + 10; 835 else 836 digitValue = c - 'A' + 10; 837 838 if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative)) 839 goto bye; 840 841 value = base * value + digitValue; 842 data++; 843 } 844 845 #if COMPILER(MSVC) 846 #pragma warning(push, 0) 847 #pragma warning(disable:4146) 848 #endif 849 850 if (isNegative) 851 value = -value; 852 853 #if COMPILER(MSVC) 854 #pragma warning(pop) 855 #endif 856 857 // skip trailing space 858 while (length && isSpaceOrNewline(*data)) { 859 length--; 860 data++; 861 } 862 863 if (!length) 864 isOk = true; 865 bye: 866 if (ok) 867 *ok = isOk; 868 return isOk ? value : 0; 869 } 870 871 static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length) 872 { 873 size_t i = 0; 874 875 // Allow leading spaces. 876 for (; i != length; ++i) { 877 if (!isSpaceOrNewline(data[i])) 878 break; 879 } 880 881 // Allow sign. 882 if (i != length && (data[i] == '+' || data[i] == '-')) 883 ++i; 884 885 // Allow digits. 886 for (; i != length; ++i) { 887 if (!isASCIIDigit(data[i])) 888 break; 889 } 890 891 return i; 892 } 893 894 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base) 895 { 896 return toIntegralType<int>(data, length, ok, base); 897 } 898 899 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base) 900 { 901 return toIntegralType<unsigned>(data, length, ok, base); 902 } 903 904 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base) 905 { 906 return toIntegralType<int64_t>(data, length, ok, base); 907 } 908 909 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base) 910 { 911 return toIntegralType<uint64_t>(data, length, ok, base); 912 } 913 914 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base) 915 { 916 return toIntegralType<intptr_t>(data, length, ok, base); 917 } 918 919 int charactersToInt(const UChar* data, size_t length, bool* ok) 920 { 921 return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10); 922 } 923 924 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok) 925 { 926 return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10); 927 } 928 929 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok) 930 { 931 return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); 932 } 933 934 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok) 935 { 936 return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); 937 } 938 939 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok) 940 { 941 return toIntegralType<intptr_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10); 942 } 943 944 double charactersToDouble(const UChar* data, size_t length, bool* ok, bool* didReadNumber) 945 { 946 if (!length) { 947 if (ok) 948 *ok = false; 949 if (didReadNumber) 950 *didReadNumber = false; 951 return 0.0; 952 } 953 954 Vector<char, 256> bytes(length + 1); 955 for (unsigned i = 0; i < length; ++i) 956 bytes[i] = data[i] < 0x7F ? data[i] : '?'; 957 bytes[length] = '\0'; 958 char* start = bytes.data(); 959 char* end; 960 double val = WTF::strtod(start, &end); 961 if (ok) 962 *ok = (end == 0 || *end == '\0'); 963 if (didReadNumber) 964 *didReadNumber = end - start; 965 return val; 966 } 967 968 float charactersToFloat(const UChar* data, size_t length, bool* ok, bool* didReadNumber) 969 { 970 // FIXME: This will return ok even when the string fits into a double but not a float. 971 return static_cast<float>(charactersToDouble(data, length, ok, didReadNumber)); 972 } 973 974 } // namespace WTF 975 976 #ifndef NDEBUG 977 // For use in the debugger 978 String* string(const char*); 979 Vector<char> asciiDebug(StringImpl* impl); 980 Vector<char> asciiDebug(String& string); 981 982 String* string(const char* s) 983 { 984 // leaks memory! 985 return new String(s); 986 } 987 988 Vector<char> asciiDebug(StringImpl* impl) 989 { 990 if (!impl) 991 return asciiDebug(String("[null]").impl()); 992 993 Vector<char> buffer; 994 unsigned length = impl->length(); 995 const UChar* characters = impl->characters(); 996 997 buffer.resize(length + 1); 998 for (unsigned i = 0; i < length; ++i) { 999 UChar ch = characters[i]; 1000 buffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch; 1001 } 1002 buffer[length] = '\0'; 1003 1004 return buffer; 1005 } 1006 1007 Vector<char> asciiDebug(String& string) 1008 { 1009 return asciiDebug(string.impl()); 1010 } 1011 1012 #endif 1013