Home | History | Annotate | Download | only in text
      1 /*
      2  * (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved.
      4  * Copyright (C) 2007-2009 Torch Mobile, Inc.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  */
     21 
     22 #include "config.h"
     23 #include "WTFString.h"
     24 
     25 #include <stdarg.h>
     26 #include <wtf/ASCIICType.h>
     27 #include <wtf/text/CString.h>
     28 #include <wtf/StringExtras.h>
     29 #include <wtf/Vector.h>
     30 #include <wtf/dtoa.h>
     31 #include <wtf/unicode/UTF8.h>
     32 #include <wtf/unicode/Unicode.h>
     33 
     34 using namespace std;
     35 
     36 namespace WTF {
     37 
     38 using namespace Unicode;
     39 using namespace std;
     40 
     41 // Construct a string with UTF-16 data.
     42 String::String(const UChar* characters, unsigned length)
     43     : m_impl(characters ? StringImpl::create(characters, length) : 0)
     44 {
     45 }
     46 
     47 // Construct a string with UTF-16 data, from a null-terminated source.
     48 String::String(const UChar* str)
     49 {
     50     if (!str)
     51         return;
     52 
     53     size_t len = 0;
     54     while (str[len] != UChar(0))
     55         len++;
     56 
     57     if (len > numeric_limits<unsigned>::max())
     58         CRASH();
     59 
     60     m_impl = StringImpl::create(str, len);
     61 }
     62 
     63 // Construct a string with latin1 data.
     64 String::String(const char* characters, unsigned length)
     65     : m_impl(characters ? StringImpl::create(characters, length) : 0)
     66 {
     67 }
     68 
     69 // Construct a string with latin1 data, from a null-terminated source.
     70 String::String(const char* characters)
     71     : m_impl(characters ? StringImpl::create(characters) : 0)
     72 {
     73 }
     74 
     75 void String::append(const String& str)
     76 {
     77     if (str.isEmpty())
     78        return;
     79 
     80     // FIXME: This is extremely inefficient. So much so that we might want to take this
     81     // out of String's API. We can make it better by optimizing the case where exactly
     82     // one String is pointing at this StringImpl, but even then it's going to require a
     83     // call to fastMalloc every single time.
     84     if (str.m_impl) {
     85         if (m_impl) {
     86             UChar* data;
     87             if (str.length() > numeric_limits<unsigned>::max() - m_impl->length())
     88                 CRASH();
     89             RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data);
     90             memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
     91             memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar));
     92             m_impl = newImpl.release();
     93         } else
     94             m_impl = str.m_impl;
     95     }
     96 }
     97 
     98 void String::append(char c)
     99 {
    100     // FIXME: This is extremely inefficient. So much so that we might want to take this
    101     // out of String's API. We can make it better by optimizing the case where exactly
    102     // one String is pointing at this StringImpl, but even then it's going to require a
    103     // call to fastMalloc every single time.
    104     if (m_impl) {
    105         UChar* data;
    106         if (m_impl->length() >= numeric_limits<unsigned>::max())
    107             CRASH();
    108         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
    109         memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
    110         data[m_impl->length()] = c;
    111         m_impl = newImpl.release();
    112     } else
    113         m_impl = StringImpl::create(&c, 1);
    114 }
    115 
    116 void String::append(UChar c)
    117 {
    118     // FIXME: This is extremely inefficient. So much so that we might want to take this
    119     // out of String's API. We can make it better by optimizing the case where exactly
    120     // one String is pointing at this StringImpl, but even then it's going to require a
    121     // call to fastMalloc every single time.
    122     if (m_impl) {
    123         UChar* data;
    124         if (m_impl->length() >= numeric_limits<unsigned>::max())
    125             CRASH();
    126         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
    127         memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
    128         data[m_impl->length()] = c;
    129         m_impl = newImpl.release();
    130     } else
    131         m_impl = StringImpl::create(&c, 1);
    132 }
    133 
    134 String operator+(const String& a, const String& b)
    135 {
    136     if (a.isEmpty())
    137         return b;
    138     if (b.isEmpty())
    139         return a;
    140     String c = a;
    141     c += b;
    142     return c;
    143 }
    144 
    145 String operator+(const String& s, const char* cs)
    146 {
    147     return s + String(cs);
    148 }
    149 
    150 String operator+(const char* cs, const String& s)
    151 {
    152     return String(cs) + s;
    153 }
    154 
    155 int codePointCompare(const String& a, const String& b)
    156 {
    157     return codePointCompare(a.impl(), b.impl());
    158 }
    159 
    160 void String::insert(const String& str, unsigned pos)
    161 {
    162     if (str.isEmpty()) {
    163         if (str.isNull())
    164             return;
    165         if (isNull())
    166             m_impl = str.impl();
    167         return;
    168     }
    169     insert(str.characters(), str.length(), pos);
    170 }
    171 
    172 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
    173 {
    174     if (!m_impl) {
    175         if (!charactersToAppend)
    176             return;
    177         m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
    178         return;
    179     }
    180 
    181     if (!lengthToAppend)
    182         return;
    183 
    184     ASSERT(charactersToAppend);
    185     UChar* data;
    186     if (lengthToAppend > numeric_limits<unsigned>::max() - length())
    187         CRASH();
    188     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
    189     memcpy(data, characters(), length() * sizeof(UChar));
    190     memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar));
    191     m_impl = newImpl.release();
    192 }
    193 
    194 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
    195 {
    196     if (position >= length()) {
    197         append(charactersToInsert, lengthToInsert);
    198         return;
    199     }
    200 
    201     ASSERT(m_impl);
    202 
    203     if (!lengthToInsert)
    204         return;
    205 
    206     ASSERT(charactersToInsert);
    207     UChar* data;
    208     if (lengthToInsert > numeric_limits<unsigned>::max() - length())
    209         CRASH();
    210     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data);
    211     memcpy(data, characters(), position * sizeof(UChar));
    212     memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));
    213     memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));
    214     m_impl = newImpl.release();
    215 }
    216 
    217 UChar32 String::characterStartingAt(unsigned i) const
    218 {
    219     if (!m_impl || i >= m_impl->length())
    220         return 0;
    221     return m_impl->characterStartingAt(i);
    222 }
    223 
    224 void String::truncate(unsigned position)
    225 {
    226     if (position >= length())
    227         return;
    228     UChar* data;
    229     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
    230     memcpy(data, characters(), position * sizeof(UChar));
    231     m_impl = newImpl.release();
    232 }
    233 
    234 void String::remove(unsigned position, int lengthToRemove)
    235 {
    236     if (lengthToRemove <= 0)
    237         return;
    238     if (position >= length())
    239         return;
    240     if (static_cast<unsigned>(lengthToRemove) > length() - position)
    241         lengthToRemove = length() - position;
    242     UChar* data;
    243     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
    244     memcpy(data, characters(), position * sizeof(UChar));
    245     memcpy(data + position, characters() + position + lengthToRemove,
    246         (length() - lengthToRemove - position) * sizeof(UChar));
    247     m_impl = newImpl.release();
    248 }
    249 
    250 String String::substring(unsigned pos, unsigned len) const
    251 {
    252     if (!m_impl)
    253         return String();
    254     return m_impl->substring(pos, len);
    255 }
    256 
    257 String String::substringSharingImpl(unsigned offset, unsigned length) const
    258 {
    259     // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
    260 
    261     unsigned stringLength = this->length();
    262     offset = min(offset, stringLength);
    263     length = min(length, stringLength - offset);
    264 
    265     if (!offset && length == stringLength)
    266         return *this;
    267     return String(StringImpl::create(m_impl, offset, length));
    268 }
    269 
    270 String String::lower() const
    271 {
    272     if (!m_impl)
    273         return String();
    274     return m_impl->lower();
    275 }
    276 
    277 String String::upper() const
    278 {
    279     if (!m_impl)
    280         return String();
    281     return m_impl->upper();
    282 }
    283 
    284 String String::stripWhiteSpace() const
    285 {
    286     if (!m_impl)
    287         return String();
    288     return m_impl->stripWhiteSpace();
    289 }
    290 
    291 String String::simplifyWhiteSpace() const
    292 {
    293     if (!m_impl)
    294         return String();
    295     return m_impl->simplifyWhiteSpace();
    296 }
    297 
    298 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
    299 {
    300     if (!m_impl)
    301         return String();
    302     return m_impl->removeCharacters(findMatch);
    303 }
    304 
    305 String String::foldCase() const
    306 {
    307     if (!m_impl)
    308         return String();
    309     return m_impl->foldCase();
    310 }
    311 
    312 bool String::percentage(int& result) const
    313 {
    314     if (!m_impl || !m_impl->length())
    315         return false;
    316 
    317     if ((*m_impl)[m_impl->length() - 1] != '%')
    318        return false;
    319 
    320     result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1);
    321     return true;
    322 }
    323 
    324 const UChar* String::charactersWithNullTermination()
    325 {
    326     if (!m_impl)
    327         return 0;
    328     if (m_impl->hasTerminatingNullCharacter())
    329         return m_impl->characters();
    330     m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);
    331     return m_impl->characters();
    332 }
    333 
    334 String String::format(const char *format, ...)
    335 {
    336 #if PLATFORM(QT)
    337     // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
    338     // https://bugs.webkit.org/show_bug.cgi?id=18994
    339     va_list args;
    340     va_start(args, format);
    341 
    342     QString buffer;
    343     buffer.vsprintf(format, args);
    344 
    345     va_end(args);
    346 
    347     QByteArray ba = buffer.toUtf8();
    348     return StringImpl::create(ba.constData(), ba.length());
    349 
    350 #elif OS(WINCE)
    351     va_list args;
    352     va_start(args, format);
    353 
    354     Vector<char, 256> buffer;
    355 
    356     int bufferSize = 256;
    357     buffer.resize(bufferSize);
    358     for (;;) {
    359         int written = vsnprintf(buffer.data(), bufferSize, format, args);
    360         va_end(args);
    361 
    362         if (written == 0)
    363             return String("");
    364         if (written > 0)
    365             return StringImpl::create(buffer.data(), written);
    366 
    367         bufferSize <<= 1;
    368         buffer.resize(bufferSize);
    369         va_start(args, format);
    370     }
    371 
    372 #else
    373     va_list args;
    374     va_start(args, format);
    375 
    376     Vector<char, 256> buffer;
    377 
    378     // Do the format once to get the length.
    379 #if COMPILER(MSVC)
    380     int result = _vscprintf(format, args);
    381 #else
    382     char ch;
    383     int result = vsnprintf(&ch, 1, format, args);
    384     // We need to call va_end() and then va_start() again here, as the
    385     // contents of args is undefined after the call to vsnprintf
    386     // according to http://man.cx/snprintf(3)
    387     //
    388     // Not calling va_end/va_start here happens to work on lots of
    389     // systems, but fails e.g. on 64bit Linux.
    390     va_end(args);
    391     va_start(args, format);
    392 #endif
    393 
    394     if (result == 0)
    395         return String("");
    396     if (result < 0)
    397         return String();
    398     unsigned len = result;
    399     buffer.grow(len + 1);
    400 
    401     // Now do the formatting again, guaranteed to fit.
    402     vsnprintf(buffer.data(), buffer.size(), format, args);
    403 
    404     va_end(args);
    405 
    406     return StringImpl::create(buffer.data(), len);
    407 #endif
    408 }
    409 
    410 String String::number(short n)
    411 {
    412     return String::format("%hd", n);
    413 }
    414 
    415 String String::number(unsigned short n)
    416 {
    417     return String::format("%hu", n);
    418 }
    419 
    420 String String::number(int n)
    421 {
    422     return String::format("%d", n);
    423 }
    424 
    425 String String::number(unsigned n)
    426 {
    427     return String::format("%u", n);
    428 }
    429 
    430 String String::number(long n)
    431 {
    432     return String::format("%ld", n);
    433 }
    434 
    435 String String::number(unsigned long n)
    436 {
    437     return String::format("%lu", n);
    438 }
    439 
    440 String String::number(long long n)
    441 {
    442 #if OS(WINDOWS) && !PLATFORM(QT)
    443     return String::format("%I64i", n);
    444 #else
    445     return String::format("%lli", n);
    446 #endif
    447 }
    448 
    449 String String::number(unsigned long long n)
    450 {
    451 #if OS(WINDOWS) && !PLATFORM(QT)
    452     return String::format("%I64u", n);
    453 #else
    454     return String::format("%llu", n);
    455 #endif
    456 }
    457 
    458 String String::number(double n)
    459 {
    460     return String::format("%.6lg", n);
    461 }
    462 
    463 int String::toIntStrict(bool* ok, int base) const
    464 {
    465     if (!m_impl) {
    466         if (ok)
    467             *ok = false;
    468         return 0;
    469     }
    470     return m_impl->toIntStrict(ok, base);
    471 }
    472 
    473 unsigned String::toUIntStrict(bool* ok, int base) const
    474 {
    475     if (!m_impl) {
    476         if (ok)
    477             *ok = false;
    478         return 0;
    479     }
    480     return m_impl->toUIntStrict(ok, base);
    481 }
    482 
    483 int64_t String::toInt64Strict(bool* ok, int base) const
    484 {
    485     if (!m_impl) {
    486         if (ok)
    487             *ok = false;
    488         return 0;
    489     }
    490     return m_impl->toInt64Strict(ok, base);
    491 }
    492 
    493 uint64_t String::toUInt64Strict(bool* ok, int base) const
    494 {
    495     if (!m_impl) {
    496         if (ok)
    497             *ok = false;
    498         return 0;
    499     }
    500     return m_impl->toUInt64Strict(ok, base);
    501 }
    502 
    503 intptr_t String::toIntPtrStrict(bool* ok, int base) const
    504 {
    505     if (!m_impl) {
    506         if (ok)
    507             *ok = false;
    508         return 0;
    509     }
    510     return m_impl->toIntPtrStrict(ok, base);
    511 }
    512 
    513 
    514 int String::toInt(bool* ok) const
    515 {
    516     if (!m_impl) {
    517         if (ok)
    518             *ok = false;
    519         return 0;
    520     }
    521     return m_impl->toInt(ok);
    522 }
    523 
    524 unsigned String::toUInt(bool* ok) const
    525 {
    526     if (!m_impl) {
    527         if (ok)
    528             *ok = false;
    529         return 0;
    530     }
    531     return m_impl->toUInt(ok);
    532 }
    533 
    534 int64_t String::toInt64(bool* ok) const
    535 {
    536     if (!m_impl) {
    537         if (ok)
    538             *ok = false;
    539         return 0;
    540     }
    541     return m_impl->toInt64(ok);
    542 }
    543 
    544 uint64_t String::toUInt64(bool* ok) const
    545 {
    546     if (!m_impl) {
    547         if (ok)
    548             *ok = false;
    549         return 0;
    550     }
    551     return m_impl->toUInt64(ok);
    552 }
    553 
    554 intptr_t String::toIntPtr(bool* ok) const
    555 {
    556     if (!m_impl) {
    557         if (ok)
    558             *ok = false;
    559         return 0;
    560     }
    561     return m_impl->toIntPtr(ok);
    562 }
    563 
    564 double String::toDouble(bool* ok, bool* didReadNumber) const
    565 {
    566     if (!m_impl) {
    567         if (ok)
    568             *ok = false;
    569         if (didReadNumber)
    570             *didReadNumber = false;
    571         return 0.0;
    572     }
    573     return m_impl->toDouble(ok, didReadNumber);
    574 }
    575 
    576 float String::toFloat(bool* ok, bool* didReadNumber) const
    577 {
    578     if (!m_impl) {
    579         if (ok)
    580             *ok = false;
    581         if (didReadNumber)
    582             *didReadNumber = false;
    583         return 0.0f;
    584     }
    585     return m_impl->toFloat(ok, didReadNumber);
    586 }
    587 
    588 String String::threadsafeCopy() const
    589 {
    590     if (!m_impl)
    591         return String();
    592     return m_impl->threadsafeCopy();
    593 }
    594 
    595 String String::crossThreadString() const
    596 {
    597     if (!m_impl)
    598         return String();
    599     return m_impl->crossThreadString();
    600 }
    601 
    602 void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
    603 {
    604     result.clear();
    605 
    606     unsigned startPos = 0;
    607     size_t endPos;
    608     while ((endPos = find(separator, startPos)) != notFound) {
    609         if (allowEmptyEntries || startPos != endPos)
    610             result.append(substring(startPos, endPos - startPos));
    611         startPos = endPos + separator.length();
    612     }
    613     if (allowEmptyEntries || startPos != length())
    614         result.append(substring(startPos));
    615 }
    616 
    617 void String::split(const String& separator, Vector<String>& result) const
    618 {
    619     split(separator, false, result);
    620 }
    621 
    622 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
    623 {
    624     result.clear();
    625 
    626     unsigned startPos = 0;
    627     size_t endPos;
    628     while ((endPos = find(separator, startPos)) != notFound) {
    629         if (allowEmptyEntries || startPos != endPos)
    630             result.append(substring(startPos, endPos - startPos));
    631         startPos = endPos + 1;
    632     }
    633     if (allowEmptyEntries || startPos != length())
    634         result.append(substring(startPos));
    635 }
    636 
    637 void String::split(UChar separator, Vector<String>& result) const
    638 {
    639     split(String(&separator, 1), false, result);
    640 }
    641 
    642 CString String::ascii() const
    643 {
    644     // Printable ASCII characters 32..127 and the null character are
    645     // preserved, characters outside of this range are converted to '?'.
    646 
    647     unsigned length = this->length();
    648     const UChar* characters = this->characters();
    649 
    650     char* characterBuffer;
    651     CString result = CString::newUninitialized(length, characterBuffer);
    652 
    653     for (unsigned i = 0; i < length; ++i) {
    654         UChar ch = characters[i];
    655         characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
    656     }
    657 
    658     return result;
    659 }
    660 
    661 CString String::latin1() const
    662 {
    663     // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
    664     // preserved, characters outside of this range are converted to '?'.
    665 
    666     unsigned length = this->length();
    667     const UChar* characters = this->characters();
    668 
    669     char* characterBuffer;
    670     CString result = CString::newUninitialized(length, characterBuffer);
    671 
    672     for (unsigned i = 0; i < length; ++i) {
    673         UChar ch = characters[i];
    674         characterBuffer[i] = ch > 0xff ? '?' : ch;
    675     }
    676 
    677     return result;
    678 }
    679 
    680 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
    681 static inline void putUTF8Triple(char*& buffer, UChar ch)
    682 {
    683     ASSERT(ch >= 0x0800);
    684     *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
    685     *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
    686     *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
    687 }
    688 
    689 CString String::utf8(bool strict) const
    690 {
    691     unsigned length = this->length();
    692     const UChar* characters = this->characters();
    693 
    694     // Allocate a buffer big enough to hold all the characters
    695     // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
    696     // Optimization ideas, if we find this function is hot:
    697     //  * We could speculatively create a CStringBuffer to contain 'length'
    698     //    characters, and resize if necessary (i.e. if the buffer contains
    699     //    non-ascii characters). (Alternatively, scan the buffer first for
    700     //    ascii characters, so we know this will be sufficient).
    701     //  * We could allocate a CStringBuffer with an appropriate size to
    702     //    have a good chance of being able to write the string into the
    703     //    buffer without reallocing (say, 1.5 x length).
    704     if (length > numeric_limits<unsigned>::max() / 3)
    705         return CString();
    706     Vector<char, 1024> bufferVector(length * 3);
    707 
    708     char* buffer = bufferVector.data();
    709     ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
    710     ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
    711 
    712     // Only produced from strict conversion.
    713     if (result == sourceIllegal)
    714         return CString();
    715 
    716     // Check for an unconverted high surrogate.
    717     if (result == sourceExhausted) {
    718         if (strict)
    719             return CString();
    720         // This should be one unpaired high surrogate. Treat it the same
    721         // was as an unpaired high surrogate would have been handled in
    722         // the middle of a string with non-strict conversion - which is
    723         // to say, simply encode it to UTF-8.
    724         ASSERT((characters + 1) == (this->characters() + length));
    725         ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
    726         // There should be room left, since one UChar hasn't been converted.
    727         ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
    728         putUTF8Triple(buffer, *characters);
    729     }
    730 
    731     return CString(bufferVector.data(), buffer - bufferVector.data());
    732 }
    733 
    734 String String::fromUTF8(const char* stringStart, size_t length)
    735 {
    736     if (length > numeric_limits<unsigned>::max())
    737         CRASH();
    738 
    739     if (!stringStart)
    740         return String();
    741 
    742     // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be
    743     // the right length, if there are any multi-byte sequences this buffer will be too large.
    744     UChar* buffer;
    745     String stringBuffer(StringImpl::createUninitialized(length, buffer));
    746     UChar* bufferEnd = buffer + length;
    747 
    748     // Try converting into the buffer.
    749     const char* stringCurrent = stringStart;
    750     if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK)
    751         return String();
    752 
    753     // stringBuffer is full (the input must have been all ascii) so just return it!
    754     if (buffer == bufferEnd)
    755         return stringBuffer;
    756 
    757     // stringBuffer served its purpose as a buffer, copy the contents out into a new string.
    758     unsigned utf16Length = buffer - stringBuffer.characters();
    759     ASSERT(utf16Length < length);
    760     return String(stringBuffer.characters(), utf16Length);
    761 }
    762 
    763 String String::fromUTF8(const char* string)
    764 {
    765     if (!string)
    766         return String();
    767     return fromUTF8(string, strlen(string));
    768 }
    769 
    770 String String::fromUTF8WithLatin1Fallback(const char* string, size_t size)
    771 {
    772     String utf8 = fromUTF8(string, size);
    773     if (!utf8)
    774         return String(string, size);
    775     return utf8;
    776 }
    777 
    778 // String Operations
    779 
    780 static bool isCharacterAllowedInBase(UChar c, int base)
    781 {
    782     if (c > 0x7F)
    783         return false;
    784     if (isASCIIDigit(c))
    785         return c - '0' < base;
    786     if (isASCIIAlpha(c)) {
    787         if (base > 36)
    788             base = 36;
    789         return (c >= 'a' && c < 'a' + base - 10)
    790             || (c >= 'A' && c < 'A' + base - 10);
    791     }
    792     return false;
    793 }
    794 
    795 template <typename IntegralType>
    796 static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base)
    797 {
    798     static const IntegralType integralMax = numeric_limits<IntegralType>::max();
    799     static const bool isSigned = numeric_limits<IntegralType>::is_signed;
    800     const IntegralType maxMultiplier = integralMax / base;
    801 
    802     IntegralType value = 0;
    803     bool isOk = false;
    804     bool isNegative = false;
    805 
    806     if (!data)
    807         goto bye;
    808 
    809     // skip leading whitespace
    810     while (length && isSpaceOrNewline(*data)) {
    811         length--;
    812         data++;
    813     }
    814 
    815     if (isSigned && length && *data == '-') {
    816         length--;
    817         data++;
    818         isNegative = true;
    819     } else if (length && *data == '+') {
    820         length--;
    821         data++;
    822     }
    823 
    824     if (!length || !isCharacterAllowedInBase(*data, base))
    825         goto bye;
    826 
    827     while (length && isCharacterAllowedInBase(*data, base)) {
    828         length--;
    829         IntegralType digitValue;
    830         UChar c = *data;
    831         if (isASCIIDigit(c))
    832             digitValue = c - '0';
    833         else if (c >= 'a')
    834             digitValue = c - 'a' + 10;
    835         else
    836             digitValue = c - 'A' + 10;
    837 
    838         if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
    839             goto bye;
    840 
    841         value = base * value + digitValue;
    842         data++;
    843     }
    844 
    845 #if COMPILER(MSVC)
    846 #pragma warning(push, 0)
    847 #pragma warning(disable:4146)
    848 #endif
    849 
    850     if (isNegative)
    851         value = -value;
    852 
    853 #if COMPILER(MSVC)
    854 #pragma warning(pop)
    855 #endif
    856 
    857     // skip trailing space
    858     while (length && isSpaceOrNewline(*data)) {
    859         length--;
    860         data++;
    861     }
    862 
    863     if (!length)
    864         isOk = true;
    865 bye:
    866     if (ok)
    867         *ok = isOk;
    868     return isOk ? value : 0;
    869 }
    870 
    871 static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length)
    872 {
    873     size_t i = 0;
    874 
    875     // Allow leading spaces.
    876     for (; i != length; ++i) {
    877         if (!isSpaceOrNewline(data[i]))
    878             break;
    879     }
    880 
    881     // Allow sign.
    882     if (i != length && (data[i] == '+' || data[i] == '-'))
    883         ++i;
    884 
    885     // Allow digits.
    886     for (; i != length; ++i) {
    887         if (!isASCIIDigit(data[i]))
    888             break;
    889     }
    890 
    891     return i;
    892 }
    893 
    894 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
    895 {
    896     return toIntegralType<int>(data, length, ok, base);
    897 }
    898 
    899 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
    900 {
    901     return toIntegralType<unsigned>(data, length, ok, base);
    902 }
    903 
    904 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
    905 {
    906     return toIntegralType<int64_t>(data, length, ok, base);
    907 }
    908 
    909 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
    910 {
    911     return toIntegralType<uint64_t>(data, length, ok, base);
    912 }
    913 
    914 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
    915 {
    916     return toIntegralType<intptr_t>(data, length, ok, base);
    917 }
    918 
    919 int charactersToInt(const UChar* data, size_t length, bool* ok)
    920 {
    921     return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
    922 }
    923 
    924 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
    925 {
    926     return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
    927 }
    928 
    929 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
    930 {
    931     return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
    932 }
    933 
    934 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
    935 {
    936     return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
    937 }
    938 
    939 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
    940 {
    941     return toIntegralType<intptr_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
    942 }
    943 
    944 double charactersToDouble(const UChar* data, size_t length, bool* ok, bool* didReadNumber)
    945 {
    946     if (!length) {
    947         if (ok)
    948             *ok = false;
    949         if (didReadNumber)
    950             *didReadNumber = false;
    951         return 0.0;
    952     }
    953 
    954     Vector<char, 256> bytes(length + 1);
    955     for (unsigned i = 0; i < length; ++i)
    956         bytes[i] = data[i] < 0x7F ? data[i] : '?';
    957     bytes[length] = '\0';
    958     char* start = bytes.data();
    959     char* end;
    960     double val = WTF::strtod(start, &end);
    961     if (ok)
    962         *ok = (end == 0 || *end == '\0');
    963     if (didReadNumber)
    964         *didReadNumber = end - start;
    965     return val;
    966 }
    967 
    968 float charactersToFloat(const UChar* data, size_t length, bool* ok, bool* didReadNumber)
    969 {
    970     // FIXME: This will return ok even when the string fits into a double but not a float.
    971     return static_cast<float>(charactersToDouble(data, length, ok, didReadNumber));
    972 }
    973 
    974 } // namespace WTF
    975 
    976 #ifndef NDEBUG
    977 // For use in the debugger
    978 String* string(const char*);
    979 Vector<char> asciiDebug(StringImpl* impl);
    980 Vector<char> asciiDebug(String& string);
    981 
    982 String* string(const char* s)
    983 {
    984     // leaks memory!
    985     return new String(s);
    986 }
    987 
    988 Vector<char> asciiDebug(StringImpl* impl)
    989 {
    990     if (!impl)
    991         return asciiDebug(String("[null]").impl());
    992 
    993     Vector<char> buffer;
    994     unsigned length = impl->length();
    995     const UChar* characters = impl->characters();
    996 
    997     buffer.resize(length + 1);
    998     for (unsigned i = 0; i < length; ++i) {
    999         UChar ch = characters[i];
   1000         buffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
   1001     }
   1002     buffer[length] = '\0';
   1003 
   1004     return buffer;
   1005 }
   1006 
   1007 Vector<char> asciiDebug(String& string)
   1008 {
   1009     return asciiDebug(string.impl());
   1010 }
   1011 
   1012 #endif
   1013