Home | History | Annotate | Download | only in text
      1 /*
      2  * (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights reserved.
      4  * Copyright (C) 2007-2009 Torch Mobile, Inc.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  */
     21 
     22 #include "config.h"
     23 #include "WTFString.h"
     24 
     25 #include "IntegerToStringConversion.h"
     26 #include <stdarg.h>
     27 #include "wtf/ASCIICType.h"
     28 #include "wtf/DataLog.h"
     29 #include "wtf/HexNumber.h"
     30 #include "wtf/MathExtras.h"
     31 #include "wtf/text/CString.h"
     32 #include "wtf/StringExtras.h"
     33 #include "wtf/Vector.h"
     34 #include "wtf/dtoa.h"
     35 #include "wtf/unicode/CharacterNames.h"
     36 #include "wtf/unicode/UTF8.h"
     37 #include "wtf/unicode/Unicode.h"
     38 
     39 using namespace std;
     40 
     41 namespace WTF {
     42 
     43 using namespace Unicode;
     44 using namespace std;
     45 
     46 // Construct a string with UTF-16 data.
     47 String::String(const UChar* characters, unsigned length)
     48     : m_impl(characters ? StringImpl::create(characters, length) : 0)
     49 {
     50 }
     51 
     52 // Construct a string with UTF-16 data, from a null-terminated source.
     53 String::String(const UChar* str)
     54 {
     55     if (!str)
     56         return;
     57     m_impl = StringImpl::create(str, lengthOfNullTerminatedString(str));
     58 }
     59 
     60 // Construct a string with latin1 data.
     61 String::String(const LChar* characters, unsigned length)
     62     : m_impl(characters ? StringImpl::create(characters, length) : 0)
     63 {
     64 }
     65 
     66 String::String(const char* characters, unsigned length)
     67     : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters), length) : 0)
     68 {
     69 }
     70 
     71 // Construct a string with latin1 data, from a null-terminated source.
     72 String::String(const LChar* characters)
     73     : m_impl(characters ? StringImpl::create(characters) : 0)
     74 {
     75 }
     76 
     77 String::String(const char* characters)
     78     : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters)) : 0)
     79 {
     80 }
     81 
     82 void String::append(const String& string)
     83 {
     84     if (string.isEmpty())
     85         return;
     86     if (!m_impl) {
     87         m_impl = string.m_impl;
     88         return;
     89     }
     90 
     91     // FIXME: This is extremely inefficient. So much so that we might want to take this
     92     // out of String's API. We can make it better by optimizing the case where exactly
     93     // one String is pointing at this StringImpl, but even then it's going to require a
     94     // call to fastMalloc every single time.
     95 
     96     if (m_impl->is8Bit() && string.m_impl->is8Bit()) {
     97         LChar* data;
     98         RELEASE_ASSERT(string.length() <= numeric_limits<unsigned>::max() - m_impl->length());
     99         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + string.length(), data);
    100         memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LChar));
    101         memcpy(data + m_impl->length(), string.characters8(), string.length() * sizeof(LChar));
    102         m_impl = newImpl.release();
    103         return;
    104     }
    105 
    106     UChar* data;
    107     RELEASE_ASSERT(string.length() <= numeric_limits<unsigned>::max() - m_impl->length());
    108     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + string.length(), data);
    109 
    110     if (m_impl->is8Bit())
    111         StringImpl::copyChars(data, m_impl->characters8(), m_impl->length());
    112     else
    113         StringImpl::copyChars(data, m_impl->characters16(), m_impl->length());
    114 
    115     if (string.impl()->is8Bit())
    116         StringImpl::copyChars(data + m_impl->length(), string.impl()->characters8(), string.impl()->length());
    117     else
    118         StringImpl::copyChars(data + m_impl->length(), string.impl()->characters16(), string.impl()->length());
    119 
    120     m_impl = newImpl.release();
    121 }
    122 
    123 template <typename CharacterType>
    124 inline void String::appendInternal(CharacterType c)
    125 {
    126     // FIXME: This is extremely inefficient. So much so that we might want to take this
    127     // out of String's API. We can make it better by optimizing the case where exactly
    128     // one String is pointing at this StringImpl, but even then it's going to require a
    129     // call to fastMalloc every single time.
    130     if (!m_impl) {
    131         m_impl = StringImpl::create(&c, 1);
    132         return;
    133     }
    134 
    135     UChar* data; // FIXME: We should be able to create an 8 bit string via this code path.
    136     RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max());
    137     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
    138     if (m_impl->is8Bit())
    139         StringImpl::copyChars(data, m_impl->characters8(), m_impl->length());
    140     else
    141         StringImpl::copyChars(data, m_impl->characters16(), m_impl->length());
    142     data[m_impl->length()] = c;
    143     m_impl = newImpl.release();
    144 }
    145 
    146 void String::append(LChar c)
    147 {
    148     appendInternal(c);
    149 }
    150 
    151 void String::append(UChar c)
    152 {
    153     appendInternal(c);
    154 }
    155 
    156 int codePointCompare(const String& a, const String& b)
    157 {
    158     return codePointCompare(a.impl(), b.impl());
    159 }
    160 
    161 void String::insert(const String& string, unsigned position)
    162 {
    163     if (string.isEmpty()) {
    164         if (string.isNull())
    165             return;
    166         if (isNull())
    167             m_impl = string.impl();
    168         return;
    169     }
    170 
    171     if (string.is8Bit())
    172         insert(string.impl()->characters8(), string.length(), position);
    173     else
    174         insert(string.impl()->characters16(), string.length(), position);
    175 }
    176 
    177 void String::append(const LChar* charactersToAppend, unsigned lengthToAppend)
    178 {
    179     if (!m_impl) {
    180         if (!charactersToAppend)
    181             return;
    182         m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
    183         return;
    184     }
    185 
    186     if (!lengthToAppend)
    187         return;
    188 
    189     ASSERT(charactersToAppend);
    190 
    191     unsigned strLength = m_impl->length();
    192 
    193     if (m_impl->is8Bit()) {
    194         RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength);
    195         LChar* data;
    196         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
    197         StringImpl::copyChars(data, m_impl->characters8(), strLength);
    198         StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
    199         m_impl = newImpl.release();
    200         return;
    201     }
    202 
    203     RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength);
    204     UChar* data;
    205     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
    206     StringImpl::copyChars(data, m_impl->characters16(), strLength);
    207     StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
    208     m_impl = newImpl.release();
    209 }
    210 
    211 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
    212 {
    213     if (!m_impl) {
    214         if (!charactersToAppend)
    215             return;
    216         m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
    217         return;
    218     }
    219 
    220     if (!lengthToAppend)
    221         return;
    222 
    223     unsigned strLength = m_impl->length();
    224 
    225     ASSERT(charactersToAppend);
    226     RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength);
    227     UChar* data;
    228     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
    229     if (m_impl->is8Bit())
    230         StringImpl::copyChars(data, characters8(), strLength);
    231     else
    232         StringImpl::copyChars(data, characters16(), strLength);
    233     StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
    234     m_impl = newImpl.release();
    235 }
    236 
    237 template<typename CharType>
    238 PassRefPtr<StringImpl> insertInternal(PassRefPtr<StringImpl> impl, const CharType* charactersToInsert, unsigned lengthToInsert, unsigned position)
    239 {
    240     if (!lengthToInsert)
    241         return impl;
    242 
    243     ASSERT(charactersToInsert);
    244     UChar* data; // FIXME: We should be able to create an 8 bit string here.
    245     RELEASE_ASSERT(lengthToInsert <= numeric_limits<unsigned>::max() - impl->length());
    246     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(impl->length() + lengthToInsert, data);
    247 
    248     if (impl->is8Bit())
    249         StringImpl::copyChars(data, impl->characters8(), position);
    250     else
    251         StringImpl::copyChars(data, impl->characters16(), position);
    252 
    253     StringImpl::copyChars(data + position, charactersToInsert, lengthToInsert);
    254 
    255     if (impl->is8Bit())
    256         StringImpl::copyChars(data + position + lengthToInsert, impl->characters8() + position, impl->length() - position);
    257     else
    258         StringImpl::copyChars(data + position + lengthToInsert, impl->characters16() + position, impl->length() - position);
    259 
    260     return newImpl.release();
    261 }
    262 
    263 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
    264 {
    265     if (position >= length()) {
    266         append(charactersToInsert, lengthToInsert);
    267         return;
    268     }
    269     ASSERT(m_impl);
    270     m_impl = insertInternal(m_impl.release(), charactersToInsert, lengthToInsert, position);
    271 }
    272 
    273 void String::insert(const LChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
    274 {
    275     if (position >= length()) {
    276         append(charactersToInsert, lengthToInsert);
    277         return;
    278     }
    279     ASSERT(m_impl);
    280     m_impl = insertInternal(m_impl.release(), charactersToInsert, lengthToInsert, position);
    281 }
    282 
    283 UChar32 String::characterStartingAt(unsigned i) const
    284 {
    285     if (!m_impl || i >= m_impl->length())
    286         return 0;
    287     return m_impl->characterStartingAt(i);
    288 }
    289 
    290 void String::ensure16Bit()
    291 {
    292     unsigned length = this->length();
    293     if (!length || !is8Bit())
    294         return;
    295     m_impl = make16BitFrom8BitSource(m_impl->characters8(), length).impl();
    296 }
    297 
    298 void String::truncate(unsigned position)
    299 {
    300     if (position >= length())
    301         return;
    302     if (m_impl->is8Bit()) {
    303         LChar* data;
    304         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
    305         memcpy(data, m_impl->characters8(), position * sizeof(LChar));
    306         m_impl = newImpl.release();
    307     } else {
    308         UChar* data;
    309         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
    310         memcpy(data, m_impl->characters16(), position * sizeof(UChar));
    311         m_impl = newImpl.release();
    312     }
    313 }
    314 
    315 template <typename CharacterType>
    316 inline void String::removeInternal(const CharacterType* characters, unsigned position, int lengthToRemove)
    317 {
    318     CharacterType* data;
    319     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
    320     memcpy(data, characters, position * sizeof(CharacterType));
    321     memcpy(data + position, characters + position + lengthToRemove,
    322         (length() - lengthToRemove - position) * sizeof(CharacterType));
    323 
    324     m_impl = newImpl.release();
    325 }
    326 
    327 void String::remove(unsigned position, int lengthToRemove)
    328 {
    329     if (lengthToRemove <= 0)
    330         return;
    331     if (position >= length())
    332         return;
    333     if (static_cast<unsigned>(lengthToRemove) > length() - position)
    334         lengthToRemove = length() - position;
    335 
    336     if (is8Bit()) {
    337         removeInternal(characters8(), position, lengthToRemove);
    338 
    339         return;
    340     }
    341 
    342     removeInternal(characters16(), position, lengthToRemove);
    343 }
    344 
    345 String String::substring(unsigned pos, unsigned len) const
    346 {
    347     if (!m_impl)
    348         return String();
    349     return m_impl->substring(pos, len);
    350 }
    351 
    352 String String::lower() const
    353 {
    354     if (!m_impl)
    355         return String();
    356     return m_impl->lower();
    357 }
    358 
    359 String String::upper() const
    360 {
    361     if (!m_impl)
    362         return String();
    363     return m_impl->upper();
    364 }
    365 
    366 String String::stripWhiteSpace() const
    367 {
    368     if (!m_impl)
    369         return String();
    370     return m_impl->stripWhiteSpace();
    371 }
    372 
    373 String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const
    374 {
    375     if (!m_impl)
    376         return String();
    377     return m_impl->stripWhiteSpace(isWhiteSpace);
    378 }
    379 
    380 String String::simplifyWhiteSpace() const
    381 {
    382     if (!m_impl)
    383         return String();
    384     return m_impl->simplifyWhiteSpace();
    385 }
    386 
    387 String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const
    388 {
    389     if (!m_impl)
    390         return String();
    391     return m_impl->simplifyWhiteSpace(isWhiteSpace);
    392 }
    393 
    394 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
    395 {
    396     if (!m_impl)
    397         return String();
    398     return m_impl->removeCharacters(findMatch);
    399 }
    400 
    401 String String::foldCase() const
    402 {
    403     if (!m_impl)
    404         return String();
    405     return m_impl->foldCase();
    406 }
    407 
    408 bool String::percentage(int& result) const
    409 {
    410     if (!m_impl || !m_impl->length())
    411         return false;
    412 
    413     if ((*m_impl)[m_impl->length() - 1] != '%')
    414         return false;
    415 
    416     if (m_impl->is8Bit())
    417         result = charactersToIntStrict(m_impl->characters8(), m_impl->length() - 1);
    418     else
    419         result = charactersToIntStrict(m_impl->characters16(), m_impl->length() - 1);
    420 
    421     return true;
    422 }
    423 
    424 Vector<UChar> String::charactersWithNullTermination() const
    425 {
    426     if (!m_impl)
    427         return Vector<UChar>();
    428 
    429     Vector<UChar> result;
    430     result.reserveInitialCapacity(length() + 1);
    431     appendTo(result);
    432     result.append(0);
    433     return result;
    434 }
    435 
    436 unsigned String::copyTo(UChar* buffer, unsigned pos, unsigned maxLength) const
    437 {
    438     unsigned length = this->length();
    439     RELEASE_ASSERT(pos <= length);
    440     unsigned numCharacters = std::min(length - pos, maxLength);
    441     if (!numCharacters)
    442         return 0;
    443     if (is8Bit())
    444         StringImpl::copyChars(buffer, characters8() + pos, numCharacters);
    445     else
    446         StringImpl::copyChars(buffer, characters16() + pos, numCharacters);
    447     return numCharacters;
    448 }
    449 
    450 String String::format(const char *format, ...)
    451 {
    452     va_list args;
    453     va_start(args, format);
    454 
    455     Vector<char, 256> buffer;
    456 
    457     // Do the format once to get the length.
    458 #if COMPILER(MSVC)
    459     int result = _vscprintf(format, args);
    460 #else
    461     char ch;
    462     int result = vsnprintf(&ch, 1, format, args);
    463     // We need to call va_end() and then va_start() again here, as the
    464     // contents of args is undefined after the call to vsnprintf
    465     // according to http://man.cx/snprintf(3)
    466     //
    467     // Not calling va_end/va_start here happens to work on lots of
    468     // systems, but fails e.g. on 64bit Linux.
    469     va_end(args);
    470     va_start(args, format);
    471 #endif
    472 
    473     if (result == 0)
    474         return String("");
    475     if (result < 0)
    476         return String();
    477     unsigned len = result;
    478     buffer.grow(len + 1);
    479 
    480     // Now do the formatting again, guaranteed to fit.
    481     vsnprintf(buffer.data(), buffer.size(), format, args);
    482 
    483     va_end(args);
    484 
    485     return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len);
    486 }
    487 
    488 String String::number(int number)
    489 {
    490     return numberToStringSigned<String>(number);
    491 }
    492 
    493 String String::number(unsigned int number)
    494 {
    495     return numberToStringUnsigned<String>(number);
    496 }
    497 
    498 String String::number(long number)
    499 {
    500     return numberToStringSigned<String>(number);
    501 }
    502 
    503 String String::number(unsigned long number)
    504 {
    505     return numberToStringUnsigned<String>(number);
    506 }
    507 
    508 String String::number(long long number)
    509 {
    510     return numberToStringSigned<String>(number);
    511 }
    512 
    513 String String::number(unsigned long long number)
    514 {
    515     return numberToStringUnsigned<String>(number);
    516 }
    517 
    518 String String::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
    519 {
    520     NumberToStringBuffer buffer;
    521     return String(numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros));
    522 }
    523 
    524 String String::numberToStringECMAScript(double number)
    525 {
    526     NumberToStringBuffer buffer;
    527     return String(numberToString(number, buffer));
    528 }
    529 
    530 String String::numberToStringFixedWidth(double number, unsigned decimalPlaces)
    531 {
    532     NumberToStringBuffer buffer;
    533     return String(numberToFixedWidthString(number, decimalPlaces, buffer));
    534 }
    535 
    536 int String::toIntStrict(bool* ok, int base) const
    537 {
    538     if (!m_impl) {
    539         if (ok)
    540             *ok = false;
    541         return 0;
    542     }
    543     return m_impl->toIntStrict(ok, base);
    544 }
    545 
    546 unsigned String::toUIntStrict(bool* ok, int base) const
    547 {
    548     if (!m_impl) {
    549         if (ok)
    550             *ok = false;
    551         return 0;
    552     }
    553     return m_impl->toUIntStrict(ok, base);
    554 }
    555 
    556 int64_t String::toInt64Strict(bool* ok, int base) const
    557 {
    558     if (!m_impl) {
    559         if (ok)
    560             *ok = false;
    561         return 0;
    562     }
    563     return m_impl->toInt64Strict(ok, base);
    564 }
    565 
    566 uint64_t String::toUInt64Strict(bool* ok, int base) const
    567 {
    568     if (!m_impl) {
    569         if (ok)
    570             *ok = false;
    571         return 0;
    572     }
    573     return m_impl->toUInt64Strict(ok, base);
    574 }
    575 
    576 intptr_t String::toIntPtrStrict(bool* ok, int base) const
    577 {
    578     if (!m_impl) {
    579         if (ok)
    580             *ok = false;
    581         return 0;
    582     }
    583     return m_impl->toIntPtrStrict(ok, base);
    584 }
    585 
    586 int String::toInt(bool* ok) const
    587 {
    588     if (!m_impl) {
    589         if (ok)
    590             *ok = false;
    591         return 0;
    592     }
    593     return m_impl->toInt(ok);
    594 }
    595 
    596 unsigned String::toUInt(bool* ok) const
    597 {
    598     if (!m_impl) {
    599         if (ok)
    600             *ok = false;
    601         return 0;
    602     }
    603     return m_impl->toUInt(ok);
    604 }
    605 
    606 int64_t String::toInt64(bool* ok) const
    607 {
    608     if (!m_impl) {
    609         if (ok)
    610             *ok = false;
    611         return 0;
    612     }
    613     return m_impl->toInt64(ok);
    614 }
    615 
    616 uint64_t String::toUInt64(bool* ok) const
    617 {
    618     if (!m_impl) {
    619         if (ok)
    620             *ok = false;
    621         return 0;
    622     }
    623     return m_impl->toUInt64(ok);
    624 }
    625 
    626 intptr_t String::toIntPtr(bool* ok) const
    627 {
    628     if (!m_impl) {
    629         if (ok)
    630             *ok = false;
    631         return 0;
    632     }
    633     return m_impl->toIntPtr(ok);
    634 }
    635 
    636 double String::toDouble(bool* ok) const
    637 {
    638     if (!m_impl) {
    639         if (ok)
    640             *ok = false;
    641         return 0.0;
    642     }
    643     return m_impl->toDouble(ok);
    644 }
    645 
    646 float String::toFloat(bool* ok) const
    647 {
    648     if (!m_impl) {
    649         if (ok)
    650             *ok = false;
    651         return 0.0f;
    652     }
    653     return m_impl->toFloat(ok);
    654 }
    655 
    656 String String::isolatedCopy() const
    657 {
    658     if (!m_impl)
    659         return String();
    660     return m_impl->isolatedCopy();
    661 }
    662 
    663 bool String::isSafeToSendToAnotherThread() const
    664 {
    665     if (!impl())
    666         return true;
    667     if (impl()->isStatic())
    668         return true;
    669     // AtomicStrings are not safe to send between threads as ~StringImpl()
    670     // will try to remove them from the wrong AtomicStringTable.
    671     if (impl()->isAtomic())
    672         return false;
    673     if (impl()->hasOneRef())
    674         return true;
    675     return false;
    676 }
    677 
    678 void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
    679 {
    680     result.clear();
    681 
    682     unsigned startPos = 0;
    683     size_t endPos;
    684     while ((endPos = find(separator, startPos)) != notFound) {
    685         if (allowEmptyEntries || startPos != endPos)
    686             result.append(substring(startPos, endPos - startPos));
    687         startPos = endPos + separator.length();
    688     }
    689     if (allowEmptyEntries || startPos != length())
    690         result.append(substring(startPos));
    691 }
    692 
    693 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
    694 {
    695     result.clear();
    696 
    697     unsigned startPos = 0;
    698     size_t endPos;
    699     while ((endPos = find(separator, startPos)) != notFound) {
    700         if (allowEmptyEntries || startPos != endPos)
    701             result.append(substring(startPos, endPos - startPos));
    702         startPos = endPos + 1;
    703     }
    704     if (allowEmptyEntries || startPos != length())
    705         result.append(substring(startPos));
    706 }
    707 
    708 CString String::ascii() const
    709 {
    710     // Printable ASCII characters 32..127 and the null character are
    711     // preserved, characters outside of this range are converted to '?'.
    712 
    713     unsigned length = this->length();
    714     if (!length) {
    715         char* characterBuffer;
    716         return CString::newUninitialized(length, characterBuffer);
    717     }
    718 
    719     if (this->is8Bit()) {
    720         const LChar* characters = this->characters8();
    721 
    722         char* characterBuffer;
    723         CString result = CString::newUninitialized(length, characterBuffer);
    724 
    725         for (unsigned i = 0; i < length; ++i) {
    726             LChar ch = characters[i];
    727             characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
    728         }
    729 
    730         return result;
    731     }
    732 
    733     const UChar* characters = this->characters16();
    734 
    735     char* characterBuffer;
    736     CString result = CString::newUninitialized(length, characterBuffer);
    737 
    738     for (unsigned i = 0; i < length; ++i) {
    739         UChar ch = characters[i];
    740         characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
    741     }
    742 
    743     return result;
    744 }
    745 
    746 CString String::latin1() const
    747 {
    748     // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
    749     // preserved, characters outside of this range are converted to '?'.
    750 
    751     unsigned length = this->length();
    752 
    753     if (!length)
    754         return CString("", 0);
    755 
    756     if (is8Bit())
    757         return CString(reinterpret_cast<const char*>(this->characters8()), length);
    758 
    759     const UChar* characters = this->characters16();
    760 
    761     char* characterBuffer;
    762     CString result = CString::newUninitialized(length, characterBuffer);
    763 
    764     for (unsigned i = 0; i < length; ++i) {
    765         UChar ch = characters[i];
    766         characterBuffer[i] = ch > 0xff ? '?' : ch;
    767     }
    768 
    769     return result;
    770 }
    771 
    772 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
    773 static inline void putUTF8Triple(char*& buffer, UChar ch)
    774 {
    775     ASSERT(ch >= 0x0800);
    776     *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
    777     *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
    778     *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
    779 }
    780 
    781 CString String::utf8(ConversionMode mode) const
    782 {
    783     unsigned length = this->length();
    784 
    785     if (!length)
    786         return CString("", 0);
    787 
    788     // Allocate a buffer big enough to hold all the characters
    789     // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
    790     // Optimization ideas, if we find this function is hot:
    791     //  * We could speculatively create a CStringBuffer to contain 'length'
    792     //    characters, and resize if necessary (i.e. if the buffer contains
    793     //    non-ascii characters). (Alternatively, scan the buffer first for
    794     //    ascii characters, so we know this will be sufficient).
    795     //  * We could allocate a CStringBuffer with an appropriate size to
    796     //    have a good chance of being able to write the string into the
    797     //    buffer without reallocing (say, 1.5 x length).
    798     if (length > numeric_limits<unsigned>::max() / 3)
    799         return CString();
    800     Vector<char, 1024> bufferVector(length * 3);
    801 
    802     char* buffer = bufferVector.data();
    803 
    804     if (is8Bit()) {
    805         const LChar* characters = this->characters8();
    806 
    807         ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
    808         ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
    809     } else {
    810         const UChar* characters = this->characters16();
    811 
    812         if (mode == StrictConversionReplacingUnpairedSurrogatesWithFFFD) {
    813             const UChar* charactersEnd = characters + length;
    814             char* bufferEnd = buffer + bufferVector.size();
    815             while (characters < charactersEnd) {
    816                 // Use strict conversion to detect unpaired surrogates.
    817                 ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, true);
    818                 ASSERT(result != targetExhausted);
    819                 // Conversion fails when there is an unpaired surrogate.
    820                 // Put replacement character (U+FFFD) instead of the unpaired surrogate.
    821                 if (result != conversionOK) {
    822                     ASSERT((0xD800 <= *characters && *characters <= 0xDFFF));
    823                     // There should be room left, since one UChar hasn't been converted.
    824                     ASSERT((buffer + 3) <= bufferEnd);
    825                     putUTF8Triple(buffer, replacementCharacter);
    826                     ++characters;
    827                 }
    828             }
    829         } else {
    830             bool strict = mode == StrictConversion;
    831             ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
    832             ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
    833 
    834             // Only produced from strict conversion.
    835             if (result == sourceIllegal) {
    836                 ASSERT(strict);
    837                 return CString();
    838             }
    839 
    840             // Check for an unconverted high surrogate.
    841             if (result == sourceExhausted) {
    842                 if (strict)
    843                     return CString();
    844                 // This should be one unpaired high surrogate. Treat it the same
    845                 // was as an unpaired high surrogate would have been handled in
    846                 // the middle of a string with non-strict conversion - which is
    847                 // to say, simply encode it to UTF-8.
    848                 ASSERT((characters + 1) == (this->characters16() + length));
    849                 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
    850                 // There should be room left, since one UChar hasn't been converted.
    851                 ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
    852                 putUTF8Triple(buffer, *characters);
    853             }
    854         }
    855     }
    856 
    857     return CString(bufferVector.data(), buffer - bufferVector.data());
    858 }
    859 
    860 String String::make8BitFrom16BitSource(const UChar* source, size_t length)
    861 {
    862     if (!length)
    863         return String();
    864 
    865     LChar* destination;
    866     String result = String::createUninitialized(length, destination);
    867 
    868     copyLCharsFromUCharSource(destination, source, length);
    869 
    870     return result;
    871 }
    872 
    873 String String::make16BitFrom8BitSource(const LChar* source, size_t length)
    874 {
    875     if (!length)
    876         return String();
    877 
    878     UChar* destination;
    879     String result = String::createUninitialized(length, destination);
    880 
    881     StringImpl::copyChars(destination, source, length);
    882 
    883     return result;
    884 }
    885 
    886 String String::fromUTF8(const LChar* stringStart, size_t length)
    887 {
    888     RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
    889 
    890     if (!stringStart)
    891         return String();
    892 
    893     if (!length)
    894         return emptyString();
    895 
    896     if (charactersAreAllASCII(stringStart, length))
    897         return StringImpl::create(stringStart, length);
    898 
    899     Vector<UChar, 1024> buffer(length);
    900     UChar* bufferStart = buffer.data();
    901 
    902     UChar* bufferCurrent = bufferStart;
    903     const char* stringCurrent = reinterpret_cast<const char*>(stringStart);
    904     if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char *>(stringStart + length), &bufferCurrent, bufferCurrent + buffer.size()) != conversionOK)
    905         return String();
    906 
    907     unsigned utf16Length = bufferCurrent - bufferStart;
    908     ASSERT(utf16Length < length);
    909     return StringImpl::create(bufferStart, utf16Length);
    910 }
    911 
    912 String String::fromUTF8(const LChar* string)
    913 {
    914     if (!string)
    915         return String();
    916     return fromUTF8(string, strlen(reinterpret_cast<const char*>(string)));
    917 }
    918 
    919 String String::fromUTF8(const CString& s)
    920 {
    921     return fromUTF8(s.data());
    922 }
    923 
    924 String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size)
    925 {
    926     String utf8 = fromUTF8(string, size);
    927     if (!utf8)
    928         return String(string, size);
    929     return utf8;
    930 }
    931 
    932 // String Operations
    933 
    934 static bool isCharacterAllowedInBase(UChar c, int base)
    935 {
    936     if (c > 0x7F)
    937         return false;
    938     if (isASCIIDigit(c))
    939         return c - '0' < base;
    940     if (isASCIIAlpha(c)) {
    941         if (base > 36)
    942             base = 36;
    943         return (c >= 'a' && c < 'a' + base - 10)
    944             || (c >= 'A' && c < 'A' + base - 10);
    945     }
    946     return false;
    947 }
    948 
    949 template <typename IntegralType, typename CharType>
    950 static inline IntegralType toIntegralType(const CharType* data, size_t length, bool* ok, int base)
    951 {
    952     static const IntegralType integralMax = numeric_limits<IntegralType>::max();
    953     static const bool isSigned = numeric_limits<IntegralType>::is_signed;
    954     const IntegralType maxMultiplier = integralMax / base;
    955 
    956     IntegralType value = 0;
    957     bool isOk = false;
    958     bool isNegative = false;
    959 
    960     if (!data)
    961         goto bye;
    962 
    963     // skip leading whitespace
    964     while (length && isSpaceOrNewline(*data)) {
    965         --length;
    966         ++data;
    967     }
    968 
    969     if (isSigned && length && *data == '-') {
    970         --length;
    971         ++data;
    972         isNegative = true;
    973     } else if (length && *data == '+') {
    974         --length;
    975         ++data;
    976     }
    977 
    978     if (!length || !isCharacterAllowedInBase(*data, base))
    979         goto bye;
    980 
    981     while (length && isCharacterAllowedInBase(*data, base)) {
    982         --length;
    983         IntegralType digitValue;
    984         CharType c = *data;
    985         if (isASCIIDigit(c))
    986             digitValue = c - '0';
    987         else if (c >= 'a')
    988             digitValue = c - 'a' + 10;
    989         else
    990             digitValue = c - 'A' + 10;
    991 
    992         if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
    993             goto bye;
    994 
    995         value = base * value + digitValue;
    996         ++data;
    997     }
    998 
    999 #if COMPILER(MSVC)
   1000 #pragma warning(push, 0)
   1001 #pragma warning(disable:4146)
   1002 #endif
   1003 
   1004     if (isNegative)
   1005         value = -value;
   1006 
   1007 #if COMPILER(MSVC)
   1008 #pragma warning(pop)
   1009 #endif
   1010 
   1011     // skip trailing space
   1012     while (length && isSpaceOrNewline(*data)) {
   1013         --length;
   1014         ++data;
   1015     }
   1016 
   1017     if (!length)
   1018         isOk = true;
   1019 bye:
   1020     if (ok)
   1021         *ok = isOk;
   1022     return isOk ? value : 0;
   1023 }
   1024 
   1025 template <typename CharType>
   1026 static unsigned lengthOfCharactersAsInteger(const CharType* data, size_t length)
   1027 {
   1028     size_t i = 0;
   1029 
   1030     // Allow leading spaces.
   1031     for (; i != length; ++i) {
   1032         if (!isSpaceOrNewline(data[i]))
   1033             break;
   1034     }
   1035 
   1036     // Allow sign.
   1037     if (i != length && (data[i] == '+' || data[i] == '-'))
   1038         ++i;
   1039 
   1040     // Allow digits.
   1041     for (; i != length; ++i) {
   1042         if (!isASCIIDigit(data[i]))
   1043             break;
   1044     }
   1045 
   1046     return i;
   1047 }
   1048 
   1049 int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base)
   1050 {
   1051     return toIntegralType<int, LChar>(data, length, ok, base);
   1052 }
   1053 
   1054 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
   1055 {
   1056     return toIntegralType<int, UChar>(data, length, ok, base);
   1057 }
   1058 
   1059 unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base)
   1060 {
   1061     return toIntegralType<unsigned, LChar>(data, length, ok, base);
   1062 }
   1063 
   1064 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
   1065 {
   1066     return toIntegralType<unsigned, UChar>(data, length, ok, base);
   1067 }
   1068 
   1069 int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base)
   1070 {
   1071     return toIntegralType<int64_t, LChar>(data, length, ok, base);
   1072 }
   1073 
   1074 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
   1075 {
   1076     return toIntegralType<int64_t, UChar>(data, length, ok, base);
   1077 }
   1078 
   1079 uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, int base)
   1080 {
   1081     return toIntegralType<uint64_t, LChar>(data, length, ok, base);
   1082 }
   1083 
   1084 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
   1085 {
   1086     return toIntegralType<uint64_t, UChar>(data, length, ok, base);
   1087 }
   1088 
   1089 intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, int base)
   1090 {
   1091     return toIntegralType<intptr_t, LChar>(data, length, ok, base);
   1092 }
   1093 
   1094 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
   1095 {
   1096     return toIntegralType<intptr_t, UChar>(data, length, ok, base);
   1097 }
   1098 
   1099 int charactersToInt(const LChar* data, size_t length, bool* ok)
   1100 {
   1101     return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1102 }
   1103 
   1104 int charactersToInt(const UChar* data, size_t length, bool* ok)
   1105 {
   1106     return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
   1107 }
   1108 
   1109 unsigned charactersToUInt(const LChar* data, size_t length, bool* ok)
   1110 {
   1111     return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1112 }
   1113 
   1114 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
   1115 {
   1116     return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
   1117 }
   1118 
   1119 int64_t charactersToInt64(const LChar* data, size_t length, bool* ok)
   1120 {
   1121     return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1122 }
   1123 
   1124 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
   1125 {
   1126     return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
   1127 }
   1128 
   1129 uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok)
   1130 {
   1131     return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1132 }
   1133 
   1134 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
   1135 {
   1136     return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
   1137 }
   1138 
   1139 intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok)
   1140 {
   1141     return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1142 }
   1143 
   1144 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
   1145 {
   1146     return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
   1147 }
   1148 
   1149 enum TrailingJunkPolicy { DisallowTrailingJunk, AllowTrailingJunk };
   1150 
   1151 template <typename CharType, TrailingJunkPolicy policy>
   1152 static inline double toDoubleType(const CharType* data, size_t length, bool* ok, size_t& parsedLength)
   1153 {
   1154     size_t leadingSpacesLength = 0;
   1155     while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength]))
   1156         ++leadingSpacesLength;
   1157 
   1158     double number = parseDouble(data + leadingSpacesLength, length - leadingSpacesLength, parsedLength);
   1159     if (!parsedLength) {
   1160         if (ok)
   1161             *ok = false;
   1162         return 0.0;
   1163     }
   1164 
   1165     parsedLength += leadingSpacesLength;
   1166     if (ok)
   1167         *ok = policy == AllowTrailingJunk || parsedLength == length;
   1168     return number;
   1169 }
   1170 
   1171 double charactersToDouble(const LChar* data, size_t length, bool* ok)
   1172 {
   1173     size_t parsedLength;
   1174     return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength);
   1175 }
   1176 
   1177 double charactersToDouble(const UChar* data, size_t length, bool* ok)
   1178 {
   1179     size_t parsedLength;
   1180     return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength);
   1181 }
   1182 
   1183 float charactersToFloat(const LChar* data, size_t length, bool* ok)
   1184 {
   1185     // FIXME: This will return ok even when the string fits into a double but not a float.
   1186     size_t parsedLength;
   1187     return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength));
   1188 }
   1189 
   1190 float charactersToFloat(const UChar* data, size_t length, bool* ok)
   1191 {
   1192     // FIXME: This will return ok even when the string fits into a double but not a float.
   1193     size_t parsedLength;
   1194     return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength));
   1195 }
   1196 
   1197 float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength)
   1198 {
   1199     // FIXME: This will return ok even when the string fits into a double but not a float.
   1200     return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, length, 0, parsedLength));
   1201 }
   1202 
   1203 float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength)
   1204 {
   1205     // FIXME: This will return ok even when the string fits into a double but not a float.
   1206     return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, length, 0, parsedLength));
   1207 }
   1208 
   1209 const String& emptyString()
   1210 {
   1211     DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty()));
   1212     return emptyString;
   1213 }
   1214 
   1215 } // namespace WTF
   1216 
   1217 #ifndef NDEBUG
   1218 // For use in the debugger
   1219 String* string(const char*);
   1220 Vector<char> asciiDebug(StringImpl* impl);
   1221 Vector<char> asciiDebug(String& string);
   1222 
   1223 void String::show() const
   1224 {
   1225     dataLogF("%s\n", asciiDebug(impl()).data());
   1226 }
   1227 
   1228 String* string(const char* s)
   1229 {
   1230     // leaks memory!
   1231     return new String(s);
   1232 }
   1233 
   1234 Vector<char> asciiDebug(StringImpl* impl)
   1235 {
   1236     if (!impl)
   1237         return asciiDebug(String("[null]").impl());
   1238 
   1239     Vector<char> buffer;
   1240     for (unsigned i = 0; i < impl->length(); ++i) {
   1241         UChar ch = (*impl)[i];
   1242         if (isASCIIPrintable(ch)) {
   1243             if (ch == '\\')
   1244                 buffer.append(ch);
   1245             buffer.append(ch);
   1246         } else {
   1247             buffer.append('\\');
   1248             buffer.append('u');
   1249             appendUnsignedAsHexFixedSize(ch, buffer, 4);
   1250         }
   1251     }
   1252     buffer.append('\0');
   1253     return buffer;
   1254 }
   1255 
   1256 Vector<char> asciiDebug(String& string)
   1257 {
   1258     return asciiDebug(string.impl());
   1259 }
   1260 
   1261 #endif
   1262