Home | History | Annotate | Download | only in text
      1 /*
      2  * (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2012 Apple Inc. All rights reserved.
      4  * Copyright (C) 2007-2009 Torch Mobile, Inc.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  */
     21 
     22 #include "config.h"
     23 #include "WTFString.h"
     24 
     25 #include "IntegerToStringConversion.h"
     26 #include <stdarg.h>
     27 #include "wtf/ASCIICType.h"
     28 #include "wtf/DataLog.h"
     29 #include "wtf/HexNumber.h"
     30 #include "wtf/MathExtras.h"
     31 #include "wtf/text/CString.h"
     32 #include "wtf/StringExtras.h"
     33 #include "wtf/Vector.h"
     34 #include "wtf/dtoa.h"
     35 #include "wtf/unicode/CharacterNames.h"
     36 #include "wtf/unicode/UTF8.h"
     37 #include "wtf/unicode/Unicode.h"
     38 
     39 using namespace std;
     40 
     41 namespace WTF {
     42 
     43 using namespace Unicode;
     44 using namespace std;
     45 
     46 // Construct a string with UTF-16 data.
     47 String::String(const UChar* characters, unsigned length)
     48     : m_impl(characters ? StringImpl::create(characters, length) : nullptr)
     49 {
     50 }
     51 
     52 // Construct a string with UTF-16 data, from a null-terminated source.
     53 String::String(const UChar* str)
     54 {
     55     if (!str)
     56         return;
     57     m_impl = StringImpl::create(str, lengthOfNullTerminatedString(str));
     58 }
     59 
     60 // Construct a string with latin1 data.
     61 String::String(const LChar* characters, unsigned length)
     62     : m_impl(characters ? StringImpl::create(characters, length) : nullptr)
     63 {
     64 }
     65 
     66 String::String(const char* characters, unsigned length)
     67     : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters), length) : nullptr)
     68 {
     69 }
     70 
     71 // Construct a string with latin1 data, from a null-terminated source.
     72 String::String(const LChar* characters)
     73     : m_impl(characters ? StringImpl::create(characters) : nullptr)
     74 {
     75 }
     76 
     77 String::String(const char* characters)
     78     : m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters)) : nullptr)
     79 {
     80 }
     81 
     82 void String::append(const String& string)
     83 {
     84     if (string.isEmpty())
     85         return;
     86     if (!m_impl) {
     87         m_impl = string.m_impl;
     88         return;
     89     }
     90 
     91     // FIXME: This is extremely inefficient. So much so that we might want to take this
     92     // out of String's API. We can make it better by optimizing the case where exactly
     93     // one String is pointing at this StringImpl, but even then it's going to require a
     94     // call into the allocator every single time.
     95 
     96     if (m_impl->is8Bit() && string.m_impl->is8Bit()) {
     97         LChar* data;
     98         RELEASE_ASSERT(string.length() <= numeric_limits<unsigned>::max() - m_impl->length());
     99         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + string.length(), data);
    100         memcpy(data, m_impl->characters8(), m_impl->length() * sizeof(LChar));
    101         memcpy(data + m_impl->length(), string.characters8(), string.length() * sizeof(LChar));
    102         m_impl = newImpl.release();
    103         return;
    104     }
    105 
    106     UChar* data;
    107     RELEASE_ASSERT(string.length() <= numeric_limits<unsigned>::max() - m_impl->length());
    108     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + string.length(), data);
    109 
    110     if (m_impl->is8Bit())
    111         StringImpl::copyChars(data, m_impl->characters8(), m_impl->length());
    112     else
    113         StringImpl::copyChars(data, m_impl->characters16(), m_impl->length());
    114 
    115     if (string.impl()->is8Bit())
    116         StringImpl::copyChars(data + m_impl->length(), string.impl()->characters8(), string.impl()->length());
    117     else
    118         StringImpl::copyChars(data + m_impl->length(), string.impl()->characters16(), string.impl()->length());
    119 
    120     m_impl = newImpl.release();
    121 }
    122 
    123 template <typename CharacterType>
    124 inline void String::appendInternal(CharacterType c)
    125 {
    126     // FIXME: This is extremely inefficient. So much so that we might want to take this
    127     // out of String's API. We can make it better by optimizing the case where exactly
    128     // one String is pointing at this StringImpl, but even then it's going to require a
    129     // call into the allocator every single time.
    130     if (!m_impl) {
    131         m_impl = StringImpl::create(&c, 1);
    132         return;
    133     }
    134 
    135     UChar* data; // FIXME: We should be able to create an 8 bit string via this code path.
    136     RELEASE_ASSERT(m_impl->length() < numeric_limits<unsigned>::max());
    137     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
    138     if (m_impl->is8Bit())
    139         StringImpl::copyChars(data, m_impl->characters8(), m_impl->length());
    140     else
    141         StringImpl::copyChars(data, m_impl->characters16(), m_impl->length());
    142     data[m_impl->length()] = c;
    143     m_impl = newImpl.release();
    144 }
    145 
    146 void String::append(LChar c)
    147 {
    148     appendInternal(c);
    149 }
    150 
    151 void String::append(UChar c)
    152 {
    153     appendInternal(c);
    154 }
    155 
    156 int codePointCompare(const String& a, const String& b)
    157 {
    158     return codePointCompare(a.impl(), b.impl());
    159 }
    160 
    161 void String::insert(const String& string, unsigned position)
    162 {
    163     if (string.isEmpty()) {
    164         if (string.isNull())
    165             return;
    166         if (isNull())
    167             m_impl = string.impl();
    168         return;
    169     }
    170 
    171     if (string.is8Bit())
    172         insert(string.impl()->characters8(), string.length(), position);
    173     else
    174         insert(string.impl()->characters16(), string.length(), position);
    175 }
    176 
    177 void String::append(const LChar* charactersToAppend, unsigned lengthToAppend)
    178 {
    179     if (!m_impl) {
    180         if (!charactersToAppend)
    181             return;
    182         m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
    183         return;
    184     }
    185 
    186     if (!lengthToAppend)
    187         return;
    188 
    189     ASSERT(charactersToAppend);
    190 
    191     unsigned strLength = m_impl->length();
    192 
    193     if (m_impl->is8Bit()) {
    194         RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength);
    195         LChar* data;
    196         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
    197         StringImpl::copyChars(data, m_impl->characters8(), strLength);
    198         StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
    199         m_impl = newImpl.release();
    200         return;
    201     }
    202 
    203     RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength);
    204     UChar* data;
    205     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
    206     StringImpl::copyChars(data, m_impl->characters16(), strLength);
    207     StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
    208     m_impl = newImpl.release();
    209 }
    210 
    211 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
    212 {
    213     if (!m_impl) {
    214         if (!charactersToAppend)
    215             return;
    216         m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
    217         return;
    218     }
    219 
    220     if (!lengthToAppend)
    221         return;
    222 
    223     unsigned strLength = m_impl->length();
    224 
    225     ASSERT(charactersToAppend);
    226     RELEASE_ASSERT(lengthToAppend <= numeric_limits<unsigned>::max() - strLength);
    227     UChar* data;
    228     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(strLength + lengthToAppend, data);
    229     if (m_impl->is8Bit())
    230         StringImpl::copyChars(data, characters8(), strLength);
    231     else
    232         StringImpl::copyChars(data, characters16(), strLength);
    233     StringImpl::copyChars(data + strLength, charactersToAppend, lengthToAppend);
    234     m_impl = newImpl.release();
    235 }
    236 
    237 template<typename CharType>
    238 PassRefPtr<StringImpl> insertInternal(PassRefPtr<StringImpl> impl, const CharType* charactersToInsert, unsigned lengthToInsert, unsigned position)
    239 {
    240     if (!lengthToInsert)
    241         return impl;
    242 
    243     ASSERT(charactersToInsert);
    244     UChar* data; // FIXME: We should be able to create an 8 bit string here.
    245     RELEASE_ASSERT(lengthToInsert <= numeric_limits<unsigned>::max() - impl->length());
    246     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(impl->length() + lengthToInsert, data);
    247 
    248     if (impl->is8Bit())
    249         StringImpl::copyChars(data, impl->characters8(), position);
    250     else
    251         StringImpl::copyChars(data, impl->characters16(), position);
    252 
    253     StringImpl::copyChars(data + position, charactersToInsert, lengthToInsert);
    254 
    255     if (impl->is8Bit())
    256         StringImpl::copyChars(data + position + lengthToInsert, impl->characters8() + position, impl->length() - position);
    257     else
    258         StringImpl::copyChars(data + position + lengthToInsert, impl->characters16() + position, impl->length() - position);
    259 
    260     return newImpl.release();
    261 }
    262 
    263 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
    264 {
    265     if (position >= length()) {
    266         append(charactersToInsert, lengthToInsert);
    267         return;
    268     }
    269     ASSERT(m_impl);
    270     m_impl = insertInternal(m_impl.release(), charactersToInsert, lengthToInsert, position);
    271 }
    272 
    273 void String::insert(const LChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
    274 {
    275     if (position >= length()) {
    276         append(charactersToInsert, lengthToInsert);
    277         return;
    278     }
    279     ASSERT(m_impl);
    280     m_impl = insertInternal(m_impl.release(), charactersToInsert, lengthToInsert, position);
    281 }
    282 
    283 UChar32 String::characterStartingAt(unsigned i) const
    284 {
    285     if (!m_impl || i >= m_impl->length())
    286         return 0;
    287     return m_impl->characterStartingAt(i);
    288 }
    289 
    290 void String::ensure16Bit()
    291 {
    292     unsigned length = this->length();
    293     if (!length || !is8Bit())
    294         return;
    295     m_impl = make16BitFrom8BitSource(m_impl->characters8(), length).impl();
    296 }
    297 
    298 void String::truncate(unsigned position)
    299 {
    300     if (position >= length())
    301         return;
    302     if (m_impl->is8Bit()) {
    303         LChar* data;
    304         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
    305         memcpy(data, m_impl->characters8(), position * sizeof(LChar));
    306         m_impl = newImpl.release();
    307     } else {
    308         UChar* data;
    309         RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
    310         memcpy(data, m_impl->characters16(), position * sizeof(UChar));
    311         m_impl = newImpl.release();
    312     }
    313 }
    314 
    315 template <typename CharacterType>
    316 inline void String::removeInternal(const CharacterType* characters, unsigned position, int lengthToRemove)
    317 {
    318     CharacterType* data;
    319     RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
    320     memcpy(data, characters, position * sizeof(CharacterType));
    321     memcpy(data + position, characters + position + lengthToRemove,
    322         (length() - lengthToRemove - position) * sizeof(CharacterType));
    323 
    324     m_impl = newImpl.release();
    325 }
    326 
    327 void String::remove(unsigned position, int lengthToRemove)
    328 {
    329     if (lengthToRemove <= 0)
    330         return;
    331     if (position >= length())
    332         return;
    333     if (static_cast<unsigned>(lengthToRemove) > length() - position)
    334         lengthToRemove = length() - position;
    335 
    336     if (is8Bit()) {
    337         removeInternal(characters8(), position, lengthToRemove);
    338 
    339         return;
    340     }
    341 
    342     removeInternal(characters16(), position, lengthToRemove);
    343 }
    344 
    345 String String::substring(unsigned pos, unsigned len) const
    346 {
    347     if (!m_impl)
    348         return String();
    349     return m_impl->substring(pos, len);
    350 }
    351 
    352 String String::lower() const
    353 {
    354     if (!m_impl)
    355         return String();
    356     return m_impl->lower();
    357 }
    358 
    359 String String::upper() const
    360 {
    361     if (!m_impl)
    362         return String();
    363     return m_impl->upper();
    364 }
    365 
    366 String String::lower(const AtomicString& localeIdentifier) const
    367 {
    368     if (!m_impl)
    369         return String();
    370     return m_impl->lower(localeIdentifier);
    371 }
    372 
    373 String String::upper(const AtomicString& localeIdentifier) const
    374 {
    375     if (!m_impl)
    376         return String();
    377     return m_impl->upper(localeIdentifier);
    378 }
    379 
    380 String String::stripWhiteSpace() const
    381 {
    382     if (!m_impl)
    383         return String();
    384     return m_impl->stripWhiteSpace();
    385 }
    386 
    387 String String::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace) const
    388 {
    389     if (!m_impl)
    390         return String();
    391     return m_impl->stripWhiteSpace(isWhiteSpace);
    392 }
    393 
    394 String String::simplifyWhiteSpace(StripBehavior stripBehavior) const
    395 {
    396     if (!m_impl)
    397         return String();
    398     return m_impl->simplifyWhiteSpace(stripBehavior);
    399 }
    400 
    401 String String::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior) const
    402 {
    403     if (!m_impl)
    404         return String();
    405     return m_impl->simplifyWhiteSpace(isWhiteSpace, stripBehavior);
    406 }
    407 
    408 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
    409 {
    410     if (!m_impl)
    411         return String();
    412     return m_impl->removeCharacters(findMatch);
    413 }
    414 
    415 String String::foldCase() const
    416 {
    417     if (!m_impl)
    418         return String();
    419     return m_impl->foldCase();
    420 }
    421 
    422 bool String::percentage(int& result) const
    423 {
    424     if (!m_impl || !m_impl->length())
    425         return false;
    426 
    427     if ((*m_impl)[m_impl->length() - 1] != '%')
    428         return false;
    429 
    430     if (m_impl->is8Bit())
    431         result = charactersToIntStrict(m_impl->characters8(), m_impl->length() - 1);
    432     else
    433         result = charactersToIntStrict(m_impl->characters16(), m_impl->length() - 1);
    434 
    435     return true;
    436 }
    437 
    438 Vector<UChar> String::charactersWithNullTermination() const
    439 {
    440     if (!m_impl)
    441         return Vector<UChar>();
    442 
    443     Vector<UChar> result;
    444     result.reserveInitialCapacity(length() + 1);
    445     appendTo(result);
    446     result.append(0);
    447     return result;
    448 }
    449 
    450 unsigned String::copyTo(UChar* buffer, unsigned pos, unsigned maxLength) const
    451 {
    452     unsigned length = this->length();
    453     RELEASE_ASSERT(pos <= length);
    454     unsigned numCharacters = std::min(length - pos, maxLength);
    455     if (!numCharacters)
    456         return 0;
    457     if (is8Bit())
    458         StringImpl::copyChars(buffer, characters8() + pos, numCharacters);
    459     else
    460         StringImpl::copyChars(buffer, characters16() + pos, numCharacters);
    461     return numCharacters;
    462 }
    463 
    464 String String::format(const char *format, ...)
    465 {
    466     va_list args;
    467     va_start(args, format);
    468 
    469     Vector<char, 256> buffer;
    470 
    471     // Do the format once to get the length.
    472 #if COMPILER(MSVC)
    473     int result = _vscprintf(format, args);
    474 #else
    475     char ch;
    476     int result = vsnprintf(&ch, 1, format, args);
    477     // We need to call va_end() and then va_start() again here, as the
    478     // contents of args is undefined after the call to vsnprintf
    479     // according to http://man.cx/snprintf(3)
    480     //
    481     // Not calling va_end/va_start here happens to work on lots of
    482     // systems, but fails e.g. on 64bit Linux.
    483     va_end(args);
    484     va_start(args, format);
    485 #endif
    486 
    487     if (result == 0)
    488         return String("");
    489     if (result < 0)
    490         return String();
    491     unsigned len = result;
    492     buffer.grow(len + 1);
    493 
    494     // Now do the formatting again, guaranteed to fit.
    495     vsnprintf(buffer.data(), buffer.size(), format, args);
    496 
    497     va_end(args);
    498 
    499     return StringImpl::create(reinterpret_cast<const LChar*>(buffer.data()), len);
    500 }
    501 
    502 String String::number(int number)
    503 {
    504     return numberToStringSigned<String>(number);
    505 }
    506 
    507 String String::number(unsigned number)
    508 {
    509     return numberToStringUnsigned<String>(number);
    510 }
    511 
    512 String String::number(long number)
    513 {
    514     return numberToStringSigned<String>(number);
    515 }
    516 
    517 String String::number(unsigned long number)
    518 {
    519     return numberToStringUnsigned<String>(number);
    520 }
    521 
    522 String String::number(long long number)
    523 {
    524     return numberToStringSigned<String>(number);
    525 }
    526 
    527 String String::number(unsigned long long number)
    528 {
    529     return numberToStringUnsigned<String>(number);
    530 }
    531 
    532 String String::number(double number, unsigned precision, TrailingZerosTruncatingPolicy trailingZerosTruncatingPolicy)
    533 {
    534     NumberToStringBuffer buffer;
    535     return String(numberToFixedPrecisionString(number, precision, buffer, trailingZerosTruncatingPolicy == TruncateTrailingZeros));
    536 }
    537 
    538 String String::numberToStringECMAScript(double number)
    539 {
    540     NumberToStringBuffer buffer;
    541     return String(numberToString(number, buffer));
    542 }
    543 
    544 String String::numberToStringFixedWidth(double number, unsigned decimalPlaces)
    545 {
    546     NumberToStringBuffer buffer;
    547     return String(numberToFixedWidthString(number, decimalPlaces, buffer));
    548 }
    549 
    550 int String::toIntStrict(bool* ok, int base) const
    551 {
    552     if (!m_impl) {
    553         if (ok)
    554             *ok = false;
    555         return 0;
    556     }
    557     return m_impl->toIntStrict(ok, base);
    558 }
    559 
    560 unsigned String::toUIntStrict(bool* ok, int base) const
    561 {
    562     if (!m_impl) {
    563         if (ok)
    564             *ok = false;
    565         return 0;
    566     }
    567     return m_impl->toUIntStrict(ok, base);
    568 }
    569 
    570 int64_t String::toInt64Strict(bool* ok, int base) const
    571 {
    572     if (!m_impl) {
    573         if (ok)
    574             *ok = false;
    575         return 0;
    576     }
    577     return m_impl->toInt64Strict(ok, base);
    578 }
    579 
    580 uint64_t String::toUInt64Strict(bool* ok, int base) const
    581 {
    582     if (!m_impl) {
    583         if (ok)
    584             *ok = false;
    585         return 0;
    586     }
    587     return m_impl->toUInt64Strict(ok, base);
    588 }
    589 
    590 intptr_t String::toIntPtrStrict(bool* ok, int base) const
    591 {
    592     if (!m_impl) {
    593         if (ok)
    594             *ok = false;
    595         return 0;
    596     }
    597     return m_impl->toIntPtrStrict(ok, base);
    598 }
    599 
    600 int String::toInt(bool* ok) const
    601 {
    602     if (!m_impl) {
    603         if (ok)
    604             *ok = false;
    605         return 0;
    606     }
    607     return m_impl->toInt(ok);
    608 }
    609 
    610 unsigned String::toUInt(bool* ok) const
    611 {
    612     if (!m_impl) {
    613         if (ok)
    614             *ok = false;
    615         return 0;
    616     }
    617     return m_impl->toUInt(ok);
    618 }
    619 
    620 int64_t String::toInt64(bool* ok) const
    621 {
    622     if (!m_impl) {
    623         if (ok)
    624             *ok = false;
    625         return 0;
    626     }
    627     return m_impl->toInt64(ok);
    628 }
    629 
    630 uint64_t String::toUInt64(bool* ok) const
    631 {
    632     if (!m_impl) {
    633         if (ok)
    634             *ok = false;
    635         return 0;
    636     }
    637     return m_impl->toUInt64(ok);
    638 }
    639 
    640 intptr_t String::toIntPtr(bool* ok) const
    641 {
    642     if (!m_impl) {
    643         if (ok)
    644             *ok = false;
    645         return 0;
    646     }
    647     return m_impl->toIntPtr(ok);
    648 }
    649 
    650 double String::toDouble(bool* ok) const
    651 {
    652     if (!m_impl) {
    653         if (ok)
    654             *ok = false;
    655         return 0.0;
    656     }
    657     return m_impl->toDouble(ok);
    658 }
    659 
    660 float String::toFloat(bool* ok) const
    661 {
    662     if (!m_impl) {
    663         if (ok)
    664             *ok = false;
    665         return 0.0f;
    666     }
    667     return m_impl->toFloat(ok);
    668 }
    669 
    670 String String::isolatedCopy() const
    671 {
    672     if (!m_impl)
    673         return String();
    674     return m_impl->isolatedCopy();
    675 }
    676 
    677 bool String::isSafeToSendToAnotherThread() const
    678 {
    679     if (!impl())
    680         return true;
    681     if (impl()->isStatic())
    682         return true;
    683     // AtomicStrings are not safe to send between threads as ~StringImpl()
    684     // will try to remove them from the wrong AtomicStringTable.
    685     if (impl()->isAtomic())
    686         return false;
    687     if (impl()->hasOneRef())
    688         return true;
    689     return false;
    690 }
    691 
    692 void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
    693 {
    694     result.clear();
    695 
    696     unsigned startPos = 0;
    697     size_t endPos;
    698     while ((endPos = find(separator, startPos)) != kNotFound) {
    699         if (allowEmptyEntries || startPos != endPos)
    700             result.append(substring(startPos, endPos - startPos));
    701         startPos = endPos + separator.length();
    702     }
    703     if (allowEmptyEntries || startPos != length())
    704         result.append(substring(startPos));
    705 }
    706 
    707 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
    708 {
    709     result.clear();
    710 
    711     unsigned startPos = 0;
    712     size_t endPos;
    713     while ((endPos = find(separator, startPos)) != kNotFound) {
    714         if (allowEmptyEntries || startPos != endPos)
    715             result.append(substring(startPos, endPos - startPos));
    716         startPos = endPos + 1;
    717     }
    718     if (allowEmptyEntries || startPos != length())
    719         result.append(substring(startPos));
    720 }
    721 
    722 CString String::ascii() const
    723 {
    724     // Printable ASCII characters 32..127 and the null character are
    725     // preserved, characters outside of this range are converted to '?'.
    726 
    727     unsigned length = this->length();
    728     if (!length) {
    729         char* characterBuffer;
    730         return CString::newUninitialized(length, characterBuffer);
    731     }
    732 
    733     if (this->is8Bit()) {
    734         const LChar* characters = this->characters8();
    735 
    736         char* characterBuffer;
    737         CString result = CString::newUninitialized(length, characterBuffer);
    738 
    739         for (unsigned i = 0; i < length; ++i) {
    740             LChar ch = characters[i];
    741             characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
    742         }
    743 
    744         return result;
    745     }
    746 
    747     const UChar* characters = this->characters16();
    748 
    749     char* characterBuffer;
    750     CString result = CString::newUninitialized(length, characterBuffer);
    751 
    752     for (unsigned i = 0; i < length; ++i) {
    753         UChar ch = characters[i];
    754         characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
    755     }
    756 
    757     return result;
    758 }
    759 
    760 CString String::latin1() const
    761 {
    762     // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
    763     // preserved, characters outside of this range are converted to '?'.
    764 
    765     unsigned length = this->length();
    766 
    767     if (!length)
    768         return CString("", 0);
    769 
    770     if (is8Bit())
    771         return CString(reinterpret_cast<const char*>(this->characters8()), length);
    772 
    773     const UChar* characters = this->characters16();
    774 
    775     char* characterBuffer;
    776     CString result = CString::newUninitialized(length, characterBuffer);
    777 
    778     for (unsigned i = 0; i < length; ++i) {
    779         UChar ch = characters[i];
    780         characterBuffer[i] = ch > 0xff ? '?' : ch;
    781     }
    782 
    783     return result;
    784 }
    785 
    786 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
    787 static inline void putUTF8Triple(char*& buffer, UChar ch)
    788 {
    789     ASSERT(ch >= 0x0800);
    790     *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
    791     *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
    792     *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
    793 }
    794 
    795 CString String::utf8(UTF8ConversionMode mode) const
    796 {
    797     unsigned length = this->length();
    798 
    799     if (!length)
    800         return CString("", 0);
    801 
    802     // Allocate a buffer big enough to hold all the characters
    803     // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
    804     // Optimization ideas, if we find this function is hot:
    805     //  * We could speculatively create a CStringBuffer to contain 'length'
    806     //    characters, and resize if necessary (i.e. if the buffer contains
    807     //    non-ascii characters). (Alternatively, scan the buffer first for
    808     //    ascii characters, so we know this will be sufficient).
    809     //  * We could allocate a CStringBuffer with an appropriate size to
    810     //    have a good chance of being able to write the string into the
    811     //    buffer without reallocing (say, 1.5 x length).
    812     if (length > numeric_limits<unsigned>::max() / 3)
    813         return CString();
    814     Vector<char, 1024> bufferVector(length * 3);
    815 
    816     char* buffer = bufferVector.data();
    817 
    818     if (is8Bit()) {
    819         const LChar* characters = this->characters8();
    820 
    821         ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
    822         ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
    823     } else {
    824         const UChar* characters = this->characters16();
    825 
    826         if (mode == StrictUTF8ConversionReplacingUnpairedSurrogatesWithFFFD) {
    827             const UChar* charactersEnd = characters + length;
    828             char* bufferEnd = buffer + bufferVector.size();
    829             while (characters < charactersEnd) {
    830                 // Use strict conversion to detect unpaired surrogates.
    831                 ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, true);
    832                 ASSERT(result != targetExhausted);
    833                 // Conversion fails when there is an unpaired surrogate.
    834                 // Put replacement character (U+FFFD) instead of the unpaired surrogate.
    835                 if (result != conversionOK) {
    836                     ASSERT((0xD800 <= *characters && *characters <= 0xDFFF));
    837                     // There should be room left, since one UChar hasn't been converted.
    838                     ASSERT((buffer + 3) <= bufferEnd);
    839                     putUTF8Triple(buffer, replacementCharacter);
    840                     ++characters;
    841                 }
    842             }
    843         } else {
    844             bool strict = mode == StrictUTF8Conversion;
    845             ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
    846             ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
    847 
    848             // Only produced from strict conversion.
    849             if (result == sourceIllegal) {
    850                 ASSERT(strict);
    851                 return CString();
    852             }
    853 
    854             // Check for an unconverted high surrogate.
    855             if (result == sourceExhausted) {
    856                 if (strict)
    857                     return CString();
    858                 // This should be one unpaired high surrogate. Treat it the same
    859                 // way as an unpaired high surrogate would have been handled in
    860                 // the middle of a string with non-strict conversion - which is
    861                 // to say, simply encode it to UTF-8.
    862                 ASSERT((characters + 1) == (this->characters16() + length));
    863                 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
    864                 // There should be room left, since one UChar hasn't been converted.
    865                 ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
    866                 putUTF8Triple(buffer, *characters);
    867             }
    868         }
    869     }
    870 
    871     return CString(bufferVector.data(), buffer - bufferVector.data());
    872 }
    873 
    874 String String::make8BitFrom16BitSource(const UChar* source, size_t length)
    875 {
    876     if (!length)
    877         return emptyString();
    878 
    879     LChar* destination;
    880     String result = String::createUninitialized(length, destination);
    881 
    882     copyLCharsFromUCharSource(destination, source, length);
    883 
    884     return result;
    885 }
    886 
    887 String String::make16BitFrom8BitSource(const LChar* source, size_t length)
    888 {
    889     if (!length)
    890         return emptyString16Bit();
    891 
    892     UChar* destination;
    893     String result = String::createUninitialized(length, destination);
    894 
    895     StringImpl::copyChars(destination, source, length);
    896 
    897     return result;
    898 }
    899 
    900 String String::fromUTF8(const LChar* stringStart, size_t length)
    901 {
    902     RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
    903 
    904     if (!stringStart)
    905         return String();
    906 
    907     if (!length)
    908         return emptyString();
    909 
    910     if (charactersAreAllASCII(stringStart, length))
    911         return StringImpl::create(stringStart, length);
    912 
    913     Vector<UChar, 1024> buffer(length);
    914     UChar* bufferStart = buffer.data();
    915 
    916     UChar* bufferCurrent = bufferStart;
    917     const char* stringCurrent = reinterpret_cast<const char*>(stringStart);
    918     if (convertUTF8ToUTF16(&stringCurrent, reinterpret_cast<const char *>(stringStart + length), &bufferCurrent, bufferCurrent + buffer.size()) != conversionOK)
    919         return String();
    920 
    921     unsigned utf16Length = bufferCurrent - bufferStart;
    922     ASSERT(utf16Length < length);
    923     return StringImpl::create(bufferStart, utf16Length);
    924 }
    925 
    926 String String::fromUTF8(const LChar* string)
    927 {
    928     if (!string)
    929         return String();
    930     return fromUTF8(string, strlen(reinterpret_cast<const char*>(string)));
    931 }
    932 
    933 String String::fromUTF8(const CString& s)
    934 {
    935     return fromUTF8(s.data());
    936 }
    937 
    938 String String::fromUTF8WithLatin1Fallback(const LChar* string, size_t size)
    939 {
    940     String utf8 = fromUTF8(string, size);
    941     if (!utf8)
    942         return String(string, size);
    943     return utf8;
    944 }
    945 
    946 // String Operations
    947 
    948 static bool isCharacterAllowedInBase(UChar c, int base)
    949 {
    950     if (c > 0x7F)
    951         return false;
    952     if (isASCIIDigit(c))
    953         return c - '0' < base;
    954     if (isASCIIAlpha(c)) {
    955         if (base > 36)
    956             base = 36;
    957         return (c >= 'a' && c < 'a' + base - 10)
    958             || (c >= 'A' && c < 'A' + base - 10);
    959     }
    960     return false;
    961 }
    962 
    963 template <typename IntegralType, typename CharType>
    964 static inline IntegralType toIntegralType(const CharType* data, size_t length, bool* ok, int base)
    965 {
    966     static const IntegralType integralMax = numeric_limits<IntegralType>::max();
    967     static const bool isSigned = numeric_limits<IntegralType>::is_signed;
    968     const IntegralType maxMultiplier = integralMax / base;
    969 
    970     IntegralType value = 0;
    971     bool isOk = false;
    972     bool isNegative = false;
    973 
    974     if (!data)
    975         goto bye;
    976 
    977     // skip leading whitespace
    978     while (length && isSpaceOrNewline(*data)) {
    979         --length;
    980         ++data;
    981     }
    982 
    983     if (isSigned && length && *data == '-') {
    984         --length;
    985         ++data;
    986         isNegative = true;
    987     } else if (length && *data == '+') {
    988         --length;
    989         ++data;
    990     }
    991 
    992     if (!length || !isCharacterAllowedInBase(*data, base))
    993         goto bye;
    994 
    995     while (length && isCharacterAllowedInBase(*data, base)) {
    996         --length;
    997         IntegralType digitValue;
    998         CharType c = *data;
    999         if (isASCIIDigit(c))
   1000             digitValue = c - '0';
   1001         else if (c >= 'a')
   1002             digitValue = c - 'a' + 10;
   1003         else
   1004             digitValue = c - 'A' + 10;
   1005 
   1006         if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
   1007             goto bye;
   1008 
   1009         value = base * value + digitValue;
   1010         ++data;
   1011     }
   1012 
   1013 #if COMPILER(MSVC)
   1014 #pragma warning(push, 0)
   1015 #pragma warning(disable:4146)
   1016 #endif
   1017 
   1018     if (isNegative)
   1019         value = -value;
   1020 
   1021 #if COMPILER(MSVC)
   1022 #pragma warning(pop)
   1023 #endif
   1024 
   1025     // skip trailing space
   1026     while (length && isSpaceOrNewline(*data)) {
   1027         --length;
   1028         ++data;
   1029     }
   1030 
   1031     if (!length)
   1032         isOk = true;
   1033 bye:
   1034     if (ok)
   1035         *ok = isOk;
   1036     return isOk ? value : 0;
   1037 }
   1038 
   1039 template <typename CharType>
   1040 static unsigned lengthOfCharactersAsInteger(const CharType* data, size_t length)
   1041 {
   1042     size_t i = 0;
   1043 
   1044     // Allow leading spaces.
   1045     for (; i != length; ++i) {
   1046         if (!isSpaceOrNewline(data[i]))
   1047             break;
   1048     }
   1049 
   1050     // Allow sign.
   1051     if (i != length && (data[i] == '+' || data[i] == '-'))
   1052         ++i;
   1053 
   1054     // Allow digits.
   1055     for (; i != length; ++i) {
   1056         if (!isASCIIDigit(data[i]))
   1057             break;
   1058     }
   1059 
   1060     return i;
   1061 }
   1062 
   1063 int charactersToIntStrict(const LChar* data, size_t length, bool* ok, int base)
   1064 {
   1065     return toIntegralType<int, LChar>(data, length, ok, base);
   1066 }
   1067 
   1068 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
   1069 {
   1070     return toIntegralType<int, UChar>(data, length, ok, base);
   1071 }
   1072 
   1073 unsigned charactersToUIntStrict(const LChar* data, size_t length, bool* ok, int base)
   1074 {
   1075     return toIntegralType<unsigned, LChar>(data, length, ok, base);
   1076 }
   1077 
   1078 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
   1079 {
   1080     return toIntegralType<unsigned, UChar>(data, length, ok, base);
   1081 }
   1082 
   1083 int64_t charactersToInt64Strict(const LChar* data, size_t length, bool* ok, int base)
   1084 {
   1085     return toIntegralType<int64_t, LChar>(data, length, ok, base);
   1086 }
   1087 
   1088 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
   1089 {
   1090     return toIntegralType<int64_t, UChar>(data, length, ok, base);
   1091 }
   1092 
   1093 uint64_t charactersToUInt64Strict(const LChar* data, size_t length, bool* ok, int base)
   1094 {
   1095     return toIntegralType<uint64_t, LChar>(data, length, ok, base);
   1096 }
   1097 
   1098 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
   1099 {
   1100     return toIntegralType<uint64_t, UChar>(data, length, ok, base);
   1101 }
   1102 
   1103 intptr_t charactersToIntPtrStrict(const LChar* data, size_t length, bool* ok, int base)
   1104 {
   1105     return toIntegralType<intptr_t, LChar>(data, length, ok, base);
   1106 }
   1107 
   1108 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
   1109 {
   1110     return toIntegralType<intptr_t, UChar>(data, length, ok, base);
   1111 }
   1112 
   1113 int charactersToInt(const LChar* data, size_t length, bool* ok)
   1114 {
   1115     return toIntegralType<int, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1116 }
   1117 
   1118 int charactersToInt(const UChar* data, size_t length, bool* ok)
   1119 {
   1120     return toIntegralType<int, UChar>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
   1121 }
   1122 
   1123 unsigned charactersToUInt(const LChar* data, size_t length, bool* ok)
   1124 {
   1125     return toIntegralType<unsigned, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1126 }
   1127 
   1128 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
   1129 {
   1130     return toIntegralType<unsigned, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
   1131 }
   1132 
   1133 int64_t charactersToInt64(const LChar* data, size_t length, bool* ok)
   1134 {
   1135     return toIntegralType<int64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1136 }
   1137 
   1138 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
   1139 {
   1140     return toIntegralType<int64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
   1141 }
   1142 
   1143 uint64_t charactersToUInt64(const LChar* data, size_t length, bool* ok)
   1144 {
   1145     return toIntegralType<uint64_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1146 }
   1147 
   1148 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
   1149 {
   1150     return toIntegralType<uint64_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
   1151 }
   1152 
   1153 intptr_t charactersToIntPtr(const LChar* data, size_t length, bool* ok)
   1154 {
   1155     return toIntegralType<intptr_t, LChar>(data, lengthOfCharactersAsInteger<LChar>(data, length), ok, 10);
   1156 }
   1157 
   1158 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
   1159 {
   1160     return toIntegralType<intptr_t, UChar>(data, lengthOfCharactersAsInteger<UChar>(data, length), ok, 10);
   1161 }
   1162 
   1163 enum TrailingJunkPolicy { DisallowTrailingJunk, AllowTrailingJunk };
   1164 
   1165 template <typename CharType, TrailingJunkPolicy policy>
   1166 static inline double toDoubleType(const CharType* data, size_t length, bool* ok, size_t& parsedLength)
   1167 {
   1168     size_t leadingSpacesLength = 0;
   1169     while (leadingSpacesLength < length && isASCIISpace(data[leadingSpacesLength]))
   1170         ++leadingSpacesLength;
   1171 
   1172     double number = parseDouble(data + leadingSpacesLength, length - leadingSpacesLength, parsedLength);
   1173     if (!parsedLength) {
   1174         if (ok)
   1175             *ok = false;
   1176         return 0.0;
   1177     }
   1178 
   1179     parsedLength += leadingSpacesLength;
   1180     if (ok)
   1181         *ok = policy == AllowTrailingJunk || parsedLength == length;
   1182     return number;
   1183 }
   1184 
   1185 double charactersToDouble(const LChar* data, size_t length, bool* ok)
   1186 {
   1187     size_t parsedLength;
   1188     return toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength);
   1189 }
   1190 
   1191 double charactersToDouble(const UChar* data, size_t length, bool* ok)
   1192 {
   1193     size_t parsedLength;
   1194     return toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength);
   1195 }
   1196 
   1197 float charactersToFloat(const LChar* data, size_t length, bool* ok)
   1198 {
   1199     // FIXME: This will return ok even when the string fits into a double but not a float.
   1200     size_t parsedLength;
   1201     return static_cast<float>(toDoubleType<LChar, DisallowTrailingJunk>(data, length, ok, parsedLength));
   1202 }
   1203 
   1204 float charactersToFloat(const UChar* data, size_t length, bool* ok)
   1205 {
   1206     // FIXME: This will return ok even when the string fits into a double but not a float.
   1207     size_t parsedLength;
   1208     return static_cast<float>(toDoubleType<UChar, DisallowTrailingJunk>(data, length, ok, parsedLength));
   1209 }
   1210 
   1211 float charactersToFloat(const LChar* data, size_t length, size_t& parsedLength)
   1212 {
   1213     // FIXME: This will return ok even when the string fits into a double but not a float.
   1214     return static_cast<float>(toDoubleType<LChar, AllowTrailingJunk>(data, length, 0, parsedLength));
   1215 }
   1216 
   1217 float charactersToFloat(const UChar* data, size_t length, size_t& parsedLength)
   1218 {
   1219     // FIXME: This will return ok even when the string fits into a double but not a float.
   1220     return static_cast<float>(toDoubleType<UChar, AllowTrailingJunk>(data, length, 0, parsedLength));
   1221 }
   1222 
   1223 const String& emptyString()
   1224 {
   1225     DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty()));
   1226     return emptyString;
   1227 }
   1228 
   1229 const String& emptyString16Bit()
   1230 {
   1231     DEFINE_STATIC_LOCAL(String, emptyString, (StringImpl::empty16Bit()));
   1232     return emptyString;
   1233 }
   1234 
   1235 } // namespace WTF
   1236 
   1237 #ifndef NDEBUG
   1238 // For use in the debugger
   1239 String* string(const char*);
   1240 Vector<char> asciiDebug(StringImpl* impl);
   1241 Vector<char> asciiDebug(String& string);
   1242 
   1243 void String::show() const
   1244 {
   1245     dataLogF("%s\n", asciiDebug(impl()).data());
   1246 }
   1247 
   1248 String* string(const char* s)
   1249 {
   1250     // leaks memory!
   1251     return new String(s);
   1252 }
   1253 
   1254 Vector<char> asciiDebug(StringImpl* impl)
   1255 {
   1256     if (!impl)
   1257         return asciiDebug(String("[null]").impl());
   1258 
   1259     Vector<char> buffer;
   1260     for (unsigned i = 0; i < impl->length(); ++i) {
   1261         UChar ch = (*impl)[i];
   1262         if (isASCIIPrintable(ch)) {
   1263             if (ch == '\\')
   1264                 buffer.append(ch);
   1265             buffer.append(ch);
   1266         } else {
   1267             buffer.append('\\');
   1268             buffer.append('u');
   1269             appendUnsignedAsHexFixedSize(ch, buffer, 4);
   1270         }
   1271     }
   1272     buffer.append('\0');
   1273     return buffer;
   1274 }
   1275 
   1276 Vector<char> asciiDebug(String& string)
   1277 {
   1278     return asciiDebug(string.impl());
   1279 }
   1280 
   1281 #endif
   1282