Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  *           (C) 1999 Antti Koivisto (koivisto (at) kde.org)
      4  *           (C) 2001 Dirk Mueller ( mueller (at) kde.org )
      5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
      6  * Copyright (C) 2006 Andrew Wellington (proton (at) wiretapped.net)
      7  *
      8  * This library is free software; you can redistribute it and/or
      9  * modify it under the terms of the GNU Library General Public
     10  * License as published by the Free Software Foundation; either
     11  * version 2 of the License, or (at your option) any later version.
     12  *
     13  * This library is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  * Library General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU Library General Public License
     19  * along with this library; see the file COPYING.LIB.  If not, write to
     20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21  * Boston, MA 02110-1301, USA.
     22  *
     23  */
     24 
     25 #include "config.h"
     26 #include "wtf/text/StringImpl.h"
     27 
     28 #include "wtf/DynamicAnnotations.h"
     29 #include "wtf/LeakAnnotations.h"
     30 #include "wtf/MainThread.h"
     31 #include "wtf/PartitionAlloc.h"
     32 #include "wtf/StdLibExtras.h"
     33 #include "wtf/WTF.h"
     34 #include "wtf/text/AtomicString.h"
     35 #include "wtf/text/StringBuffer.h"
     36 #include "wtf/text/StringHash.h"
     37 #include "wtf/unicode/CharacterNames.h"
     38 
     39 #ifdef STRING_STATS
     40 #include "wtf/DataLog.h"
     41 #include "wtf/HashMap.h"
     42 #include "wtf/HashSet.h"
     43 #include "wtf/ProcessID.h"
     44 #include "wtf/RefCounted.h"
     45 #include "wtf/ThreadingPrimitives.h"
     46 #include <unistd.h>
     47 #endif
     48 
     49 using namespace std;
     50 
     51 namespace WTF {
     52 
     53 using namespace Unicode;
     54 
     55 COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small);
     56 
     57 #ifdef STRING_STATS
     58 
     59 static Mutex& statsMutex()
     60 {
     61     DEFINE_STATIC_LOCAL(Mutex, mutex, ());
     62     return mutex;
     63 }
     64 
     65 static HashSet<void*>& liveStrings()
     66 {
     67     // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
     68     DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
     69     return strings;
     70 }
     71 
     72 void addStringForStats(StringImpl* string)
     73 {
     74     MutexLocker locker(statsMutex());
     75     liveStrings().add(string);
     76 }
     77 
     78 void removeStringForStats(StringImpl* string)
     79 {
     80     MutexLocker locker(statsMutex());
     81     liveStrings().remove(string);
     82 }
     83 
     84 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
     85 {
     86     const unsigned kMaxSnippetLength = 64;
     87     snippet.clear();
     88 
     89     size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
     90     if (expectedLength == kMaxSnippetLength)
     91         expectedLength += 3; // For the "...".
     92     ++expectedLength; // For the terminating '\0'.
     93     snippet.reserveCapacity(expectedLength);
     94 
     95     size_t i;
     96     for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
     97         UChar c = (*string)[i];
     98         if (isASCIIPrintable(c))
     99             snippet.append(c);
    100         else
    101             snippet.append('?');
    102     }
    103     if (i < string->length()) {
    104         snippet.append('.');
    105         snippet.append('.');
    106         snippet.append('.');
    107     }
    108     snippet.append('\0');
    109 }
    110 
    111 static bool isUnnecessarilyWide(const StringImpl* string)
    112 {
    113     if (string->is8Bit())
    114         return false;
    115     UChar c = 0;
    116     for (unsigned i = 0; i < string->length(); ++i)
    117         c |= (*string)[i] >> 8;
    118     return !c;
    119 }
    120 
    121 class PerStringStats : public RefCounted<PerStringStats> {
    122 public:
    123     static PassRefPtr<PerStringStats> create()
    124     {
    125         return adoptRef(new PerStringStats);
    126     }
    127 
    128     void add(const StringImpl* string)
    129     {
    130         ++m_numberOfCopies;
    131         if (!m_length) {
    132             m_length = string->length();
    133             fillWithSnippet(string, m_snippet);
    134         }
    135         if (string->isAtomic())
    136             ++m_numberOfAtomicCopies;
    137         if (isUnnecessarilyWide(string))
    138             m_unnecessarilyWide = true;
    139     }
    140 
    141     size_t totalCharacters() const
    142     {
    143         return m_numberOfCopies * m_length;
    144     }
    145 
    146     void print()
    147     {
    148         const char* status = "ok";
    149         if (m_unnecessarilyWide)
    150             status = "16";
    151         dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
    152     }
    153 
    154     bool m_unnecessarilyWide;
    155     unsigned m_numberOfCopies;
    156     unsigned m_length;
    157     unsigned m_numberOfAtomicCopies;
    158     Vector<char> m_snippet;
    159 
    160 private:
    161     PerStringStats()
    162         : m_unnecessarilyWide(false)
    163         , m_numberOfCopies(0)
    164         , m_length(0)
    165         , m_numberOfAtomicCopies(0)
    166     {
    167     }
    168 };
    169 
    170 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
    171 {
    172     if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
    173         return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
    174     if (a->totalCharacters() != b->totalCharacters())
    175         return a->totalCharacters() < b->totalCharacters();
    176     if (a->m_numberOfCopies != b->m_numberOfCopies)
    177         return a->m_numberOfCopies < b->m_numberOfCopies;
    178     if (a->m_length != b->m_length)
    179         return a->m_length < b->m_length;
    180     return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
    181 }
    182 
    183 static void printLiveStringStats(void*)
    184 {
    185     MutexLocker locker(statsMutex());
    186     HashSet<void*>& strings = liveStrings();
    187 
    188     HashMap<StringImpl*, RefPtr<PerStringStats> > stats;
    189     for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
    190         StringImpl* string = static_cast<StringImpl*>(*iter);
    191         HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string);
    192         RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
    193         value->add(string);
    194         stats.set(string, value.release());
    195     }
    196 
    197     Vector<RefPtr<PerStringStats> > all;
    198     for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter)
    199         all.append(iter->value);
    200 
    201     std::sort(all.begin(), all.end());
    202     std::reverse(all.begin(), all.end());
    203     for (size_t i = 0; i < 20 && i < all.size(); ++i)
    204         all[i]->print();
    205 }
    206 
    207 StringStats StringImpl::m_stringStats;
    208 
    209 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
    210 
    211 void StringStats::removeString(StringImpl* string)
    212 {
    213     unsigned length = string->length();
    214     --m_totalNumberStrings;
    215 
    216     if (string->is8Bit()) {
    217         --m_number8BitStrings;
    218         m_total8BitData -= length;
    219     } else {
    220         --m_number16BitStrings;
    221         m_total16BitData -= length;
    222     }
    223 
    224     if (!--s_stringRemovesTillPrintStats) {
    225         s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
    226         printStats();
    227     }
    228 }
    229 
    230 void StringStats::printStats()
    231 {
    232     dataLogF("String stats for process id %d:\n", getCurrentProcessID());
    233 
    234     unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
    235     double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
    236     double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
    237     dataLogF("%8u (%5.2f%%) 8 bit        %12llu chars  %12llu bytes  avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
    238 
    239     double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
    240     double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
    241     dataLogF("%8u (%5.2f%%) 16 bit       %12llu chars  %12llu bytes  avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
    242 
    243     double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
    244     unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
    245     dataLogF("%8u Total                 %12llu chars  %12llu bytes  avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
    246     unsigned long long totalSavedBytes = m_total8BitData;
    247     double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
    248     dataLogF("         Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
    249 
    250     unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
    251     double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
    252     dataLogF("         StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
    253 
    254     callOnMainThread(printLiveStringStats, 0);
    255 }
    256 #endif
    257 
    258 void* StringImpl::operator new(size_t size)
    259 {
    260     ASSERT(size == sizeof(StringImpl));
    261     return partitionAllocGeneric(Partitions::getBufferPartition(), size);
    262 }
    263 
    264 void StringImpl::operator delete(void* ptr)
    265 {
    266     partitionFreeGeneric(Partitions::getBufferPartition(), ptr);
    267 }
    268 
    269 inline StringImpl::~StringImpl()
    270 {
    271     ASSERT(!isStatic());
    272 
    273     STRING_STATS_REMOVE_STRING(this);
    274 
    275     if (isAtomic())
    276         AtomicString::remove(this);
    277 }
    278 
    279 void StringImpl::destroyIfNotStatic()
    280 {
    281     if (!isStatic())
    282         delete this;
    283 }
    284 
    285 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
    286 {
    287     if (!length) {
    288         data = 0;
    289         return empty();
    290     }
    291 
    292     // Allocate a single buffer large enough to contain the StringImpl
    293     // struct as well as the data which it contains. This removes one
    294     // heap allocation from this call.
    295     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
    296     size_t size = sizeof(StringImpl) + length * sizeof(LChar);
    297     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
    298 
    299     data = reinterpret_cast<LChar*>(string + 1);
    300     return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
    301 }
    302 
    303 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
    304 {
    305     if (!length) {
    306         data = 0;
    307         return empty();
    308     }
    309 
    310     // Allocate a single buffer large enough to contain the StringImpl
    311     // struct as well as the data which it contains. This removes one
    312     // heap allocation from this call.
    313     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)));
    314     size_t size = sizeof(StringImpl) + length * sizeof(UChar);
    315     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
    316 
    317     data = reinterpret_cast<UChar*>(string + 1);
    318     return adoptRef(new (string) StringImpl(length));
    319 }
    320 
    321 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data)
    322 {
    323     ASSERT(originalString->is8Bit());
    324     ASSERT(originalString->hasOneRef());
    325 
    326     if (!length) {
    327         data = 0;
    328         return empty();
    329     }
    330 
    331     // Same as createUninitialized() except here we use realloc.
    332     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
    333     size_t size = sizeof(StringImpl) + length * sizeof(LChar);
    334     originalString->~StringImpl();
    335     StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
    336 
    337     data = reinterpret_cast<LChar*>(string + 1);
    338     return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
    339 }
    340 
    341 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data)
    342 {
    343     ASSERT(!originalString->is8Bit());
    344     ASSERT(originalString->hasOneRef());
    345 
    346     if (!length) {
    347         data = 0;
    348         return empty();
    349     }
    350 
    351     // Same as createUninitialized() except here we use realloc.
    352     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)));
    353     size_t size = sizeof(StringImpl) + length * sizeof(UChar);
    354     originalString->~StringImpl();
    355     StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
    356 
    357     data = reinterpret_cast<UChar*>(string + 1);
    358     return adoptRef(new (string) StringImpl(length));
    359 }
    360 
    361 static StaticStringsTable& staticStrings()
    362 {
    363     DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
    364     return staticStrings;
    365 }
    366 
    367 #ifndef NDEBUG
    368 static bool s_allowCreationOfStaticStrings = true;
    369 #endif
    370 
    371 const StaticStringsTable& StringImpl::allStaticStrings()
    372 {
    373     return staticStrings();
    374 }
    375 
    376 void StringImpl::freezeStaticStrings()
    377 {
    378     ASSERT(isMainThread());
    379 
    380 #ifndef NDEBUG
    381     s_allowCreationOfStaticStrings = false;
    382 #endif
    383 }
    384 
    385 unsigned StringImpl::m_highestStaticStringLength = 0;
    386 
    387 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
    388 {
    389     ASSERT(s_allowCreationOfStaticStrings);
    390     ASSERT(string);
    391     ASSERT(length);
    392 
    393     StaticStringsTable::const_iterator it = staticStrings().find(hash);
    394     if (it != staticStrings().end()) {
    395         ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
    396         return it->value;
    397     }
    398 
    399     // Allocate a single buffer large enough to contain the StringImpl
    400     // struct as well as the data which it contains. This removes one
    401     // heap allocation from this call.
    402     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
    403     size_t size = sizeof(StringImpl) + length * sizeof(LChar);
    404 
    405     WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
    406     StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
    407 
    408     LChar* data = reinterpret_cast<LChar*>(impl + 1);
    409     impl = new (impl) StringImpl(length, hash, StaticString);
    410     memcpy(data, string, length * sizeof(LChar));
    411 #ifndef NDEBUG
    412     impl->assertHashIsCorrect();
    413 #endif
    414 
    415     ASSERT(isMainThread());
    416     m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
    417     staticStrings().add(hash, impl);
    418     WTF_ANNOTATE_BENIGN_RACE(impl,
    419         "Benign race on the reference counter of a static string created by StringImpl::createStatic");
    420 
    421     return impl;
    422 }
    423 
    424 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
    425 {
    426     if (!characters || !length)
    427         return empty();
    428 
    429     UChar* data;
    430     RefPtr<StringImpl> string = createUninitialized(length, data);
    431     memcpy(data, characters, length * sizeof(UChar));
    432     return string.release();
    433 }
    434 
    435 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
    436 {
    437     if (!characters || !length)
    438         return empty();
    439 
    440     LChar* data;
    441     RefPtr<StringImpl> string = createUninitialized(length, data);
    442     memcpy(data, characters, length * sizeof(LChar));
    443     return string.release();
    444 }
    445 
    446 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
    447 {
    448     if (!characters || !length)
    449         return empty();
    450 
    451     LChar* data;
    452     RefPtr<StringImpl> string = createUninitialized(length, data);
    453 
    454     for (size_t i = 0; i < length; ++i) {
    455         if (characters[i] & 0xff00)
    456             return create(characters, length);
    457         data[i] = static_cast<LChar>(characters[i]);
    458     }
    459 
    460     return string.release();
    461 }
    462 
    463 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
    464 {
    465     if (!string)
    466         return empty();
    467     size_t length = strlen(reinterpret_cast<const char*>(string));
    468     RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
    469     return create(string, length);
    470 }
    471 
    472 bool StringImpl::containsOnlyWhitespace()
    473 {
    474     // FIXME: The definition of whitespace here includes a number of characters
    475     // that are not whitespace from the point of view of RenderText; I wonder if
    476     // that's a problem in practice.
    477     if (is8Bit()) {
    478         for (unsigned i = 0; i < m_length; ++i) {
    479             UChar c = characters8()[i];
    480             if (!isASCIISpace(c))
    481                 return false;
    482         }
    483 
    484         return true;
    485     }
    486 
    487     for (unsigned i = 0; i < m_length; ++i) {
    488         UChar c = characters16()[i];
    489         if (!isASCIISpace(c))
    490             return false;
    491     }
    492     return true;
    493 }
    494 
    495 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
    496 {
    497     if (start >= m_length)
    498         return empty();
    499     unsigned maxLength = m_length - start;
    500     if (length >= maxLength) {
    501         if (!start)
    502             return this;
    503         length = maxLength;
    504     }
    505     if (is8Bit())
    506         return create(characters8() + start, length);
    507 
    508     return create(characters16() + start, length);
    509 }
    510 
    511 UChar32 StringImpl::characterStartingAt(unsigned i)
    512 {
    513     if (is8Bit())
    514         return characters8()[i];
    515     if (U16_IS_SINGLE(characters16()[i]))
    516         return characters16()[i];
    517     if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
    518         return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
    519     return 0;
    520 }
    521 
    522 PassRefPtr<StringImpl> StringImpl::lower()
    523 {
    524     // Note: This is a hot function in the Dromaeo benchmark, specifically the
    525     // no-op code path up through the first 'return' statement.
    526 
    527     // First scan the string for uppercase and non-ASCII characters:
    528     bool noUpper = true;
    529     UChar ored = 0;
    530     if (is8Bit()) {
    531         const LChar* end = characters8() + m_length;
    532         for (const LChar* chp = characters8(); chp != end; ++chp) {
    533             if (UNLIKELY(isASCIIUpper(*chp)))
    534                 noUpper = false;
    535             ored |= *chp;
    536         }
    537         // Nothing to do if the string is all ASCII with no uppercase.
    538         if (noUpper && !(ored & ~0x7F))
    539             return this;
    540 
    541         RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
    542         int32_t length = m_length;
    543 
    544         LChar* data8;
    545         RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
    546 
    547         if (!(ored & ~0x7F)) {
    548             for (int32_t i = 0; i < length; ++i)
    549                 data8[i] = toASCIILower(characters8()[i]);
    550 
    551             return newImpl.release();
    552         }
    553 
    554         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
    555         for (int32_t i = 0; i < length; ++i)
    556             data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
    557 
    558         return newImpl.release();
    559     }
    560 
    561     const UChar* end = characters16() + m_length;
    562     for (const UChar* chp = characters16(); chp != end; ++chp) {
    563         if (UNLIKELY(isASCIIUpper(*chp)))
    564             noUpper = false;
    565         ored |= *chp;
    566     }
    567     // Nothing to do if the string is all ASCII with no uppercase.
    568     if (noUpper && !(ored & ~0x7F))
    569         return this;
    570 
    571     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
    572     int32_t length = m_length;
    573 
    574     if (!(ored & ~0x7F)) {
    575         UChar* data16;
    576         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
    577 
    578         for (int32_t i = 0; i < length; ++i) {
    579             UChar c = characters16()[i];
    580             data16[i] = toASCIILower(c);
    581         }
    582         return newImpl.release();
    583     }
    584 
    585     // Do a slower implementation for cases that include non-ASCII characters.
    586     UChar* data16;
    587     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
    588 
    589     bool error;
    590     int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
    591     if (!error && realLength == length)
    592         return newImpl.release();
    593 
    594     newImpl = createUninitialized(realLength, data16);
    595     Unicode::toLower(data16, realLength, characters16(), m_length, &error);
    596     if (error)
    597         return this;
    598     return newImpl.release();
    599 }
    600 
    601 PassRefPtr<StringImpl> StringImpl::upper()
    602 {
    603     // This function could be optimized for no-op cases the way lower() is,
    604     // but in empirical testing, few actual calls to upper() are no-ops, so
    605     // it wouldn't be worth the extra time for pre-scanning.
    606 
    607     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
    608     int32_t length = m_length;
    609 
    610     if (is8Bit()) {
    611         LChar* data8;
    612         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
    613 
    614         // Do a faster loop for the case where all the characters are ASCII.
    615         LChar ored = 0;
    616         for (int i = 0; i < length; ++i) {
    617             LChar c = characters8()[i];
    618             ored |= c;
    619             data8[i] = toASCIIUpper(c);
    620         }
    621         if (!(ored & ~0x7F))
    622             return newImpl.release();
    623 
    624         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
    625         int numberSharpSCharacters = 0;
    626 
    627         // There are two special cases.
    628         //  1. latin-1 characters when converted to upper case are 16 bit characters.
    629         //  2. Lower case sharp-S converts to "SS" (two characters)
    630         for (int32_t i = 0; i < length; ++i) {
    631             LChar c = characters8()[i];
    632             if (UNLIKELY(c == smallLetterSharpS))
    633                 ++numberSharpSCharacters;
    634             UChar upper = Unicode::toUpper(c);
    635             if (UNLIKELY(upper > 0xff)) {
    636                 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
    637                 goto upconvert;
    638             }
    639             data8[i] = static_cast<LChar>(upper);
    640         }
    641 
    642         if (!numberSharpSCharacters)
    643             return newImpl.release();
    644 
    645         // We have numberSSCharacters sharp-s characters, but none of the other special characters.
    646         newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
    647 
    648         LChar* dest = data8;
    649 
    650         for (int32_t i = 0; i < length; ++i) {
    651             LChar c = characters8()[i];
    652             if (c == smallLetterSharpS) {
    653                 *dest++ = 'S';
    654                 *dest++ = 'S';
    655             } else
    656                 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
    657         }
    658 
    659         return newImpl.release();
    660     }
    661 
    662 upconvert:
    663     RefPtr<StringImpl> upconverted = upconvertedString();
    664     const UChar* source16 = upconverted->characters16();
    665 
    666     UChar* data16;
    667     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
    668 
    669     // Do a faster loop for the case where all the characters are ASCII.
    670     UChar ored = 0;
    671     for (int i = 0; i < length; ++i) {
    672         UChar c = source16[i];
    673         ored |= c;
    674         data16[i] = toASCIIUpper(c);
    675     }
    676     if (!(ored & ~0x7F))
    677         return newImpl.release();
    678 
    679     // Do a slower implementation for cases that include non-ASCII characters.
    680     bool error;
    681     int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
    682     if (!error && realLength == length)
    683         return newImpl;
    684     newImpl = createUninitialized(realLength, data16);
    685     Unicode::toUpper(data16, realLength, source16, m_length, &error);
    686     if (error)
    687         return this;
    688     return newImpl.release();
    689 }
    690 
    691 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
    692 {
    693     // Use the more-optimized code path most of the time.
    694     // Note the assumption here that the only locale-specific lowercasing is
    695     // in the "tr" and "az" locales.
    696     // FIXME: Could possibly optimize further by looking for the specific sequences
    697     // that have locale-specific lowercasing. There are only three of them.
    698     if (!(localeIdentifier == "tr" || localeIdentifier == "az"))
    699         return lower();
    700 
    701     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
    702         CRASH();
    703     int length = m_length;
    704 
    705     // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
    706     // allocating memory just to turn localeIdentifier into a C string, and there is no
    707     // difference between the uppercasing for "tr" and "az" locales.
    708     RefPtr<StringImpl> upconverted = upconvertedString();
    709     const UChar* source16 = upconverted->characters16();
    710     UChar* data16;
    711     RefPtr<StringImpl> newString = createUninitialized(length, data16);
    712     do {
    713         UErrorCode status = U_ZERO_ERROR;
    714         int realLength = u_strToLower(data16, length, source16, length, "tr", &status);
    715         if (U_SUCCESS(status)) {
    716             newString->truncateAssumingIsolated(realLength);
    717             return newString.release();
    718         }
    719         if (status != U_BUFFER_OVERFLOW_ERROR)
    720             return this;
    721         // Expand the buffer.
    722         newString = createUninitialized(realLength, data16);
    723     } while (true);
    724 }
    725 
    726 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
    727 {
    728     // Use the more-optimized code path most of the time.
    729     // Note the assumption here that the only locale-specific uppercasing is of the
    730     // letter "i" in the "tr" and "az" locales.
    731     if (!(localeIdentifier == "tr" || localeIdentifier == "az") || find('i') == kNotFound)
    732         return upper();
    733 
    734     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
    735         CRASH();
    736     int length = m_length;
    737 
    738     // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
    739     // allocating memory just to turn localeIdentifier into a C string, and there is no
    740     // difference between the uppercasing for "tr" and "az" locales.
    741     RefPtr<StringImpl> upconverted = upconvertedString();
    742     const UChar* source16 = upconverted->characters16();
    743     UChar* data16;
    744     RefPtr<StringImpl> newString = createUninitialized(length, data16);
    745     do {
    746         UErrorCode status = U_ZERO_ERROR;
    747         int realLength = u_strToUpper(data16, length, source16, length, "tr", &status);
    748         if (U_SUCCESS(status)) {
    749             newString->truncateAssumingIsolated(realLength);
    750             return newString.release();
    751         }
    752         if (status != U_BUFFER_OVERFLOW_ERROR)
    753             return this;
    754         // Expand the buffer.
    755         newString = createUninitialized(realLength, data16);
    756     } while (true);
    757 }
    758 
    759 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
    760 {
    761     if (!(character & ~0x7F)) {
    762         LChar* data;
    763         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    764         for (unsigned i = 0; i < m_length; ++i)
    765             data[i] = character;
    766         return newImpl.release();
    767     }
    768     UChar* data;
    769     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    770     for (unsigned i = 0; i < m_length; ++i)
    771         data[i] = character;
    772     return newImpl.release();
    773 }
    774 
    775 PassRefPtr<StringImpl> StringImpl::foldCase()
    776 {
    777     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
    778     int32_t length = m_length;
    779 
    780     if (is8Bit()) {
    781         // Do a faster loop for the case where all the characters are ASCII.
    782         LChar* data;
    783         RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
    784         LChar ored = 0;
    785 
    786         for (int32_t i = 0; i < length; ++i) {
    787             LChar c = characters8()[i];
    788             data[i] = toASCIILower(c);
    789             ored |= c;
    790         }
    791 
    792         if (!(ored & ~0x7F))
    793             return newImpl.release();
    794 
    795         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
    796         for (int32_t i = 0; i < length; ++i)
    797             data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
    798 
    799         return newImpl.release();
    800     }
    801 
    802     // Do a faster loop for the case where all the characters are ASCII.
    803     UChar* data;
    804     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    805     UChar ored = 0;
    806     for (int32_t i = 0; i < length; ++i) {
    807         UChar c = characters16()[i];
    808         ored |= c;
    809         data[i] = toASCIILower(c);
    810     }
    811     if (!(ored & ~0x7F))
    812         return newImpl.release();
    813 
    814     // Do a slower implementation for cases that include non-ASCII characters.
    815     bool error;
    816     int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
    817     if (!error && realLength == length)
    818         return newImpl.release();
    819     newImpl = createUninitialized(realLength, data);
    820     Unicode::foldCase(data, realLength, characters16(), m_length, &error);
    821     if (error)
    822         return this;
    823     return newImpl.release();
    824 }
    825 
    826 template <class UCharPredicate>
    827 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
    828 {
    829     if (!m_length)
    830         return empty();
    831 
    832     unsigned start = 0;
    833     unsigned end = m_length - 1;
    834 
    835     // skip white space from start
    836     while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
    837         ++start;
    838 
    839     // only white space
    840     if (start > end)
    841         return empty();
    842 
    843     // skip white space from end
    844     while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
    845         --end;
    846 
    847     if (!start && end == m_length - 1)
    848         return this;
    849     if (is8Bit())
    850         return create(characters8() + start, end + 1 - start);
    851     return create(characters16() + start, end + 1 - start);
    852 }
    853 
    854 class UCharPredicate {
    855 public:
    856     inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
    857 
    858     inline bool operator()(UChar ch) const
    859     {
    860         return m_function(ch);
    861     }
    862 
    863 private:
    864     const CharacterMatchFunctionPtr m_function;
    865 };
    866 
    867 class SpaceOrNewlinePredicate {
    868 public:
    869     inline bool operator()(UChar ch) const
    870     {
    871         return isSpaceOrNewline(ch);
    872     }
    873 };
    874 
    875 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
    876 {
    877     return stripMatchedCharacters(SpaceOrNewlinePredicate());
    878 }
    879 
    880 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
    881 {
    882     return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
    883 }
    884 
    885 template <typename CharType>
    886 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
    887 {
    888     const CharType* from = characters;
    889     const CharType* fromend = from + m_length;
    890 
    891     // Assume the common case will not remove any characters
    892     while (from != fromend && !findMatch(*from))
    893         ++from;
    894     if (from == fromend)
    895         return this;
    896 
    897     StringBuffer<CharType> data(m_length);
    898     CharType* to = data.characters();
    899     unsigned outc = from - characters;
    900 
    901     if (outc)
    902         memcpy(to, characters, outc * sizeof(CharType));
    903 
    904     while (true) {
    905         while (from != fromend && findMatch(*from))
    906             ++from;
    907         while (from != fromend && !findMatch(*from))
    908             to[outc++] = *from++;
    909         if (from == fromend)
    910             break;
    911     }
    912 
    913     data.shrink(outc);
    914 
    915     return data.release();
    916 }
    917 
    918 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
    919 {
    920     if (is8Bit())
    921         return removeCharacters(characters8(), findMatch);
    922     return removeCharacters(characters16(), findMatch);
    923 }
    924 
    925 template <typename CharType, class UCharPredicate>
    926 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
    927 {
    928     StringBuffer<CharType> data(m_length);
    929 
    930     const CharType* from = getCharacters<CharType>();
    931     const CharType* fromend = from + m_length;
    932     int outc = 0;
    933     bool changedToSpace = false;
    934 
    935     CharType* to = data.characters();
    936 
    937     if (stripBehavior == StripExtraWhiteSpace) {
    938         while (true) {
    939             while (from != fromend && predicate(*from)) {
    940                 if (*from != ' ')
    941                     changedToSpace = true;
    942                 ++from;
    943             }
    944             while (from != fromend && !predicate(*from))
    945                 to[outc++] = *from++;
    946             if (from != fromend)
    947                 to[outc++] = ' ';
    948             else
    949                 break;
    950         }
    951 
    952         if (outc > 0 && to[outc - 1] == ' ')
    953             --outc;
    954     } else {
    955         for (; from != fromend; ++from) {
    956             if (predicate(*from)) {
    957                 if (*from != ' ')
    958                     changedToSpace = true;
    959                 to[outc++] = ' ';
    960             } else {
    961                 to[outc++] = *from;
    962             }
    963         }
    964     }
    965 
    966     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
    967         return this;
    968 
    969     data.shrink(outc);
    970 
    971     return data.release();
    972 }
    973 
    974 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
    975 {
    976     if (is8Bit())
    977         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
    978     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
    979 }
    980 
    981 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
    982 {
    983     if (is8Bit())
    984         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
    985     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
    986 }
    987 
    988 int StringImpl::toIntStrict(bool* ok, int base)
    989 {
    990     if (is8Bit())
    991         return charactersToIntStrict(characters8(), m_length, ok, base);
    992     return charactersToIntStrict(characters16(), m_length, ok, base);
    993 }
    994 
    995 unsigned StringImpl::toUIntStrict(bool* ok, int base)
    996 {
    997     if (is8Bit())
    998         return charactersToUIntStrict(characters8(), m_length, ok, base);
    999     return charactersToUIntStrict(characters16(), m_length, ok, base);
   1000 }
   1001 
   1002 int64_t StringImpl::toInt64Strict(bool* ok, int base)
   1003 {
   1004     if (is8Bit())
   1005         return charactersToInt64Strict(characters8(), m_length, ok, base);
   1006     return charactersToInt64Strict(characters16(), m_length, ok, base);
   1007 }
   1008 
   1009 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
   1010 {
   1011     if (is8Bit())
   1012         return charactersToUInt64Strict(characters8(), m_length, ok, base);
   1013     return charactersToUInt64Strict(characters16(), m_length, ok, base);
   1014 }
   1015 
   1016 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
   1017 {
   1018     if (is8Bit())
   1019         return charactersToIntPtrStrict(characters8(), m_length, ok, base);
   1020     return charactersToIntPtrStrict(characters16(), m_length, ok, base);
   1021 }
   1022 
   1023 int StringImpl::toInt(bool* ok)
   1024 {
   1025     if (is8Bit())
   1026         return charactersToInt(characters8(), m_length, ok);
   1027     return charactersToInt(characters16(), m_length, ok);
   1028 }
   1029 
   1030 unsigned StringImpl::toUInt(bool* ok)
   1031 {
   1032     if (is8Bit())
   1033         return charactersToUInt(characters8(), m_length, ok);
   1034     return charactersToUInt(characters16(), m_length, ok);
   1035 }
   1036 
   1037 int64_t StringImpl::toInt64(bool* ok)
   1038 {
   1039     if (is8Bit())
   1040         return charactersToInt64(characters8(), m_length, ok);
   1041     return charactersToInt64(characters16(), m_length, ok);
   1042 }
   1043 
   1044 uint64_t StringImpl::toUInt64(bool* ok)
   1045 {
   1046     if (is8Bit())
   1047         return charactersToUInt64(characters8(), m_length, ok);
   1048     return charactersToUInt64(characters16(), m_length, ok);
   1049 }
   1050 
   1051 intptr_t StringImpl::toIntPtr(bool* ok)
   1052 {
   1053     if (is8Bit())
   1054         return charactersToIntPtr(characters8(), m_length, ok);
   1055     return charactersToIntPtr(characters16(), m_length, ok);
   1056 }
   1057 
   1058 double StringImpl::toDouble(bool* ok)
   1059 {
   1060     if (is8Bit())
   1061         return charactersToDouble(characters8(), m_length, ok);
   1062     return charactersToDouble(characters16(), m_length, ok);
   1063 }
   1064 
   1065 float StringImpl::toFloat(bool* ok)
   1066 {
   1067     if (is8Bit())
   1068         return charactersToFloat(characters8(), m_length, ok);
   1069     return charactersToFloat(characters16(), m_length, ok);
   1070 }
   1071 
   1072 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
   1073 {
   1074     while (length--) {
   1075         LChar bc = *b++;
   1076         if (foldCase(*a++) != foldCase(bc))
   1077             return false;
   1078     }
   1079     return true;
   1080 }
   1081 
   1082 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
   1083 {
   1084     while (length--) {
   1085         LChar bc = *b++;
   1086         if (foldCase(*a++) != foldCase(bc))
   1087             return false;
   1088     }
   1089     return true;
   1090 }
   1091 
   1092 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
   1093 {
   1094     if (is8Bit())
   1095         return WTF::find(characters8(), m_length, matchFunction, start);
   1096     return WTF::find(characters16(), m_length, matchFunction, start);
   1097 }
   1098 
   1099 size_t StringImpl::find(const LChar* matchString, unsigned index)
   1100 {
   1101     // Check for null or empty string to match against
   1102     if (!matchString)
   1103         return kNotFound;
   1104     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
   1105     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
   1106     unsigned matchLength = matchStringLength;
   1107     if (!matchLength)
   1108         return min(index, length());
   1109 
   1110     // Optimization 1: fast case for strings of length 1.
   1111     if (matchLength == 1)
   1112         return WTF::find(characters16(), length(), *matchString, index);
   1113 
   1114     // Check index & matchLength are in range.
   1115     if (index > length())
   1116         return kNotFound;
   1117     unsigned searchLength = length() - index;
   1118     if (matchLength > searchLength)
   1119         return kNotFound;
   1120     // delta is the number of additional times to test; delta == 0 means test only once.
   1121     unsigned delta = searchLength - matchLength;
   1122 
   1123     const UChar* searchCharacters = characters16() + index;
   1124 
   1125     // Optimization 2: keep a running hash of the strings,
   1126     // only call equal if the hashes match.
   1127     unsigned searchHash = 0;
   1128     unsigned matchHash = 0;
   1129     for (unsigned i = 0; i < matchLength; ++i) {
   1130         searchHash += searchCharacters[i];
   1131         matchHash += matchString[i];
   1132     }
   1133 
   1134     unsigned i = 0;
   1135     // keep looping until we match
   1136     while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
   1137         if (i == delta)
   1138             return kNotFound;
   1139         searchHash += searchCharacters[i + matchLength];
   1140         searchHash -= searchCharacters[i];
   1141         ++i;
   1142     }
   1143     return index + i;
   1144 }
   1145 
   1146 template<typename CharType>
   1147 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
   1148 {
   1149     // delta is the number of additional times to test; delta == 0 means test only once.
   1150     unsigned delta = searchLength - matchLength;
   1151 
   1152     unsigned i = 0;
   1153     while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
   1154         if (i == delta)
   1155             return kNotFound;
   1156         ++i;
   1157     }
   1158     return index + i;
   1159 }
   1160 
   1161 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
   1162 {
   1163     // Check for null or empty string to match against
   1164     if (!matchString)
   1165         return kNotFound;
   1166     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
   1167     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
   1168     unsigned matchLength = matchStringLength;
   1169     if (!matchLength)
   1170         return min(index, length());
   1171 
   1172     // Check index & matchLength are in range.
   1173     if (index > length())
   1174         return kNotFound;
   1175     unsigned searchLength = length() - index;
   1176     if (matchLength > searchLength)
   1177         return kNotFound;
   1178 
   1179     if (is8Bit())
   1180         return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
   1181     return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
   1182 }
   1183 
   1184 template <typename SearchCharacterType, typename MatchCharacterType>
   1185 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
   1186 {
   1187     // Optimization: keep a running hash of the strings,
   1188     // only call equal() if the hashes match.
   1189 
   1190     // delta is the number of additional times to test; delta == 0 means test only once.
   1191     unsigned delta = searchLength - matchLength;
   1192 
   1193     unsigned searchHash = 0;
   1194     unsigned matchHash = 0;
   1195 
   1196     for (unsigned i = 0; i < matchLength; ++i) {
   1197         searchHash += searchCharacters[i];
   1198         matchHash += matchCharacters[i];
   1199     }
   1200 
   1201     unsigned i = 0;
   1202     // keep looping until we match
   1203     while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
   1204         if (i == delta)
   1205             return kNotFound;
   1206         searchHash += searchCharacters[i + matchLength];
   1207         searchHash -= searchCharacters[i];
   1208         ++i;
   1209     }
   1210     return index + i;
   1211 }
   1212 
   1213 size_t StringImpl::find(StringImpl* matchString)
   1214 {
   1215     // Check for null string to match against
   1216     if (UNLIKELY(!matchString))
   1217         return kNotFound;
   1218     unsigned matchLength = matchString->length();
   1219 
   1220     // Optimization 1: fast case for strings of length 1.
   1221     if (matchLength == 1) {
   1222         if (is8Bit()) {
   1223             if (matchString->is8Bit())
   1224                 return WTF::find(characters8(), length(), matchString->characters8()[0]);
   1225             return WTF::find(characters8(), length(), matchString->characters16()[0]);
   1226         }
   1227         if (matchString->is8Bit())
   1228             return WTF::find(characters16(), length(), matchString->characters8()[0]);
   1229         return WTF::find(characters16(), length(), matchString->characters16()[0]);
   1230     }
   1231 
   1232     // Check matchLength is in range.
   1233     if (matchLength > length())
   1234         return kNotFound;
   1235 
   1236     // Check for empty string to match against
   1237     if (UNLIKELY(!matchLength))
   1238         return 0;
   1239 
   1240     if (is8Bit()) {
   1241         if (matchString->is8Bit())
   1242             return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
   1243         return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
   1244     }
   1245 
   1246     if (matchString->is8Bit())
   1247         return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
   1248 
   1249     return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
   1250 }
   1251 
   1252 size_t StringImpl::find(StringImpl* matchString, unsigned index)
   1253 {
   1254     // Check for null or empty string to match against
   1255     if (UNLIKELY(!matchString))
   1256         return kNotFound;
   1257 
   1258     unsigned matchLength = matchString->length();
   1259 
   1260     // Optimization 1: fast case for strings of length 1.
   1261     if (matchLength == 1) {
   1262         if (is8Bit())
   1263             return WTF::find(characters8(), length(), (*matchString)[0], index);
   1264         return WTF::find(characters16(), length(), (*matchString)[0], index);
   1265     }
   1266 
   1267     if (UNLIKELY(!matchLength))
   1268         return min(index, length());
   1269 
   1270     // Check index & matchLength are in range.
   1271     if (index > length())
   1272         return kNotFound;
   1273     unsigned searchLength = length() - index;
   1274     if (matchLength > searchLength)
   1275         return kNotFound;
   1276 
   1277     if (is8Bit()) {
   1278         if (matchString->is8Bit())
   1279             return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
   1280         return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
   1281     }
   1282 
   1283     if (matchString->is8Bit())
   1284         return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
   1285 
   1286     return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
   1287 }
   1288 
   1289 template <typename SearchCharacterType, typename MatchCharacterType>
   1290 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
   1291 {
   1292     // delta is the number of additional times to test; delta == 0 means test only once.
   1293     unsigned delta = searchLength - matchLength;
   1294 
   1295     unsigned i = 0;
   1296     // keep looping until we match
   1297     while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
   1298         if (i == delta)
   1299             return kNotFound;
   1300         ++i;
   1301     }
   1302     return index + i;
   1303 }
   1304 
   1305 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
   1306 {
   1307     // Check for null or empty string to match against
   1308     if (!matchString)
   1309         return kNotFound;
   1310     unsigned matchLength = matchString->length();
   1311     if (!matchLength)
   1312         return min(index, length());
   1313 
   1314     // Check index & matchLength are in range.
   1315     if (index > length())
   1316         return kNotFound;
   1317     unsigned searchLength = length() - index;
   1318     if (matchLength > searchLength)
   1319         return kNotFound;
   1320 
   1321     if (is8Bit()) {
   1322         if (matchString->is8Bit())
   1323             return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
   1324         return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
   1325     }
   1326 
   1327     if (matchString->is8Bit())
   1328         return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
   1329 
   1330     return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
   1331 }
   1332 
   1333 size_t StringImpl::findNextLineStart(unsigned index)
   1334 {
   1335     if (is8Bit())
   1336         return WTF::findNextLineStart(characters8(), m_length, index);
   1337     return WTF::findNextLineStart(characters16(), m_length, index);
   1338 }
   1339 
   1340 size_t StringImpl::count(LChar c) const
   1341 {
   1342     int count = 0;
   1343     if (is8Bit()) {
   1344         for (size_t i = 0; i < m_length; ++i)
   1345             count += characters8()[i] == c;
   1346     } else {
   1347         for (size_t i = 0; i < m_length; ++i)
   1348             count += characters16()[i] == c;
   1349     }
   1350     return count;
   1351 }
   1352 
   1353 size_t StringImpl::reverseFind(UChar c, unsigned index)
   1354 {
   1355     if (is8Bit())
   1356         return WTF::reverseFind(characters8(), m_length, c, index);
   1357     return WTF::reverseFind(characters16(), m_length, c, index);
   1358 }
   1359 
   1360 template <typename SearchCharacterType, typename MatchCharacterType>
   1361 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
   1362 {
   1363     // Optimization: keep a running hash of the strings,
   1364     // only call equal if the hashes match.
   1365 
   1366     // delta is the number of additional times to test; delta == 0 means test only once.
   1367     unsigned delta = min(index, length - matchLength);
   1368 
   1369     unsigned searchHash = 0;
   1370     unsigned matchHash = 0;
   1371     for (unsigned i = 0; i < matchLength; ++i) {
   1372         searchHash += searchCharacters[delta + i];
   1373         matchHash += matchCharacters[i];
   1374     }
   1375 
   1376     // keep looping until we match
   1377     while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
   1378         if (!delta)
   1379             return kNotFound;
   1380         --delta;
   1381         searchHash -= searchCharacters[delta + matchLength];
   1382         searchHash += searchCharacters[delta];
   1383     }
   1384     return delta;
   1385 }
   1386 
   1387 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
   1388 {
   1389     // Check for null or empty string to match against
   1390     if (!matchString)
   1391         return kNotFound;
   1392     unsigned matchLength = matchString->length();
   1393     unsigned ourLength = length();
   1394     if (!matchLength)
   1395         return min(index, ourLength);
   1396 
   1397     // Optimization 1: fast case for strings of length 1.
   1398     if (matchLength == 1) {
   1399         if (is8Bit())
   1400             return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
   1401         return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
   1402     }
   1403 
   1404     // Check index & matchLength are in range.
   1405     if (matchLength > ourLength)
   1406         return kNotFound;
   1407 
   1408     if (is8Bit()) {
   1409         if (matchString->is8Bit())
   1410             return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
   1411         return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
   1412     }
   1413 
   1414     if (matchString->is8Bit())
   1415         return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
   1416 
   1417     return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
   1418 }
   1419 
   1420 template <typename SearchCharacterType, typename MatchCharacterType>
   1421 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
   1422 {
   1423     // delta is the number of additional times to test; delta == 0 means test only once.
   1424     unsigned delta = min(index, length - matchLength);
   1425 
   1426     // keep looping until we match
   1427     while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
   1428         if (!delta)
   1429             return kNotFound;
   1430         --delta;
   1431     }
   1432     return delta;
   1433 }
   1434 
   1435 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
   1436 {
   1437     // Check for null or empty string to match against
   1438     if (!matchString)
   1439         return kNotFound;
   1440     unsigned matchLength = matchString->length();
   1441     unsigned ourLength = length();
   1442     if (!matchLength)
   1443         return min(index, ourLength);
   1444 
   1445     // Check index & matchLength are in range.
   1446     if (matchLength > ourLength)
   1447         return kNotFound;
   1448 
   1449     if (is8Bit()) {
   1450         if (matchString->is8Bit())
   1451             return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
   1452         return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
   1453     }
   1454 
   1455     if (matchString->is8Bit())
   1456         return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
   1457 
   1458     return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
   1459 }
   1460 
   1461 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
   1462 {
   1463     ASSERT(stringImpl);
   1464     ASSERT(matchLength <= stringImpl->length());
   1465     ASSERT(startOffset + matchLength <= stringImpl->length());
   1466 
   1467     if (caseSensitive) {
   1468         if (stringImpl->is8Bit())
   1469             return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
   1470         return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
   1471     }
   1472     if (stringImpl->is8Bit())
   1473         return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
   1474     return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
   1475 }
   1476 
   1477 bool StringImpl::startsWith(UChar character) const
   1478 {
   1479     return m_length && (*this)[0] == character;
   1480 }
   1481 
   1482 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
   1483 {
   1484     ASSERT(matchLength);
   1485     if (matchLength > length())
   1486         return false;
   1487     return equalInner(this, 0, matchString, matchLength, caseSensitive);
   1488 }
   1489 
   1490 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
   1491 {
   1492     ASSERT(matchString);
   1493     if (m_length >= matchString->m_length) {
   1494         unsigned start = m_length - matchString->m_length;
   1495         return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
   1496     }
   1497     return false;
   1498 }
   1499 
   1500 bool StringImpl::endsWith(UChar character) const
   1501 {
   1502     return m_length && (*this)[m_length - 1] == character;
   1503 }
   1504 
   1505 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
   1506 {
   1507     ASSERT(matchLength);
   1508     if (matchLength > length())
   1509         return false;
   1510     unsigned startOffset = length() - matchLength;
   1511     return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
   1512 }
   1513 
   1514 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
   1515 {
   1516     if (oldC == newC)
   1517         return this;
   1518     unsigned i;
   1519     for (i = 0; i != m_length; ++i) {
   1520         UChar c = is8Bit() ? characters8()[i] : characters16()[i];
   1521         if (c == oldC)
   1522             break;
   1523     }
   1524     if (i == m_length)
   1525         return this;
   1526 
   1527     if (is8Bit()) {
   1528         if (oldC > 0xff)
   1529             // Looking for a 16 bit char in an 8 bit string, we're done.
   1530             return this;
   1531 
   1532         if (newC <= 0xff) {
   1533             LChar* data;
   1534             LChar oldChar = static_cast<LChar>(oldC);
   1535             LChar newChar = static_cast<LChar>(newC);
   1536 
   1537             RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
   1538 
   1539             for (i = 0; i != m_length; ++i) {
   1540                 LChar ch = characters8()[i];
   1541                 if (ch == oldChar)
   1542                     ch = newChar;
   1543                 data[i] = ch;
   1544             }
   1545             return newImpl.release();
   1546         }
   1547 
   1548         // There is the possibility we need to up convert from 8 to 16 bit,
   1549         // create a 16 bit string for the result.
   1550         UChar* data;
   1551         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
   1552 
   1553         for (i = 0; i != m_length; ++i) {
   1554             UChar ch = characters8()[i];
   1555             if (ch == oldC)
   1556                 ch = newC;
   1557             data[i] = ch;
   1558         }
   1559 
   1560         return newImpl.release();
   1561     }
   1562 
   1563     UChar* data;
   1564     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
   1565 
   1566     for (i = 0; i != m_length; ++i) {
   1567         UChar ch = characters16()[i];
   1568         if (ch == oldC)
   1569             ch = newC;
   1570         data[i] = ch;
   1571     }
   1572     return newImpl.release();
   1573 }
   1574 
   1575 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
   1576 {
   1577     position = min(position, length());
   1578     lengthToReplace = min(lengthToReplace, length() - position);
   1579     unsigned lengthToInsert = str ? str->length() : 0;
   1580     if (!lengthToReplace && !lengthToInsert)
   1581         return this;
   1582 
   1583     RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
   1584 
   1585     if (is8Bit() && (!str || str->is8Bit())) {
   1586         LChar* data;
   1587         RefPtr<StringImpl> newImpl =
   1588         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
   1589         memcpy(data, characters8(), position * sizeof(LChar));
   1590         if (str)
   1591             memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
   1592         memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
   1593                (length() - position - lengthToReplace) * sizeof(LChar));
   1594         return newImpl.release();
   1595     }
   1596     UChar* data;
   1597     RefPtr<StringImpl> newImpl =
   1598         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
   1599     if (is8Bit())
   1600         for (unsigned i = 0; i < position; ++i)
   1601             data[i] = characters8()[i];
   1602     else
   1603         memcpy(data, characters16(), position * sizeof(UChar));
   1604     if (str) {
   1605         if (str->is8Bit())
   1606             for (unsigned i = 0; i < lengthToInsert; ++i)
   1607                 data[i + position] = str->characters8()[i];
   1608         else
   1609             memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
   1610     }
   1611     if (is8Bit()) {
   1612         for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
   1613             data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
   1614     } else {
   1615         memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
   1616             (length() - position - lengthToReplace) * sizeof(UChar));
   1617     }
   1618     return newImpl.release();
   1619 }
   1620 
   1621 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
   1622 {
   1623     if (!replacement)
   1624         return this;
   1625 
   1626     if (replacement->is8Bit())
   1627         return replace(pattern, replacement->characters8(), replacement->length());
   1628 
   1629     return replace(pattern, replacement->characters16(), replacement->length());
   1630 }
   1631 
   1632 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
   1633 {
   1634     ASSERT(replacement);
   1635 
   1636     size_t srcSegmentStart = 0;
   1637     unsigned matchCount = 0;
   1638 
   1639     // Count the matches.
   1640     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
   1641         ++matchCount;
   1642         ++srcSegmentStart;
   1643     }
   1644 
   1645     // If we have 0 matches then we don't have to do any more work.
   1646     if (!matchCount)
   1647         return this;
   1648 
   1649     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
   1650 
   1651     unsigned replaceSize = matchCount * repStrLength;
   1652     unsigned newSize = m_length - matchCount;
   1653     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
   1654 
   1655     newSize += replaceSize;
   1656 
   1657     // Construct the new data.
   1658     size_t srcSegmentEnd;
   1659     unsigned srcSegmentLength;
   1660     srcSegmentStart = 0;
   1661     unsigned dstOffset = 0;
   1662 
   1663     if (is8Bit()) {
   1664         LChar* data;
   1665         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1666 
   1667         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1668             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1669             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
   1670             dstOffset += srcSegmentLength;
   1671             memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
   1672             dstOffset += repStrLength;
   1673             srcSegmentStart = srcSegmentEnd + 1;
   1674         }
   1675 
   1676         srcSegmentLength = m_length - srcSegmentStart;
   1677         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
   1678 
   1679         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1680 
   1681         return newImpl.release();
   1682     }
   1683 
   1684     UChar* data;
   1685     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1686 
   1687     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1688         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1689         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1690 
   1691         dstOffset += srcSegmentLength;
   1692         for (unsigned i = 0; i < repStrLength; ++i)
   1693             data[i + dstOffset] = replacement[i];
   1694 
   1695         dstOffset += repStrLength;
   1696         srcSegmentStart = srcSegmentEnd + 1;
   1697     }
   1698 
   1699     srcSegmentLength = m_length - srcSegmentStart;
   1700     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1701 
   1702     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1703 
   1704     return newImpl.release();
   1705 }
   1706 
   1707 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
   1708 {
   1709     ASSERT(replacement);
   1710 
   1711     size_t srcSegmentStart = 0;
   1712     unsigned matchCount = 0;
   1713 
   1714     // Count the matches.
   1715     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
   1716         ++matchCount;
   1717         ++srcSegmentStart;
   1718     }
   1719 
   1720     // If we have 0 matches then we don't have to do any more work.
   1721     if (!matchCount)
   1722         return this;
   1723 
   1724     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
   1725 
   1726     unsigned replaceSize = matchCount * repStrLength;
   1727     unsigned newSize = m_length - matchCount;
   1728     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
   1729 
   1730     newSize += replaceSize;
   1731 
   1732     // Construct the new data.
   1733     size_t srcSegmentEnd;
   1734     unsigned srcSegmentLength;
   1735     srcSegmentStart = 0;
   1736     unsigned dstOffset = 0;
   1737 
   1738     if (is8Bit()) {
   1739         UChar* data;
   1740         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1741 
   1742         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1743             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1744             for (unsigned i = 0; i < srcSegmentLength; ++i)
   1745                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
   1746 
   1747             dstOffset += srcSegmentLength;
   1748             memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
   1749 
   1750             dstOffset += repStrLength;
   1751             srcSegmentStart = srcSegmentEnd + 1;
   1752         }
   1753 
   1754         srcSegmentLength = m_length - srcSegmentStart;
   1755         for (unsigned i = 0; i < srcSegmentLength; ++i)
   1756             data[i + dstOffset] = characters8()[i + srcSegmentStart];
   1757 
   1758         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1759 
   1760         return newImpl.release();
   1761     }
   1762 
   1763     UChar* data;
   1764     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1765 
   1766     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1767         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1768         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1769 
   1770         dstOffset += srcSegmentLength;
   1771         memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
   1772 
   1773         dstOffset += repStrLength;
   1774         srcSegmentStart = srcSegmentEnd + 1;
   1775     }
   1776 
   1777     srcSegmentLength = m_length - srcSegmentStart;
   1778     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1779 
   1780     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1781 
   1782     return newImpl.release();
   1783 }
   1784 
   1785 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
   1786 {
   1787     if (!pattern || !replacement)
   1788         return this;
   1789 
   1790     unsigned patternLength = pattern->length();
   1791     if (!patternLength)
   1792         return this;
   1793 
   1794     unsigned repStrLength = replacement->length();
   1795     size_t srcSegmentStart = 0;
   1796     unsigned matchCount = 0;
   1797 
   1798     // Count the matches.
   1799     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
   1800         ++matchCount;
   1801         srcSegmentStart += patternLength;
   1802     }
   1803 
   1804     // If we have 0 matches, we don't have to do any more work
   1805     if (!matchCount)
   1806         return this;
   1807 
   1808     unsigned newSize = m_length - matchCount * patternLength;
   1809     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
   1810 
   1811     RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
   1812 
   1813     newSize += matchCount * repStrLength;
   1814 
   1815 
   1816     // Construct the new data
   1817     size_t srcSegmentEnd;
   1818     unsigned srcSegmentLength;
   1819     srcSegmentStart = 0;
   1820     unsigned dstOffset = 0;
   1821     bool srcIs8Bit = is8Bit();
   1822     bool replacementIs8Bit = replacement->is8Bit();
   1823 
   1824     // There are 4 cases:
   1825     // 1. This and replacement are both 8 bit.
   1826     // 2. This and replacement are both 16 bit.
   1827     // 3. This is 8 bit and replacement is 16 bit.
   1828     // 4. This is 16 bit and replacement is 8 bit.
   1829     if (srcIs8Bit && replacementIs8Bit) {
   1830         // Case 1
   1831         LChar* data;
   1832         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1833         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1834             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1835             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
   1836             dstOffset += srcSegmentLength;
   1837             memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
   1838             dstOffset += repStrLength;
   1839             srcSegmentStart = srcSegmentEnd + patternLength;
   1840         }
   1841 
   1842         srcSegmentLength = m_length - srcSegmentStart;
   1843         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
   1844 
   1845         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1846 
   1847         return newImpl.release();
   1848     }
   1849 
   1850     UChar* data;
   1851     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1852     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1853         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1854         if (srcIs8Bit) {
   1855             // Case 3.
   1856             for (unsigned i = 0; i < srcSegmentLength; ++i)
   1857                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
   1858         } else {
   1859             // Case 2 & 4.
   1860             memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1861         }
   1862         dstOffset += srcSegmentLength;
   1863         if (replacementIs8Bit) {
   1864             // Cases 2 & 3.
   1865             for (unsigned i = 0; i < repStrLength; ++i)
   1866                 data[i + dstOffset] = replacement->characters8()[i];
   1867         } else {
   1868             // Case 4
   1869             memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
   1870         }
   1871         dstOffset += repStrLength;
   1872         srcSegmentStart = srcSegmentEnd + patternLength;
   1873     }
   1874 
   1875     srcSegmentLength = m_length - srcSegmentStart;
   1876     if (srcIs8Bit) {
   1877         // Case 3.
   1878         for (unsigned i = 0; i < srcSegmentLength; ++i)
   1879             data[i + dstOffset] = characters8()[i + srcSegmentStart];
   1880     } else {
   1881         // Cases 2 & 4.
   1882         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1883     }
   1884 
   1885     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1886 
   1887     return newImpl.release();
   1888 }
   1889 
   1890 PassRefPtr<StringImpl> StringImpl::upconvertedString()
   1891 {
   1892     if (is8Bit())
   1893         return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
   1894     return this;
   1895 }
   1896 
   1897 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
   1898 {
   1899     unsigned aLength = a->length();
   1900     unsigned bLength = b->length();
   1901     if (aLength != bLength)
   1902         return false;
   1903 
   1904     if (a->is8Bit()) {
   1905         if (b->is8Bit())
   1906             return equal(a->characters8(), b->characters8(), aLength);
   1907 
   1908         return equal(a->characters8(), b->characters16(), aLength);
   1909     }
   1910 
   1911     if (b->is8Bit())
   1912         return equal(a->characters16(), b->characters8(), aLength);
   1913 
   1914     return equal(a->characters16(), b->characters16(), aLength);
   1915 }
   1916 
   1917 bool equal(const StringImpl* a, const StringImpl* b)
   1918 {
   1919     if (a == b)
   1920         return true;
   1921     if (!a || !b)
   1922         return false;
   1923 
   1924     return stringImplContentEqual(a, b);
   1925 }
   1926 
   1927 template <typename CharType>
   1928 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
   1929 {
   1930     if (!a)
   1931         return !b;
   1932     if (!b)
   1933         return false;
   1934 
   1935     if (a->length() != length)
   1936         return false;
   1937     if (a->is8Bit())
   1938         return equal(a->characters8(), b, length);
   1939     return equal(a->characters16(), b, length);
   1940 }
   1941 
   1942 bool equal(const StringImpl* a, const LChar* b, unsigned length)
   1943 {
   1944     return equalInternal(a, b, length);
   1945 }
   1946 
   1947 bool equal(const StringImpl* a, const UChar* b, unsigned length)
   1948 {
   1949     return equalInternal(a, b, length);
   1950 }
   1951 
   1952 bool equal(const StringImpl* a, const LChar* b)
   1953 {
   1954     if (!a)
   1955         return !b;
   1956     if (!b)
   1957         return !a;
   1958 
   1959     unsigned length = a->length();
   1960 
   1961     if (a->is8Bit()) {
   1962         const LChar* aPtr = a->characters8();
   1963         for (unsigned i = 0; i != length; ++i) {
   1964             LChar bc = b[i];
   1965             LChar ac = aPtr[i];
   1966             if (!bc)
   1967                 return false;
   1968             if (ac != bc)
   1969                 return false;
   1970         }
   1971 
   1972         return !b[length];
   1973     }
   1974 
   1975     const UChar* aPtr = a->characters16();
   1976     for (unsigned i = 0; i != length; ++i) {
   1977         LChar bc = b[i];
   1978         if (!bc)
   1979             return false;
   1980         if (aPtr[i] != bc)
   1981             return false;
   1982     }
   1983 
   1984     return !b[length];
   1985 }
   1986 
   1987 bool equalNonNull(const StringImpl* a, const StringImpl* b)
   1988 {
   1989     ASSERT(a && b);
   1990     if (a == b)
   1991         return true;
   1992 
   1993     return stringImplContentEqual(a, b);
   1994 }
   1995 
   1996 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
   1997 {
   1998     if (a == b)
   1999         return true;
   2000     if (!a || !b)
   2001         return false;
   2002 
   2003     return CaseFoldingHash::equal(a, b);
   2004 }
   2005 
   2006 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
   2007 {
   2008     if (!a)
   2009         return !b;
   2010     if (!b)
   2011         return !a;
   2012 
   2013     unsigned length = a->length();
   2014 
   2015     // Do a faster loop for the case where all the characters are ASCII.
   2016     UChar ored = 0;
   2017     bool equal = true;
   2018     if (a->is8Bit()) {
   2019         const LChar* as = a->characters8();
   2020         for (unsigned i = 0; i != length; ++i) {
   2021             LChar bc = b[i];
   2022             if (!bc)
   2023                 return false;
   2024             UChar ac = as[i];
   2025             ored |= ac;
   2026             equal = equal && (toASCIILower(ac) == toASCIILower(bc));
   2027         }
   2028 
   2029         // Do a slower implementation for cases that include non-ASCII characters.
   2030         if (ored & ~0x7F) {
   2031             equal = true;
   2032             for (unsigned i = 0; i != length; ++i)
   2033                 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
   2034         }
   2035 
   2036         return equal && !b[length];
   2037     }
   2038 
   2039     const UChar* as = a->characters16();
   2040     for (unsigned i = 0; i != length; ++i) {
   2041         LChar bc = b[i];
   2042         if (!bc)
   2043             return false;
   2044         UChar ac = as[i];
   2045         ored |= ac;
   2046         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
   2047     }
   2048 
   2049     // Do a slower implementation for cases that include non-ASCII characters.
   2050     if (ored & ~0x7F) {
   2051         equal = true;
   2052         for (unsigned i = 0; i != length; ++i) {
   2053             equal = equal && (foldCase(as[i]) == foldCase(b[i]));
   2054         }
   2055     }
   2056 
   2057     return equal && !b[length];
   2058 }
   2059 
   2060 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
   2061 {
   2062     ASSERT(a && b);
   2063     if (a == b)
   2064         return true;
   2065 
   2066     unsigned length = a->length();
   2067     if (length != b->length())
   2068         return false;
   2069 
   2070     if (a->is8Bit()) {
   2071         if (b->is8Bit())
   2072             return equalIgnoringCase(a->characters8(), b->characters8(), length);
   2073 
   2074         return equalIgnoringCase(b->characters16(), a->characters8(), length);
   2075     }
   2076 
   2077     if (b->is8Bit())
   2078         return equalIgnoringCase(a->characters16(), b->characters8(), length);
   2079 
   2080     return equalIgnoringCase(a->characters16(), b->characters16(), length);
   2081 }
   2082 
   2083 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
   2084 {
   2085     if (!a && b && !b->length())
   2086         return true;
   2087     if (!b && a && !a->length())
   2088         return true;
   2089     return equal(a, b);
   2090 }
   2091 
   2092 size_t StringImpl::sizeInBytes() const
   2093 {
   2094     size_t size = length();
   2095     if (!is8Bit())
   2096         size *= 2;
   2097     return size + sizeof(*this);
   2098 }
   2099 
   2100 } // namespace WTF
   2101