Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  *           (C) 1999 Antti Koivisto (koivisto (at) kde.org)
      4  *           (C) 2001 Dirk Mueller ( mueller (at) kde.org )
      5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
      6  * Copyright (C) 2006 Andrew Wellington (proton (at) wiretapped.net)
      7  *
      8  * This library is free software; you can redistribute it and/or
      9  * modify it under the terms of the GNU Library General Public
     10  * License as published by the Free Software Foundation; either
     11  * version 2 of the License, or (at your option) any later version.
     12  *
     13  * This library is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  * Library General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU Library General Public License
     19  * along with this library; see the file COPYING.LIB.  If not, write to
     20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21  * Boston, MA 02110-1301, USA.
     22  *
     23  */
     24 
     25 #include "config.h"
     26 #include "wtf/text/StringImpl.h"
     27 
     28 #include "wtf/DynamicAnnotations.h"
     29 #include "wtf/LeakAnnotations.h"
     30 #include "wtf/MainThread.h"
     31 #include "wtf/OwnPtr.h"
     32 #include "wtf/PartitionAlloc.h"
     33 #include "wtf/PassOwnPtr.h"
     34 #include "wtf/StdLibExtras.h"
     35 #include "wtf/WTF.h"
     36 #include "wtf/text/AtomicString.h"
     37 #include "wtf/text/StringBuffer.h"
     38 #include "wtf/text/StringHash.h"
     39 #include "wtf/unicode/CharacterNames.h"
     40 #include <unicode/translit.h>
     41 #include <unicode/unistr.h>
     42 
     43 #ifdef STRING_STATS
     44 #include "wtf/DataLog.h"
     45 #include "wtf/HashMap.h"
     46 #include "wtf/HashSet.h"
     47 #include "wtf/ProcessID.h"
     48 #include "wtf/RefCounted.h"
     49 #include "wtf/ThreadingPrimitives.h"
     50 #include <unistd.h>
     51 #endif
     52 
     53 using namespace std;
     54 
     55 namespace WTF {
     56 
     57 using namespace Unicode;
     58 
     59 COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small);
     60 
     61 #ifdef STRING_STATS
     62 
     63 static Mutex& statsMutex()
     64 {
     65     DEFINE_STATIC_LOCAL(Mutex, mutex, ());
     66     return mutex;
     67 }
     68 
     69 static HashSet<void*>& liveStrings()
     70 {
     71     // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
     72     DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
     73     return strings;
     74 }
     75 
     76 void addStringForStats(StringImpl* string)
     77 {
     78     MutexLocker locker(statsMutex());
     79     liveStrings().add(string);
     80 }
     81 
     82 void removeStringForStats(StringImpl* string)
     83 {
     84     MutexLocker locker(statsMutex());
     85     liveStrings().remove(string);
     86 }
     87 
     88 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
     89 {
     90     const unsigned kMaxSnippetLength = 64;
     91     snippet.clear();
     92 
     93     size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
     94     if (expectedLength == kMaxSnippetLength)
     95         expectedLength += 3; // For the "...".
     96     ++expectedLength; // For the terminating '\0'.
     97     snippet.reserveCapacity(expectedLength);
     98 
     99     size_t i;
    100     for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
    101         UChar c = (*string)[i];
    102         if (isASCIIPrintable(c))
    103             snippet.append(c);
    104         else
    105             snippet.append('?');
    106     }
    107     if (i < string->length()) {
    108         snippet.append('.');
    109         snippet.append('.');
    110         snippet.append('.');
    111     }
    112     snippet.append('\0');
    113 }
    114 
    115 static bool isUnnecessarilyWide(const StringImpl* string)
    116 {
    117     if (string->is8Bit())
    118         return false;
    119     UChar c = 0;
    120     for (unsigned i = 0; i < string->length(); ++i)
    121         c |= (*string)[i] >> 8;
    122     return !c;
    123 }
    124 
    125 class PerStringStats : public RefCounted<PerStringStats> {
    126 public:
    127     static PassRefPtr<PerStringStats> create()
    128     {
    129         return adoptRef(new PerStringStats);
    130     }
    131 
    132     void add(const StringImpl* string)
    133     {
    134         ++m_numberOfCopies;
    135         if (!m_length) {
    136             m_length = string->length();
    137             fillWithSnippet(string, m_snippet);
    138         }
    139         if (string->isAtomic())
    140             ++m_numberOfAtomicCopies;
    141         if (isUnnecessarilyWide(string))
    142             m_unnecessarilyWide = true;
    143     }
    144 
    145     size_t totalCharacters() const
    146     {
    147         return m_numberOfCopies * m_length;
    148     }
    149 
    150     void print()
    151     {
    152         const char* status = "ok";
    153         if (m_unnecessarilyWide)
    154             status = "16";
    155         dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
    156     }
    157 
    158     bool m_unnecessarilyWide;
    159     unsigned m_numberOfCopies;
    160     unsigned m_length;
    161     unsigned m_numberOfAtomicCopies;
    162     Vector<char> m_snippet;
    163 
    164 private:
    165     PerStringStats()
    166         : m_unnecessarilyWide(false)
    167         , m_numberOfCopies(0)
    168         , m_length(0)
    169         , m_numberOfAtomicCopies(0)
    170     {
    171     }
    172 };
    173 
    174 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
    175 {
    176     if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
    177         return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
    178     if (a->totalCharacters() != b->totalCharacters())
    179         return a->totalCharacters() < b->totalCharacters();
    180     if (a->m_numberOfCopies != b->m_numberOfCopies)
    181         return a->m_numberOfCopies < b->m_numberOfCopies;
    182     if (a->m_length != b->m_length)
    183         return a->m_length < b->m_length;
    184     return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
    185 }
    186 
    187 static void printLiveStringStats(void*)
    188 {
    189     MutexLocker locker(statsMutex());
    190     HashSet<void*>& strings = liveStrings();
    191 
    192     HashMap<StringImpl*, RefPtr<PerStringStats> > stats;
    193     for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
    194         StringImpl* string = static_cast<StringImpl*>(*iter);
    195         HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string);
    196         RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
    197         value->add(string);
    198         stats.set(string, value.release());
    199     }
    200 
    201     Vector<RefPtr<PerStringStats> > all;
    202     for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter)
    203         all.append(iter->value);
    204 
    205     std::sort(all.begin(), all.end());
    206     std::reverse(all.begin(), all.end());
    207     for (size_t i = 0; i < 20 && i < all.size(); ++i)
    208         all[i]->print();
    209 }
    210 
    211 StringStats StringImpl::m_stringStats;
    212 
    213 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
    214 
    215 void StringStats::removeString(StringImpl* string)
    216 {
    217     unsigned length = string->length();
    218     --m_totalNumberStrings;
    219 
    220     if (string->is8Bit()) {
    221         --m_number8BitStrings;
    222         m_total8BitData -= length;
    223     } else {
    224         --m_number16BitStrings;
    225         m_total16BitData -= length;
    226     }
    227 
    228     if (!--s_stringRemovesTillPrintStats) {
    229         s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
    230         printStats();
    231     }
    232 }
    233 
    234 void StringStats::printStats()
    235 {
    236     dataLogF("String stats for process id %d:\n", getCurrentProcessID());
    237 
    238     unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
    239     double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
    240     double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
    241     dataLogF("%8u (%5.2f%%) 8 bit        %12llu chars  %12llu bytes  avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
    242 
    243     double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
    244     double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
    245     dataLogF("%8u (%5.2f%%) 16 bit       %12llu chars  %12llu bytes  avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
    246 
    247     double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
    248     unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
    249     dataLogF("%8u Total                 %12llu chars  %12llu bytes  avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
    250     unsigned long long totalSavedBytes = m_total8BitData;
    251     double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
    252     dataLogF("         Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
    253 
    254     unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
    255     double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
    256     dataLogF("         StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
    257 
    258     callOnMainThread(printLiveStringStats, 0);
    259 }
    260 #endif
    261 
    262 void* StringImpl::operator new(size_t size)
    263 {
    264     ASSERT(size == sizeof(StringImpl));
    265     return partitionAllocGeneric(Partitions::getBufferPartition(), size);
    266 }
    267 
    268 void StringImpl::operator delete(void* ptr)
    269 {
    270     partitionFreeGeneric(Partitions::getBufferPartition(), ptr);
    271 }
    272 
    273 inline StringImpl::~StringImpl()
    274 {
    275     ASSERT(!isStatic());
    276 
    277     STRING_STATS_REMOVE_STRING(this);
    278 
    279     if (isAtomic())
    280         AtomicString::remove(this);
    281 }
    282 
    283 void StringImpl::destroyIfNotStatic()
    284 {
    285     if (!isStatic())
    286         delete this;
    287 }
    288 
    289 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
    290 {
    291     if (!length) {
    292         data = 0;
    293         return empty();
    294     }
    295 
    296     // Allocate a single buffer large enough to contain the StringImpl
    297     // struct as well as the data which it contains. This removes one
    298     // heap allocation from this call.
    299     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<LChar>(length)));
    300 
    301     data = reinterpret_cast<LChar*>(string + 1);
    302     return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
    303 }
    304 
    305 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
    306 {
    307     if (!length) {
    308         data = 0;
    309         return empty();
    310     }
    311 
    312     // Allocate a single buffer large enough to contain the StringImpl
    313     // struct as well as the data which it contains. This removes one
    314     // heap allocation from this call.
    315     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<UChar>(length)));
    316 
    317     data = reinterpret_cast<UChar*>(string + 1);
    318     return adoptRef(new (string) StringImpl(length));
    319 }
    320 
    321 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length)
    322 {
    323     ASSERT(originalString->hasOneRef());
    324 
    325     if (!length)
    326         return empty();
    327 
    328     bool is8Bit = originalString->is8Bit();
    329     // Same as createUninitialized() except here we use realloc.
    330     size_t size = is8Bit ? allocationSize<LChar>(length) : allocationSize<UChar>(length);
    331     originalString->~StringImpl();
    332     StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
    333     if (is8Bit)
    334         return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
    335     return adoptRef(new (string) StringImpl(length));
    336 }
    337 
    338 static StaticStringsTable& staticStrings()
    339 {
    340     DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
    341     return staticStrings;
    342 }
    343 
    344 #ifndef NDEBUG
    345 static bool s_allowCreationOfStaticStrings = true;
    346 #endif
    347 
    348 const StaticStringsTable& StringImpl::allStaticStrings()
    349 {
    350     return staticStrings();
    351 }
    352 
    353 void StringImpl::freezeStaticStrings()
    354 {
    355     ASSERT(isMainThread());
    356 
    357 #ifndef NDEBUG
    358     s_allowCreationOfStaticStrings = false;
    359 #endif
    360 }
    361 
    362 unsigned StringImpl::m_highestStaticStringLength = 0;
    363 
    364 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
    365 {
    366     ASSERT(s_allowCreationOfStaticStrings);
    367     ASSERT(string);
    368     ASSERT(length);
    369 
    370     StaticStringsTable::const_iterator it = staticStrings().find(hash);
    371     if (it != staticStrings().end()) {
    372         ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
    373         return it->value;
    374     }
    375 
    376     // Allocate a single buffer large enough to contain the StringImpl
    377     // struct as well as the data which it contains. This removes one
    378     // heap allocation from this call.
    379     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
    380     size_t size = sizeof(StringImpl) + length * sizeof(LChar);
    381 
    382     WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
    383     StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
    384 
    385     LChar* data = reinterpret_cast<LChar*>(impl + 1);
    386     impl = new (impl) StringImpl(length, hash, StaticString);
    387     memcpy(data, string, length * sizeof(LChar));
    388 #ifndef NDEBUG
    389     impl->assertHashIsCorrect();
    390 #endif
    391 
    392     ASSERT(isMainThread());
    393     m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
    394     staticStrings().add(hash, impl);
    395     WTF_ANNOTATE_BENIGN_RACE(impl,
    396         "Benign race on the reference counter of a static string created by StringImpl::createStatic");
    397 
    398     return impl;
    399 }
    400 
    401 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
    402 {
    403     if (!characters || !length)
    404         return empty();
    405 
    406     UChar* data;
    407     RefPtr<StringImpl> string = createUninitialized(length, data);
    408     memcpy(data, characters, length * sizeof(UChar));
    409     return string.release();
    410 }
    411 
    412 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
    413 {
    414     if (!characters || !length)
    415         return empty();
    416 
    417     LChar* data;
    418     RefPtr<StringImpl> string = createUninitialized(length, data);
    419     memcpy(data, characters, length * sizeof(LChar));
    420     return string.release();
    421 }
    422 
    423 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
    424 {
    425     if (!characters || !length)
    426         return empty();
    427 
    428     LChar* data;
    429     RefPtr<StringImpl> string = createUninitialized(length, data);
    430 
    431     for (size_t i = 0; i < length; ++i) {
    432         if (characters[i] & 0xff00)
    433             return create(characters, length);
    434         data[i] = static_cast<LChar>(characters[i]);
    435     }
    436 
    437     return string.release();
    438 }
    439 
    440 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
    441 {
    442     if (!string)
    443         return empty();
    444     size_t length = strlen(reinterpret_cast<const char*>(string));
    445     RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
    446     return create(string, length);
    447 }
    448 
    449 bool StringImpl::containsOnlyWhitespace()
    450 {
    451     // FIXME: The definition of whitespace here includes a number of characters
    452     // that are not whitespace from the point of view of RenderText; I wonder if
    453     // that's a problem in practice.
    454     if (is8Bit()) {
    455         for (unsigned i = 0; i < m_length; ++i) {
    456             UChar c = characters8()[i];
    457             if (!isASCIISpace(c))
    458                 return false;
    459         }
    460 
    461         return true;
    462     }
    463 
    464     for (unsigned i = 0; i < m_length; ++i) {
    465         UChar c = characters16()[i];
    466         if (!isASCIISpace(c))
    467             return false;
    468     }
    469     return true;
    470 }
    471 
    472 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
    473 {
    474     if (start >= m_length)
    475         return empty();
    476     unsigned maxLength = m_length - start;
    477     if (length >= maxLength) {
    478         if (!start)
    479             return this;
    480         length = maxLength;
    481     }
    482     if (is8Bit())
    483         return create(characters8() + start, length);
    484 
    485     return create(characters16() + start, length);
    486 }
    487 
    488 UChar32 StringImpl::characterStartingAt(unsigned i)
    489 {
    490     if (is8Bit())
    491         return characters8()[i];
    492     if (U16_IS_SINGLE(characters16()[i]))
    493         return characters16()[i];
    494     if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
    495         return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
    496     return 0;
    497 }
    498 
    499 PassRefPtr<StringImpl> StringImpl::lower()
    500 {
    501     // Note: This is a hot function in the Dromaeo benchmark, specifically the
    502     // no-op code path up through the first 'return' statement.
    503 
    504     // First scan the string for uppercase and non-ASCII characters:
    505     bool noUpper = true;
    506     UChar ored = 0;
    507     if (is8Bit()) {
    508         const LChar* end = characters8() + m_length;
    509         for (const LChar* chp = characters8(); chp != end; ++chp) {
    510             if (UNLIKELY(isASCIIUpper(*chp)))
    511                 noUpper = false;
    512             ored |= *chp;
    513         }
    514         // Nothing to do if the string is all ASCII with no uppercase.
    515         if (noUpper && !(ored & ~0x7F))
    516             return this;
    517 
    518         RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
    519         int32_t length = m_length;
    520 
    521         LChar* data8;
    522         RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
    523 
    524         if (!(ored & ~0x7F)) {
    525             for (int32_t i = 0; i < length; ++i)
    526                 data8[i] = toASCIILower(characters8()[i]);
    527 
    528             return newImpl.release();
    529         }
    530 
    531         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
    532         for (int32_t i = 0; i < length; ++i)
    533             data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
    534 
    535         return newImpl.release();
    536     }
    537 
    538     const UChar* end = characters16() + m_length;
    539     for (const UChar* chp = characters16(); chp != end; ++chp) {
    540         if (UNLIKELY(isASCIIUpper(*chp)))
    541             noUpper = false;
    542         ored |= *chp;
    543     }
    544     // Nothing to do if the string is all ASCII with no uppercase.
    545     if (noUpper && !(ored & ~0x7F))
    546         return this;
    547 
    548     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
    549     int32_t length = m_length;
    550 
    551     if (!(ored & ~0x7F)) {
    552         UChar* data16;
    553         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
    554 
    555         for (int32_t i = 0; i < length; ++i) {
    556             UChar c = characters16()[i];
    557             data16[i] = toASCIILower(c);
    558         }
    559         return newImpl.release();
    560     }
    561 
    562     // Do a slower implementation for cases that include non-ASCII characters.
    563     UChar* data16;
    564     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
    565 
    566     bool error;
    567     int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
    568     if (!error && realLength == length)
    569         return newImpl.release();
    570 
    571     newImpl = createUninitialized(realLength, data16);
    572     Unicode::toLower(data16, realLength, characters16(), m_length, &error);
    573     if (error)
    574         return this;
    575     return newImpl.release();
    576 }
    577 
    578 PassRefPtr<StringImpl> StringImpl::upper()
    579 {
    580     // This function could be optimized for no-op cases the way lower() is,
    581     // but in empirical testing, few actual calls to upper() are no-ops, so
    582     // it wouldn't be worth the extra time for pre-scanning.
    583 
    584     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
    585     int32_t length = m_length;
    586 
    587     if (is8Bit()) {
    588         LChar* data8;
    589         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
    590 
    591         // Do a faster loop for the case where all the characters are ASCII.
    592         LChar ored = 0;
    593         for (int i = 0; i < length; ++i) {
    594             LChar c = characters8()[i];
    595             ored |= c;
    596             data8[i] = toASCIIUpper(c);
    597         }
    598         if (!(ored & ~0x7F))
    599             return newImpl.release();
    600 
    601         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
    602         int numberSharpSCharacters = 0;
    603 
    604         // There are two special cases.
    605         //  1. latin-1 characters when converted to upper case are 16 bit characters.
    606         //  2. Lower case sharp-S converts to "SS" (two characters)
    607         for (int32_t i = 0; i < length; ++i) {
    608             LChar c = characters8()[i];
    609             if (UNLIKELY(c == smallLetterSharpS))
    610                 ++numberSharpSCharacters;
    611             UChar upper = Unicode::toUpper(c);
    612             if (UNLIKELY(upper > 0xff)) {
    613                 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
    614                 goto upconvert;
    615             }
    616             data8[i] = static_cast<LChar>(upper);
    617         }
    618 
    619         if (!numberSharpSCharacters)
    620             return newImpl.release();
    621 
    622         // We have numberSSCharacters sharp-s characters, but none of the other special characters.
    623         newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
    624 
    625         LChar* dest = data8;
    626 
    627         for (int32_t i = 0; i < length; ++i) {
    628             LChar c = characters8()[i];
    629             if (c == smallLetterSharpS) {
    630                 *dest++ = 'S';
    631                 *dest++ = 'S';
    632             } else
    633                 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
    634         }
    635 
    636         return newImpl.release();
    637     }
    638 
    639 upconvert:
    640     RefPtr<StringImpl> upconverted = upconvertedString();
    641     const UChar* source16 = upconverted->characters16();
    642 
    643     UChar* data16;
    644     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
    645 
    646     // Do a faster loop for the case where all the characters are ASCII.
    647     UChar ored = 0;
    648     for (int i = 0; i < length; ++i) {
    649         UChar c = source16[i];
    650         ored |= c;
    651         data16[i] = toASCIIUpper(c);
    652     }
    653     if (!(ored & ~0x7F))
    654         return newImpl.release();
    655 
    656     // Do a slower implementation for cases that include non-ASCII characters.
    657     bool error;
    658     int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
    659     if (!error && realLength == length)
    660         return newImpl;
    661     newImpl = createUninitialized(realLength, data16);
    662     Unicode::toUpper(data16, realLength, source16, m_length, &error);
    663     if (error)
    664         return this;
    665     return newImpl.release();
    666 }
    667 
    668 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)
    669 {
    670     if (equalIgnoringCase(localeId, lang))
    671         return true;
    672     static char localeIdPrefix[4];
    673     static const char delimeter[4] = "-_@";
    674 
    675     size_t langLength = strlen(lang);
    676     RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
    677     strncpy(localeIdPrefix, lang, langLength);
    678     for (int i = 0; i < 3; ++i) {
    679         localeIdPrefix[langLength] = delimeter[i];
    680         // case-insensitive comparison
    681         if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLength + 1, false))
    682             return true;
    683     }
    684     return false;
    685 }
    686 
    687 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, const char*, UErrorCode*);
    688 
    689 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString)
    690 {
    691     UChar* data16;
    692     int32_t targetLength = length;
    693     RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
    694     do {
    695         UErrorCode status = U_ZERO_ERROR;
    696         targetLength = converter(data16, targetLength, source16, length, locale, &status);
    697         if (U_SUCCESS(status)) {
    698             output->truncateAssumingIsolated(targetLength);
    699             return output.release();
    700         }
    701         if (status != U_BUFFER_OVERFLOW_ERROR)
    702             return originalString;
    703         // Expand the buffer.
    704         output = StringImpl::createUninitialized(targetLength, data16);
    705     } while (true);
    706 }
    707 
    708 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
    709 {
    710     // Use the more-optimized code path most of the time.
    711     // Only Turkic (tr and az) languages and Lithuanian requires
    712     // locale-specific lowercasing rules. Even though CLDR has el-Lower,
    713     // it's identical to the locale-agnostic lowercasing. Context-dependent
    714     // handling of Greek capital sigma is built into the common lowercasing
    715     // function in ICU.
    716     const char* localeForConversion = 0;
    717     if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
    718         localeForConversion = "tr";
    719     else if (localeIdMatchesLang(localeIdentifier, "lt"))
    720         localeForConversion = "lt";
    721     else
    722         return lower();
    723 
    724     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
    725         CRASH();
    726     int length = m_length;
    727 
    728     RefPtr<StringImpl> upconverted = upconvertedString();
    729     const UChar* source16 = upconverted->characters16();
    730     return caseConvert(source16, length, u_strToLower, localeForConversion, this);
    731 }
    732 
    733 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
    734 {
    735     // Use the more-optimized code path most of the time.
    736     // Only Turkic (tr and az) languages and Greek require locale-specific
    737     // lowercasing rules.
    738     icu::UnicodeString transliteratorId;
    739     const char* localeForConversion = 0;
    740     if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
    741         localeForConversion = "tr";
    742     else if (localeIdMatchesLang(localeIdentifier, "el"))
    743         transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
    744     else if (localeIdMatchesLang(localeIdentifier, "lt"))
    745         localeForConversion = "lt";
    746     else
    747         return upper();
    748 
    749     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
    750         CRASH();
    751     int length = m_length;
    752 
    753     RefPtr<StringImpl> upconverted = upconvertedString();
    754     const UChar* source16 = upconverted->characters16();
    755 
    756     if (localeForConversion)
    757         return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
    758 
    759     // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek.
    760     UErrorCode status = U_ZERO_ERROR;
    761     OwnPtr<icu::Transliterator> translit =
    762         adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORWARD, status));
    763     if (U_FAILURE(status))
    764         return upper();
    765 
    766     // target will be copy-on-write.
    767     icu::UnicodeString target(false, source16, length);
    768     translit->transliterate(target);
    769 
    770     return create(target.getBuffer(), target.length());
    771 }
    772 
    773 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
    774 {
    775     if (!(character & ~0x7F)) {
    776         LChar* data;
    777         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    778         for (unsigned i = 0; i < m_length; ++i)
    779             data[i] = character;
    780         return newImpl.release();
    781     }
    782     UChar* data;
    783     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    784     for (unsigned i = 0; i < m_length; ++i)
    785         data[i] = character;
    786     return newImpl.release();
    787 }
    788 
    789 PassRefPtr<StringImpl> StringImpl::foldCase()
    790 {
    791     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
    792     int32_t length = m_length;
    793 
    794     if (is8Bit()) {
    795         // Do a faster loop for the case where all the characters are ASCII.
    796         LChar* data;
    797         RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
    798         LChar ored = 0;
    799 
    800         for (int32_t i = 0; i < length; ++i) {
    801             LChar c = characters8()[i];
    802             data[i] = toASCIILower(c);
    803             ored |= c;
    804         }
    805 
    806         if (!(ored & ~0x7F))
    807             return newImpl.release();
    808 
    809         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
    810         for (int32_t i = 0; i < length; ++i)
    811             data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
    812 
    813         return newImpl.release();
    814     }
    815 
    816     // Do a faster loop for the case where all the characters are ASCII.
    817     UChar* data;
    818     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    819     UChar ored = 0;
    820     for (int32_t i = 0; i < length; ++i) {
    821         UChar c = characters16()[i];
    822         ored |= c;
    823         data[i] = toASCIILower(c);
    824     }
    825     if (!(ored & ~0x7F))
    826         return newImpl.release();
    827 
    828     // Do a slower implementation for cases that include non-ASCII characters.
    829     bool error;
    830     int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
    831     if (!error && realLength == length)
    832         return newImpl.release();
    833     newImpl = createUninitialized(realLength, data);
    834     Unicode::foldCase(data, realLength, characters16(), m_length, &error);
    835     if (error)
    836         return this;
    837     return newImpl.release();
    838 }
    839 
    840 template <class UCharPredicate>
    841 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
    842 {
    843     if (!m_length)
    844         return empty();
    845 
    846     unsigned start = 0;
    847     unsigned end = m_length - 1;
    848 
    849     // skip white space from start
    850     while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
    851         ++start;
    852 
    853     // only white space
    854     if (start > end)
    855         return empty();
    856 
    857     // skip white space from end
    858     while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
    859         --end;
    860 
    861     if (!start && end == m_length - 1)
    862         return this;
    863     if (is8Bit())
    864         return create(characters8() + start, end + 1 - start);
    865     return create(characters16() + start, end + 1 - start);
    866 }
    867 
    868 class UCharPredicate {
    869 public:
    870     inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
    871 
    872     inline bool operator()(UChar ch) const
    873     {
    874         return m_function(ch);
    875     }
    876 
    877 private:
    878     const CharacterMatchFunctionPtr m_function;
    879 };
    880 
    881 class SpaceOrNewlinePredicate {
    882 public:
    883     inline bool operator()(UChar ch) const
    884     {
    885         return isSpaceOrNewline(ch);
    886     }
    887 };
    888 
    889 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
    890 {
    891     return stripMatchedCharacters(SpaceOrNewlinePredicate());
    892 }
    893 
    894 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
    895 {
    896     return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
    897 }
    898 
    899 template <typename CharType>
    900 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
    901 {
    902     const CharType* from = characters;
    903     const CharType* fromend = from + m_length;
    904 
    905     // Assume the common case will not remove any characters
    906     while (from != fromend && !findMatch(*from))
    907         ++from;
    908     if (from == fromend)
    909         return this;
    910 
    911     StringBuffer<CharType> data(m_length);
    912     CharType* to = data.characters();
    913     unsigned outc = from - characters;
    914 
    915     if (outc)
    916         memcpy(to, characters, outc * sizeof(CharType));
    917 
    918     while (true) {
    919         while (from != fromend && findMatch(*from))
    920             ++from;
    921         while (from != fromend && !findMatch(*from))
    922             to[outc++] = *from++;
    923         if (from == fromend)
    924             break;
    925     }
    926 
    927     data.shrink(outc);
    928 
    929     return data.release();
    930 }
    931 
    932 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
    933 {
    934     if (is8Bit())
    935         return removeCharacters(characters8(), findMatch);
    936     return removeCharacters(characters16(), findMatch);
    937 }
    938 
    939 template <typename CharType, class UCharPredicate>
    940 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
    941 {
    942     StringBuffer<CharType> data(m_length);
    943 
    944     const CharType* from = getCharacters<CharType>();
    945     const CharType* fromend = from + m_length;
    946     int outc = 0;
    947     bool changedToSpace = false;
    948 
    949     CharType* to = data.characters();
    950 
    951     if (stripBehavior == StripExtraWhiteSpace) {
    952         while (true) {
    953             while (from != fromend && predicate(*from)) {
    954                 if (*from != ' ')
    955                     changedToSpace = true;
    956                 ++from;
    957             }
    958             while (from != fromend && !predicate(*from))
    959                 to[outc++] = *from++;
    960             if (from != fromend)
    961                 to[outc++] = ' ';
    962             else
    963                 break;
    964         }
    965 
    966         if (outc > 0 && to[outc - 1] == ' ')
    967             --outc;
    968     } else {
    969         for (; from != fromend; ++from) {
    970             if (predicate(*from)) {
    971                 if (*from != ' ')
    972                     changedToSpace = true;
    973                 to[outc++] = ' ';
    974             } else {
    975                 to[outc++] = *from;
    976             }
    977         }
    978     }
    979 
    980     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
    981         return this;
    982 
    983     data.shrink(outc);
    984 
    985     return data.release();
    986 }
    987 
    988 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
    989 {
    990     if (is8Bit())
    991         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
    992     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
    993 }
    994 
    995 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
    996 {
    997     if (is8Bit())
    998         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
    999     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
   1000 }
   1001 
   1002 int StringImpl::toIntStrict(bool* ok, int base)
   1003 {
   1004     if (is8Bit())
   1005         return charactersToIntStrict(characters8(), m_length, ok, base);
   1006     return charactersToIntStrict(characters16(), m_length, ok, base);
   1007 }
   1008 
   1009 unsigned StringImpl::toUIntStrict(bool* ok, int base)
   1010 {
   1011     if (is8Bit())
   1012         return charactersToUIntStrict(characters8(), m_length, ok, base);
   1013     return charactersToUIntStrict(characters16(), m_length, ok, base);
   1014 }
   1015 
   1016 int64_t StringImpl::toInt64Strict(bool* ok, int base)
   1017 {
   1018     if (is8Bit())
   1019         return charactersToInt64Strict(characters8(), m_length, ok, base);
   1020     return charactersToInt64Strict(characters16(), m_length, ok, base);
   1021 }
   1022 
   1023 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
   1024 {
   1025     if (is8Bit())
   1026         return charactersToUInt64Strict(characters8(), m_length, ok, base);
   1027     return charactersToUInt64Strict(characters16(), m_length, ok, base);
   1028 }
   1029 
   1030 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
   1031 {
   1032     if (is8Bit())
   1033         return charactersToIntPtrStrict(characters8(), m_length, ok, base);
   1034     return charactersToIntPtrStrict(characters16(), m_length, ok, base);
   1035 }
   1036 
   1037 int StringImpl::toInt(bool* ok)
   1038 {
   1039     if (is8Bit())
   1040         return charactersToInt(characters8(), m_length, ok);
   1041     return charactersToInt(characters16(), m_length, ok);
   1042 }
   1043 
   1044 unsigned StringImpl::toUInt(bool* ok)
   1045 {
   1046     if (is8Bit())
   1047         return charactersToUInt(characters8(), m_length, ok);
   1048     return charactersToUInt(characters16(), m_length, ok);
   1049 }
   1050 
   1051 int64_t StringImpl::toInt64(bool* ok)
   1052 {
   1053     if (is8Bit())
   1054         return charactersToInt64(characters8(), m_length, ok);
   1055     return charactersToInt64(characters16(), m_length, ok);
   1056 }
   1057 
   1058 uint64_t StringImpl::toUInt64(bool* ok)
   1059 {
   1060     if (is8Bit())
   1061         return charactersToUInt64(characters8(), m_length, ok);
   1062     return charactersToUInt64(characters16(), m_length, ok);
   1063 }
   1064 
   1065 intptr_t StringImpl::toIntPtr(bool* ok)
   1066 {
   1067     if (is8Bit())
   1068         return charactersToIntPtr(characters8(), m_length, ok);
   1069     return charactersToIntPtr(characters16(), m_length, ok);
   1070 }
   1071 
   1072 double StringImpl::toDouble(bool* ok)
   1073 {
   1074     if (is8Bit())
   1075         return charactersToDouble(characters8(), m_length, ok);
   1076     return charactersToDouble(characters16(), m_length, ok);
   1077 }
   1078 
   1079 float StringImpl::toFloat(bool* ok)
   1080 {
   1081     if (is8Bit())
   1082         return charactersToFloat(characters8(), m_length, ok);
   1083     return charactersToFloat(characters16(), m_length, ok);
   1084 }
   1085 
   1086 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
   1087 {
   1088     while (length--) {
   1089         LChar bc = *b++;
   1090         if (foldCase(*a++) != foldCase(bc))
   1091             return false;
   1092     }
   1093     return true;
   1094 }
   1095 
   1096 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
   1097 {
   1098     while (length--) {
   1099         LChar bc = *b++;
   1100         if (foldCase(*a++) != foldCase(bc))
   1101             return false;
   1102     }
   1103     return true;
   1104 }
   1105 
   1106 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
   1107 {
   1108     if (is8Bit())
   1109         return WTF::find(characters8(), m_length, matchFunction, start);
   1110     return WTF::find(characters16(), m_length, matchFunction, start);
   1111 }
   1112 
   1113 size_t StringImpl::find(const LChar* matchString, unsigned index)
   1114 {
   1115     // Check for null or empty string to match against
   1116     if (!matchString)
   1117         return kNotFound;
   1118     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
   1119     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
   1120     unsigned matchLength = matchStringLength;
   1121     if (!matchLength)
   1122         return min(index, length());
   1123 
   1124     // Optimization 1: fast case for strings of length 1.
   1125     if (matchLength == 1)
   1126         return WTF::find(characters16(), length(), *matchString, index);
   1127 
   1128     // Check index & matchLength are in range.
   1129     if (index > length())
   1130         return kNotFound;
   1131     unsigned searchLength = length() - index;
   1132     if (matchLength > searchLength)
   1133         return kNotFound;
   1134     // delta is the number of additional times to test; delta == 0 means test only once.
   1135     unsigned delta = searchLength - matchLength;
   1136 
   1137     const UChar* searchCharacters = characters16() + index;
   1138 
   1139     // Optimization 2: keep a running hash of the strings,
   1140     // only call equal if the hashes match.
   1141     unsigned searchHash = 0;
   1142     unsigned matchHash = 0;
   1143     for (unsigned i = 0; i < matchLength; ++i) {
   1144         searchHash += searchCharacters[i];
   1145         matchHash += matchString[i];
   1146     }
   1147 
   1148     unsigned i = 0;
   1149     // keep looping until we match
   1150     while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
   1151         if (i == delta)
   1152             return kNotFound;
   1153         searchHash += searchCharacters[i + matchLength];
   1154         searchHash -= searchCharacters[i];
   1155         ++i;
   1156     }
   1157     return index + i;
   1158 }
   1159 
   1160 template<typename CharType>
   1161 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
   1162 {
   1163     // delta is the number of additional times to test; delta == 0 means test only once.
   1164     unsigned delta = searchLength - matchLength;
   1165 
   1166     unsigned i = 0;
   1167     while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
   1168         if (i == delta)
   1169             return kNotFound;
   1170         ++i;
   1171     }
   1172     return index + i;
   1173 }
   1174 
   1175 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
   1176 {
   1177     // Check for null or empty string to match against
   1178     if (!matchString)
   1179         return kNotFound;
   1180     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
   1181     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
   1182     unsigned matchLength = matchStringLength;
   1183     if (!matchLength)
   1184         return min(index, length());
   1185 
   1186     // Check index & matchLength are in range.
   1187     if (index > length())
   1188         return kNotFound;
   1189     unsigned searchLength = length() - index;
   1190     if (matchLength > searchLength)
   1191         return kNotFound;
   1192 
   1193     if (is8Bit())
   1194         return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
   1195     return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
   1196 }
   1197 
   1198 template <typename SearchCharacterType, typename MatchCharacterType>
   1199 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
   1200 {
   1201     // Optimization: keep a running hash of the strings,
   1202     // only call equal() if the hashes match.
   1203 
   1204     // delta is the number of additional times to test; delta == 0 means test only once.
   1205     unsigned delta = searchLength - matchLength;
   1206 
   1207     unsigned searchHash = 0;
   1208     unsigned matchHash = 0;
   1209 
   1210     for (unsigned i = 0; i < matchLength; ++i) {
   1211         searchHash += searchCharacters[i];
   1212         matchHash += matchCharacters[i];
   1213     }
   1214 
   1215     unsigned i = 0;
   1216     // keep looping until we match
   1217     while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
   1218         if (i == delta)
   1219             return kNotFound;
   1220         searchHash += searchCharacters[i + matchLength];
   1221         searchHash -= searchCharacters[i];
   1222         ++i;
   1223     }
   1224     return index + i;
   1225 }
   1226 
   1227 size_t StringImpl::find(StringImpl* matchString)
   1228 {
   1229     // Check for null string to match against
   1230     if (UNLIKELY(!matchString))
   1231         return kNotFound;
   1232     unsigned matchLength = matchString->length();
   1233 
   1234     // Optimization 1: fast case for strings of length 1.
   1235     if (matchLength == 1) {
   1236         if (is8Bit()) {
   1237             if (matchString->is8Bit())
   1238                 return WTF::find(characters8(), length(), matchString->characters8()[0]);
   1239             return WTF::find(characters8(), length(), matchString->characters16()[0]);
   1240         }
   1241         if (matchString->is8Bit())
   1242             return WTF::find(characters16(), length(), matchString->characters8()[0]);
   1243         return WTF::find(characters16(), length(), matchString->characters16()[0]);
   1244     }
   1245 
   1246     // Check matchLength is in range.
   1247     if (matchLength > length())
   1248         return kNotFound;
   1249 
   1250     // Check for empty string to match against
   1251     if (UNLIKELY(!matchLength))
   1252         return 0;
   1253 
   1254     if (is8Bit()) {
   1255         if (matchString->is8Bit())
   1256             return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
   1257         return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
   1258     }
   1259 
   1260     if (matchString->is8Bit())
   1261         return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
   1262 
   1263     return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
   1264 }
   1265 
   1266 size_t StringImpl::find(StringImpl* matchString, unsigned index)
   1267 {
   1268     // Check for null or empty string to match against
   1269     if (UNLIKELY(!matchString))
   1270         return kNotFound;
   1271 
   1272     unsigned matchLength = matchString->length();
   1273 
   1274     // Optimization 1: fast case for strings of length 1.
   1275     if (matchLength == 1) {
   1276         if (is8Bit())
   1277             return WTF::find(characters8(), length(), (*matchString)[0], index);
   1278         return WTF::find(characters16(), length(), (*matchString)[0], index);
   1279     }
   1280 
   1281     if (UNLIKELY(!matchLength))
   1282         return min(index, length());
   1283 
   1284     // Check index & matchLength are in range.
   1285     if (index > length())
   1286         return kNotFound;
   1287     unsigned searchLength = length() - index;
   1288     if (matchLength > searchLength)
   1289         return kNotFound;
   1290 
   1291     if (is8Bit()) {
   1292         if (matchString->is8Bit())
   1293             return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
   1294         return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
   1295     }
   1296 
   1297     if (matchString->is8Bit())
   1298         return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
   1299 
   1300     return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
   1301 }
   1302 
   1303 template <typename SearchCharacterType, typename MatchCharacterType>
   1304 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
   1305 {
   1306     // delta is the number of additional times to test; delta == 0 means test only once.
   1307     unsigned delta = searchLength - matchLength;
   1308 
   1309     unsigned i = 0;
   1310     // keep looping until we match
   1311     while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
   1312         if (i == delta)
   1313             return kNotFound;
   1314         ++i;
   1315     }
   1316     return index + i;
   1317 }
   1318 
   1319 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
   1320 {
   1321     // Check for null or empty string to match against
   1322     if (!matchString)
   1323         return kNotFound;
   1324     unsigned matchLength = matchString->length();
   1325     if (!matchLength)
   1326         return min(index, length());
   1327 
   1328     // Check index & matchLength are in range.
   1329     if (index > length())
   1330         return kNotFound;
   1331     unsigned searchLength = length() - index;
   1332     if (matchLength > searchLength)
   1333         return kNotFound;
   1334 
   1335     if (is8Bit()) {
   1336         if (matchString->is8Bit())
   1337             return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
   1338         return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
   1339     }
   1340 
   1341     if (matchString->is8Bit())
   1342         return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
   1343 
   1344     return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
   1345 }
   1346 
   1347 size_t StringImpl::findNextLineStart(unsigned index)
   1348 {
   1349     if (is8Bit())
   1350         return WTF::findNextLineStart(characters8(), m_length, index);
   1351     return WTF::findNextLineStart(characters16(), m_length, index);
   1352 }
   1353 
   1354 size_t StringImpl::count(LChar c) const
   1355 {
   1356     int count = 0;
   1357     if (is8Bit()) {
   1358         for (size_t i = 0; i < m_length; ++i)
   1359             count += characters8()[i] == c;
   1360     } else {
   1361         for (size_t i = 0; i < m_length; ++i)
   1362             count += characters16()[i] == c;
   1363     }
   1364     return count;
   1365 }
   1366 
   1367 size_t StringImpl::reverseFind(UChar c, unsigned index)
   1368 {
   1369     if (is8Bit())
   1370         return WTF::reverseFind(characters8(), m_length, c, index);
   1371     return WTF::reverseFind(characters16(), m_length, c, index);
   1372 }
   1373 
   1374 template <typename SearchCharacterType, typename MatchCharacterType>
   1375 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
   1376 {
   1377     // Optimization: keep a running hash of the strings,
   1378     // only call equal if the hashes match.
   1379 
   1380     // delta is the number of additional times to test; delta == 0 means test only once.
   1381     unsigned delta = min(index, length - matchLength);
   1382 
   1383     unsigned searchHash = 0;
   1384     unsigned matchHash = 0;
   1385     for (unsigned i = 0; i < matchLength; ++i) {
   1386         searchHash += searchCharacters[delta + i];
   1387         matchHash += matchCharacters[i];
   1388     }
   1389 
   1390     // keep looping until we match
   1391     while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
   1392         if (!delta)
   1393             return kNotFound;
   1394         --delta;
   1395         searchHash -= searchCharacters[delta + matchLength];
   1396         searchHash += searchCharacters[delta];
   1397     }
   1398     return delta;
   1399 }
   1400 
   1401 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
   1402 {
   1403     // Check for null or empty string to match against
   1404     if (!matchString)
   1405         return kNotFound;
   1406     unsigned matchLength = matchString->length();
   1407     unsigned ourLength = length();
   1408     if (!matchLength)
   1409         return min(index, ourLength);
   1410 
   1411     // Optimization 1: fast case for strings of length 1.
   1412     if (matchLength == 1) {
   1413         if (is8Bit())
   1414             return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
   1415         return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
   1416     }
   1417 
   1418     // Check index & matchLength are in range.
   1419     if (matchLength > ourLength)
   1420         return kNotFound;
   1421 
   1422     if (is8Bit()) {
   1423         if (matchString->is8Bit())
   1424             return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
   1425         return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
   1426     }
   1427 
   1428     if (matchString->is8Bit())
   1429         return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
   1430 
   1431     return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
   1432 }
   1433 
   1434 template <typename SearchCharacterType, typename MatchCharacterType>
   1435 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
   1436 {
   1437     // delta is the number of additional times to test; delta == 0 means test only once.
   1438     unsigned delta = min(index, length - matchLength);
   1439 
   1440     // keep looping until we match
   1441     while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
   1442         if (!delta)
   1443             return kNotFound;
   1444         --delta;
   1445     }
   1446     return delta;
   1447 }
   1448 
   1449 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
   1450 {
   1451     // Check for null or empty string to match against
   1452     if (!matchString)
   1453         return kNotFound;
   1454     unsigned matchLength = matchString->length();
   1455     unsigned ourLength = length();
   1456     if (!matchLength)
   1457         return min(index, ourLength);
   1458 
   1459     // Check index & matchLength are in range.
   1460     if (matchLength > ourLength)
   1461         return kNotFound;
   1462 
   1463     if (is8Bit()) {
   1464         if (matchString->is8Bit())
   1465             return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
   1466         return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
   1467     }
   1468 
   1469     if (matchString->is8Bit())
   1470         return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
   1471 
   1472     return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
   1473 }
   1474 
   1475 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
   1476 {
   1477     ASSERT(stringImpl);
   1478     ASSERT(matchLength <= stringImpl->length());
   1479     ASSERT(startOffset + matchLength <= stringImpl->length());
   1480 
   1481     if (caseSensitive) {
   1482         if (stringImpl->is8Bit())
   1483             return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
   1484         return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
   1485     }
   1486     if (stringImpl->is8Bit())
   1487         return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
   1488     return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
   1489 }
   1490 
   1491 bool StringImpl::startsWith(UChar character) const
   1492 {
   1493     return m_length && (*this)[0] == character;
   1494 }
   1495 
   1496 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
   1497 {
   1498     ASSERT(matchLength);
   1499     if (matchLength > length())
   1500         return false;
   1501     return equalInner(this, 0, matchString, matchLength, caseSensitive);
   1502 }
   1503 
   1504 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
   1505 {
   1506     ASSERT(matchString);
   1507     if (m_length >= matchString->m_length) {
   1508         unsigned start = m_length - matchString->m_length;
   1509         return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
   1510     }
   1511     return false;
   1512 }
   1513 
   1514 bool StringImpl::endsWith(UChar character) const
   1515 {
   1516     return m_length && (*this)[m_length - 1] == character;
   1517 }
   1518 
   1519 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
   1520 {
   1521     ASSERT(matchLength);
   1522     if (matchLength > length())
   1523         return false;
   1524     unsigned startOffset = length() - matchLength;
   1525     return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
   1526 }
   1527 
   1528 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
   1529 {
   1530     if (oldC == newC)
   1531         return this;
   1532 
   1533     if (find(oldC) == kNotFound)
   1534         return this;
   1535 
   1536     unsigned i;
   1537     if (is8Bit()) {
   1538         if (newC <= 0xff) {
   1539             LChar* data;
   1540             LChar oldChar = static_cast<LChar>(oldC);
   1541             LChar newChar = static_cast<LChar>(newC);
   1542 
   1543             RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
   1544 
   1545             for (i = 0; i != m_length; ++i) {
   1546                 LChar ch = characters8()[i];
   1547                 if (ch == oldChar)
   1548                     ch = newChar;
   1549                 data[i] = ch;
   1550             }
   1551             return newImpl.release();
   1552         }
   1553 
   1554         // There is the possibility we need to up convert from 8 to 16 bit,
   1555         // create a 16 bit string for the result.
   1556         UChar* data;
   1557         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
   1558 
   1559         for (i = 0; i != m_length; ++i) {
   1560             UChar ch = characters8()[i];
   1561             if (ch == oldC)
   1562                 ch = newC;
   1563             data[i] = ch;
   1564         }
   1565 
   1566         return newImpl.release();
   1567     }
   1568 
   1569     UChar* data;
   1570     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
   1571 
   1572     for (i = 0; i != m_length; ++i) {
   1573         UChar ch = characters16()[i];
   1574         if (ch == oldC)
   1575             ch = newC;
   1576         data[i] = ch;
   1577     }
   1578     return newImpl.release();
   1579 }
   1580 
   1581 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
   1582 {
   1583     position = min(position, length());
   1584     lengthToReplace = min(lengthToReplace, length() - position);
   1585     unsigned lengthToInsert = str ? str->length() : 0;
   1586     if (!lengthToReplace && !lengthToInsert)
   1587         return this;
   1588 
   1589     RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
   1590 
   1591     if (is8Bit() && (!str || str->is8Bit())) {
   1592         LChar* data;
   1593         RefPtr<StringImpl> newImpl =
   1594         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
   1595         memcpy(data, characters8(), position * sizeof(LChar));
   1596         if (str)
   1597             memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
   1598         memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
   1599                (length() - position - lengthToReplace) * sizeof(LChar));
   1600         return newImpl.release();
   1601     }
   1602     UChar* data;
   1603     RefPtr<StringImpl> newImpl =
   1604         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
   1605     if (is8Bit())
   1606         for (unsigned i = 0; i < position; ++i)
   1607             data[i] = characters8()[i];
   1608     else
   1609         memcpy(data, characters16(), position * sizeof(UChar));
   1610     if (str) {
   1611         if (str->is8Bit())
   1612             for (unsigned i = 0; i < lengthToInsert; ++i)
   1613                 data[i + position] = str->characters8()[i];
   1614         else
   1615             memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
   1616     }
   1617     if (is8Bit()) {
   1618         for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
   1619             data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
   1620     } else {
   1621         memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
   1622             (length() - position - lengthToReplace) * sizeof(UChar));
   1623     }
   1624     return newImpl.release();
   1625 }
   1626 
   1627 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
   1628 {
   1629     if (!replacement)
   1630         return this;
   1631 
   1632     if (replacement->is8Bit())
   1633         return replace(pattern, replacement->characters8(), replacement->length());
   1634 
   1635     return replace(pattern, replacement->characters16(), replacement->length());
   1636 }
   1637 
   1638 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
   1639 {
   1640     ASSERT(replacement);
   1641 
   1642     size_t srcSegmentStart = 0;
   1643     unsigned matchCount = 0;
   1644 
   1645     // Count the matches.
   1646     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
   1647         ++matchCount;
   1648         ++srcSegmentStart;
   1649     }
   1650 
   1651     // If we have 0 matches then we don't have to do any more work.
   1652     if (!matchCount)
   1653         return this;
   1654 
   1655     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
   1656 
   1657     unsigned replaceSize = matchCount * repStrLength;
   1658     unsigned newSize = m_length - matchCount;
   1659     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
   1660 
   1661     newSize += replaceSize;
   1662 
   1663     // Construct the new data.
   1664     size_t srcSegmentEnd;
   1665     unsigned srcSegmentLength;
   1666     srcSegmentStart = 0;
   1667     unsigned dstOffset = 0;
   1668 
   1669     if (is8Bit()) {
   1670         LChar* data;
   1671         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1672 
   1673         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1674             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1675             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
   1676             dstOffset += srcSegmentLength;
   1677             memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
   1678             dstOffset += repStrLength;
   1679             srcSegmentStart = srcSegmentEnd + 1;
   1680         }
   1681 
   1682         srcSegmentLength = m_length - srcSegmentStart;
   1683         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
   1684 
   1685         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1686 
   1687         return newImpl.release();
   1688     }
   1689 
   1690     UChar* data;
   1691     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1692 
   1693     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1694         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1695         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1696 
   1697         dstOffset += srcSegmentLength;
   1698         for (unsigned i = 0; i < repStrLength; ++i)
   1699             data[i + dstOffset] = replacement[i];
   1700 
   1701         dstOffset += repStrLength;
   1702         srcSegmentStart = srcSegmentEnd + 1;
   1703     }
   1704 
   1705     srcSegmentLength = m_length - srcSegmentStart;
   1706     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1707 
   1708     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1709 
   1710     return newImpl.release();
   1711 }
   1712 
   1713 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
   1714 {
   1715     ASSERT(replacement);
   1716 
   1717     size_t srcSegmentStart = 0;
   1718     unsigned matchCount = 0;
   1719 
   1720     // Count the matches.
   1721     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
   1722         ++matchCount;
   1723         ++srcSegmentStart;
   1724     }
   1725 
   1726     // If we have 0 matches then we don't have to do any more work.
   1727     if (!matchCount)
   1728         return this;
   1729 
   1730     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
   1731 
   1732     unsigned replaceSize = matchCount * repStrLength;
   1733     unsigned newSize = m_length - matchCount;
   1734     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
   1735 
   1736     newSize += replaceSize;
   1737 
   1738     // Construct the new data.
   1739     size_t srcSegmentEnd;
   1740     unsigned srcSegmentLength;
   1741     srcSegmentStart = 0;
   1742     unsigned dstOffset = 0;
   1743 
   1744     if (is8Bit()) {
   1745         UChar* data;
   1746         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1747 
   1748         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1749             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1750             for (unsigned i = 0; i < srcSegmentLength; ++i)
   1751                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
   1752 
   1753             dstOffset += srcSegmentLength;
   1754             memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
   1755 
   1756             dstOffset += repStrLength;
   1757             srcSegmentStart = srcSegmentEnd + 1;
   1758         }
   1759 
   1760         srcSegmentLength = m_length - srcSegmentStart;
   1761         for (unsigned i = 0; i < srcSegmentLength; ++i)
   1762             data[i + dstOffset] = characters8()[i + srcSegmentStart];
   1763 
   1764         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1765 
   1766         return newImpl.release();
   1767     }
   1768 
   1769     UChar* data;
   1770     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1771 
   1772     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1773         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1774         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1775 
   1776         dstOffset += srcSegmentLength;
   1777         memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
   1778 
   1779         dstOffset += repStrLength;
   1780         srcSegmentStart = srcSegmentEnd + 1;
   1781     }
   1782 
   1783     srcSegmentLength = m_length - srcSegmentStart;
   1784     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1785 
   1786     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1787 
   1788     return newImpl.release();
   1789 }
   1790 
   1791 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
   1792 {
   1793     if (!pattern || !replacement)
   1794         return this;
   1795 
   1796     unsigned patternLength = pattern->length();
   1797     if (!patternLength)
   1798         return this;
   1799 
   1800     unsigned repStrLength = replacement->length();
   1801     size_t srcSegmentStart = 0;
   1802     unsigned matchCount = 0;
   1803 
   1804     // Count the matches.
   1805     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
   1806         ++matchCount;
   1807         srcSegmentStart += patternLength;
   1808     }
   1809 
   1810     // If we have 0 matches, we don't have to do any more work
   1811     if (!matchCount)
   1812         return this;
   1813 
   1814     unsigned newSize = m_length - matchCount * patternLength;
   1815     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
   1816 
   1817     RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
   1818 
   1819     newSize += matchCount * repStrLength;
   1820 
   1821 
   1822     // Construct the new data
   1823     size_t srcSegmentEnd;
   1824     unsigned srcSegmentLength;
   1825     srcSegmentStart = 0;
   1826     unsigned dstOffset = 0;
   1827     bool srcIs8Bit = is8Bit();
   1828     bool replacementIs8Bit = replacement->is8Bit();
   1829 
   1830     // There are 4 cases:
   1831     // 1. This and replacement are both 8 bit.
   1832     // 2. This and replacement are both 16 bit.
   1833     // 3. This is 8 bit and replacement is 16 bit.
   1834     // 4. This is 16 bit and replacement is 8 bit.
   1835     if (srcIs8Bit && replacementIs8Bit) {
   1836         // Case 1
   1837         LChar* data;
   1838         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1839         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1840             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1841             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
   1842             dstOffset += srcSegmentLength;
   1843             memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
   1844             dstOffset += repStrLength;
   1845             srcSegmentStart = srcSegmentEnd + patternLength;
   1846         }
   1847 
   1848         srcSegmentLength = m_length - srcSegmentStart;
   1849         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
   1850 
   1851         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1852 
   1853         return newImpl.release();
   1854     }
   1855 
   1856     UChar* data;
   1857     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
   1858     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
   1859         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
   1860         if (srcIs8Bit) {
   1861             // Case 3.
   1862             for (unsigned i = 0; i < srcSegmentLength; ++i)
   1863                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
   1864         } else {
   1865             // Case 2 & 4.
   1866             memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1867         }
   1868         dstOffset += srcSegmentLength;
   1869         if (replacementIs8Bit) {
   1870             // Cases 2 & 3.
   1871             for (unsigned i = 0; i < repStrLength; ++i)
   1872                 data[i + dstOffset] = replacement->characters8()[i];
   1873         } else {
   1874             // Case 4
   1875             memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
   1876         }
   1877         dstOffset += repStrLength;
   1878         srcSegmentStart = srcSegmentEnd + patternLength;
   1879     }
   1880 
   1881     srcSegmentLength = m_length - srcSegmentStart;
   1882     if (srcIs8Bit) {
   1883         // Case 3.
   1884         for (unsigned i = 0; i < srcSegmentLength; ++i)
   1885             data[i + dstOffset] = characters8()[i + srcSegmentStart];
   1886     } else {
   1887         // Cases 2 & 4.
   1888         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
   1889     }
   1890 
   1891     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
   1892 
   1893     return newImpl.release();
   1894 }
   1895 
   1896 PassRefPtr<StringImpl> StringImpl::upconvertedString()
   1897 {
   1898     if (is8Bit())
   1899         return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
   1900     return this;
   1901 }
   1902 
   1903 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
   1904 {
   1905     unsigned aLength = a->length();
   1906     unsigned bLength = b->length();
   1907     if (aLength != bLength)
   1908         return false;
   1909 
   1910     if (a->is8Bit()) {
   1911         if (b->is8Bit())
   1912             return equal(a->characters8(), b->characters8(), aLength);
   1913 
   1914         return equal(a->characters8(), b->characters16(), aLength);
   1915     }
   1916 
   1917     if (b->is8Bit())
   1918         return equal(a->characters16(), b->characters8(), aLength);
   1919 
   1920     return equal(a->characters16(), b->characters16(), aLength);
   1921 }
   1922 
   1923 bool equal(const StringImpl* a, const StringImpl* b)
   1924 {
   1925     if (a == b)
   1926         return true;
   1927     if (!a || !b)
   1928         return false;
   1929     if (a->isAtomic() && b->isAtomic())
   1930         return false;
   1931 
   1932     return stringImplContentEqual(a, b);
   1933 }
   1934 
   1935 template <typename CharType>
   1936 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
   1937 {
   1938     if (!a)
   1939         return !b;
   1940     if (!b)
   1941         return false;
   1942 
   1943     if (a->length() != length)
   1944         return false;
   1945     if (a->is8Bit())
   1946         return equal(a->characters8(), b, length);
   1947     return equal(a->characters16(), b, length);
   1948 }
   1949 
   1950 bool equal(const StringImpl* a, const LChar* b, unsigned length)
   1951 {
   1952     return equalInternal(a, b, length);
   1953 }
   1954 
   1955 bool equal(const StringImpl* a, const UChar* b, unsigned length)
   1956 {
   1957     return equalInternal(a, b, length);
   1958 }
   1959 
   1960 bool equal(const StringImpl* a, const LChar* b)
   1961 {
   1962     if (!a)
   1963         return !b;
   1964     if (!b)
   1965         return !a;
   1966 
   1967     unsigned length = a->length();
   1968 
   1969     if (a->is8Bit()) {
   1970         const LChar* aPtr = a->characters8();
   1971         for (unsigned i = 0; i != length; ++i) {
   1972             LChar bc = b[i];
   1973             LChar ac = aPtr[i];
   1974             if (!bc)
   1975                 return false;
   1976             if (ac != bc)
   1977                 return false;
   1978         }
   1979 
   1980         return !b[length];
   1981     }
   1982 
   1983     const UChar* aPtr = a->characters16();
   1984     for (unsigned i = 0; i != length; ++i) {
   1985         LChar bc = b[i];
   1986         if (!bc)
   1987             return false;
   1988         if (aPtr[i] != bc)
   1989             return false;
   1990     }
   1991 
   1992     return !b[length];
   1993 }
   1994 
   1995 bool equalNonNull(const StringImpl* a, const StringImpl* b)
   1996 {
   1997     ASSERT(a && b);
   1998     if (a == b)
   1999         return true;
   2000 
   2001     return stringImplContentEqual(a, b);
   2002 }
   2003 
   2004 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
   2005 {
   2006     if (a == b)
   2007         return true;
   2008     if (!a || !b)
   2009         return false;
   2010 
   2011     return CaseFoldingHash::equal(a, b);
   2012 }
   2013 
   2014 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
   2015 {
   2016     if (!a)
   2017         return !b;
   2018     if (!b)
   2019         return !a;
   2020 
   2021     unsigned length = a->length();
   2022 
   2023     // Do a faster loop for the case where all the characters are ASCII.
   2024     UChar ored = 0;
   2025     bool equal = true;
   2026     if (a->is8Bit()) {
   2027         const LChar* as = a->characters8();
   2028         for (unsigned i = 0; i != length; ++i) {
   2029             LChar bc = b[i];
   2030             if (!bc)
   2031                 return false;
   2032             UChar ac = as[i];
   2033             ored |= ac;
   2034             equal = equal && (toASCIILower(ac) == toASCIILower(bc));
   2035         }
   2036 
   2037         // Do a slower implementation for cases that include non-ASCII characters.
   2038         if (ored & ~0x7F) {
   2039             equal = true;
   2040             for (unsigned i = 0; i != length; ++i)
   2041                 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
   2042         }
   2043 
   2044         return equal && !b[length];
   2045     }
   2046 
   2047     const UChar* as = a->characters16();
   2048     for (unsigned i = 0; i != length; ++i) {
   2049         LChar bc = b[i];
   2050         if (!bc)
   2051             return false;
   2052         UChar ac = as[i];
   2053         ored |= ac;
   2054         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
   2055     }
   2056 
   2057     // Do a slower implementation for cases that include non-ASCII characters.
   2058     if (ored & ~0x7F) {
   2059         equal = true;
   2060         for (unsigned i = 0; i != length; ++i) {
   2061             equal = equal && (foldCase(as[i]) == foldCase(b[i]));
   2062         }
   2063     }
   2064 
   2065     return equal && !b[length];
   2066 }
   2067 
   2068 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
   2069 {
   2070     ASSERT(a && b);
   2071     if (a == b)
   2072         return true;
   2073 
   2074     unsigned length = a->length();
   2075     if (length != b->length())
   2076         return false;
   2077 
   2078     if (a->is8Bit()) {
   2079         if (b->is8Bit())
   2080             return equalIgnoringCase(a->characters8(), b->characters8(), length);
   2081 
   2082         return equalIgnoringCase(b->characters16(), a->characters8(), length);
   2083     }
   2084 
   2085     if (b->is8Bit())
   2086         return equalIgnoringCase(a->characters16(), b->characters8(), length);
   2087 
   2088     return equalIgnoringCase(a->characters16(), b->characters16(), length);
   2089 }
   2090 
   2091 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
   2092 {
   2093     if (!a && b && !b->length())
   2094         return true;
   2095     if (!b && a && !a->length())
   2096         return true;
   2097     return equal(a, b);
   2098 }
   2099 
   2100 size_t StringImpl::sizeInBytes() const
   2101 {
   2102     size_t size = length();
   2103     if (!is8Bit())
   2104         size *= 2;
   2105     return size + sizeof(*this);
   2106 }
   2107 
   2108 } // namespace WTF
   2109