Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  *           (C) 1999 Antti Koivisto (koivisto (at) kde.org)
      4  *           (C) 2001 Dirk Mueller ( mueller (at) kde.org )
      5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      6  * Copyright (C) 2006 Andrew Wellington (proton (at) wiretapped.net)
      7  *
      8  * This library is free software; you can redistribute it and/or
      9  * modify it under the terms of the GNU Library General Public
     10  * License as published by the Free Software Foundation; either
     11  * version 2 of the License, or (at your option) any later version.
     12  *
     13  * This library is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  * Library General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU Library General Public License
     19  * along with this library; see the file COPYING.LIB.  If not, write to
     20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21  * Boston, MA 02110-1301, USA.
     22  *
     23  */
     24 
     25 #include "config.h"
     26 #include "StringImpl.h"
     27 
     28 #include "AtomicString.h"
     29 #include "StringBuffer.h"
     30 #include "StringHash.h"
     31 #include <wtf/StdLibExtras.h>
     32 #include <wtf/WTFThreadData.h>
     33 
     34 using namespace std;
     35 
     36 namespace WTF {
     37 
     38 using namespace Unicode;
     39 
     40 static const unsigned minLengthToShare = 20;
     41 
     42 COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small);
     43 
     44 StringImpl::~StringImpl()
     45 {
     46     ASSERT(!isStatic());
     47 
     48     if (isAtomic())
     49         AtomicString::remove(this);
     50 #if USE(JSC)
     51     if (isIdentifier()) {
     52         if (!wtfThreadData().currentIdentifierTable()->remove(this))
     53             CRASH();
     54     }
     55 #endif
     56 
     57     BufferOwnership ownership = bufferOwnership();
     58     if (ownership != BufferInternal) {
     59         if (ownership == BufferOwned) {
     60             ASSERT(!m_sharedBuffer);
     61             ASSERT(m_data);
     62             fastFree(const_cast<UChar*>(m_data));
     63         } else if (ownership == BufferSubstring) {
     64             ASSERT(m_substringBuffer);
     65             m_substringBuffer->deref();
     66         } else {
     67             ASSERT(ownership == BufferShared);
     68             ASSERT(m_sharedBuffer);
     69             m_sharedBuffer->deref();
     70         }
     71     }
     72 }
     73 
     74 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
     75 {
     76     if (!length) {
     77         data = 0;
     78         return empty();
     79     }
     80 
     81     // Allocate a single buffer large enough to contain the StringImpl
     82     // struct as well as the data which it contains. This removes one
     83     // heap allocation from this call.
     84     if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)))
     85         CRASH();
     86     size_t size = sizeof(StringImpl) + length * sizeof(UChar);
     87     StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
     88 
     89     data = reinterpret_cast<UChar*>(string + 1);
     90     return adoptRef(new (string) StringImpl(length));
     91 }
     92 
     93 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
     94 {
     95     if (!characters || !length)
     96         return empty();
     97 
     98     UChar* data;
     99     RefPtr<StringImpl> string = createUninitialized(length, data);
    100     memcpy(data, characters, length * sizeof(UChar));
    101     return string.release();
    102 }
    103 
    104 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
    105 {
    106     if (!characters || !length)
    107         return empty();
    108 
    109     UChar* data;
    110     RefPtr<StringImpl> string = createUninitialized(length, data);
    111     for (unsigned i = 0; i != length; ++i) {
    112         unsigned char c = characters[i];
    113         data[i] = c;
    114     }
    115     return string.release();
    116 }
    117 
    118 PassRefPtr<StringImpl> StringImpl::create(const char* string)
    119 {
    120     if (!string)
    121         return empty();
    122     size_t length = strlen(string);
    123     if (length > numeric_limits<unsigned>::max())
    124         CRASH();
    125     return create(string, length);
    126 }
    127 
    128 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
    129 {
    130     ASSERT(characters);
    131     ASSERT(minLengthToShare && length >= minLengthToShare);
    132     return adoptRef(new StringImpl(characters, length, sharedBuffer));
    133 }
    134 
    135 SharedUChar* StringImpl::sharedBuffer()
    136 {
    137     if (m_length < minLengthToShare)
    138         return 0;
    139     // All static strings are smaller that the minimim length to share.
    140     ASSERT(!isStatic());
    141 
    142     BufferOwnership ownership = bufferOwnership();
    143 
    144     if (ownership == BufferInternal)
    145         return 0;
    146     if (ownership == BufferSubstring)
    147         return m_substringBuffer->sharedBuffer();
    148     if (ownership == BufferOwned) {
    149         ASSERT(!m_sharedBuffer);
    150         m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).leakRef();
    151         m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared;
    152     }
    153 
    154     ASSERT(bufferOwnership() == BufferShared);
    155     ASSERT(m_sharedBuffer);
    156     return m_sharedBuffer;
    157 }
    158 
    159 bool StringImpl::containsOnlyWhitespace()
    160 {
    161     // FIXME: The definition of whitespace here includes a number of characters
    162     // that are not whitespace from the point of view of RenderText; I wonder if
    163     // that's a problem in practice.
    164     for (unsigned i = 0; i < m_length; i++)
    165         if (!isASCIISpace(m_data[i]))
    166             return false;
    167     return true;
    168 }
    169 
    170 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
    171 {
    172     if (start >= m_length)
    173         return empty();
    174     unsigned maxLength = m_length - start;
    175     if (length >= maxLength) {
    176         if (!start)
    177             return this;
    178         length = maxLength;
    179     }
    180     return create(m_data + start, length);
    181 }
    182 
    183 UChar32 StringImpl::characterStartingAt(unsigned i)
    184 {
    185     if (U16_IS_SINGLE(m_data[i]))
    186         return m_data[i];
    187     if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
    188         return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
    189     return 0;
    190 }
    191 
    192 PassRefPtr<StringImpl> StringImpl::lower()
    193 {
    194     // Note: This is a hot function in the Dromaeo benchmark, specifically the
    195     // no-op code path up through the first 'return' statement.
    196 
    197     // First scan the string for uppercase and non-ASCII characters:
    198     UChar ored = 0;
    199     bool noUpper = true;
    200     const UChar *end = m_data + m_length;
    201     for (const UChar* chp = m_data; chp != end; chp++) {
    202         if (UNLIKELY(isASCIIUpper(*chp)))
    203             noUpper = false;
    204         ored |= *chp;
    205     }
    206 
    207     // Nothing to do if the string is all ASCII with no uppercase.
    208     if (noUpper && !(ored & ~0x7F))
    209         return this;
    210 
    211     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
    212         CRASH();
    213     int32_t length = m_length;
    214 
    215     UChar* data;
    216     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    217 
    218     if (!(ored & ~0x7F)) {
    219         // Do a faster loop for the case where all the characters are ASCII.
    220         for (int i = 0; i < length; i++) {
    221             UChar c = m_data[i];
    222             data[i] = toASCIILower(c);
    223         }
    224         return newImpl;
    225     }
    226 
    227     // Do a slower implementation for cases that include non-ASCII characters.
    228     bool error;
    229     int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
    230     if (!error && realLength == length)
    231         return newImpl;
    232     newImpl = createUninitialized(realLength, data);
    233     Unicode::toLower(data, realLength, m_data, m_length, &error);
    234     if (error)
    235         return this;
    236     return newImpl;
    237 }
    238 
    239 PassRefPtr<StringImpl> StringImpl::upper()
    240 {
    241     // This function could be optimized for no-op cases the way lower() is,
    242     // but in empirical testing, few actual calls to upper() are no-ops, so
    243     // it wouldn't be worth the extra time for pre-scanning.
    244     UChar* data;
    245     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    246 
    247     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
    248         CRASH();
    249     int32_t length = m_length;
    250 
    251     // Do a faster loop for the case where all the characters are ASCII.
    252     UChar ored = 0;
    253     for (int i = 0; i < length; i++) {
    254         UChar c = m_data[i];
    255         ored |= c;
    256         data[i] = toASCIIUpper(c);
    257     }
    258     if (!(ored & ~0x7F))
    259         return newImpl.release();
    260 
    261     // Do a slower implementation for cases that include non-ASCII characters.
    262     bool error;
    263     int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
    264     if (!error && realLength == length)
    265         return newImpl;
    266     newImpl = createUninitialized(realLength, data);
    267     Unicode::toUpper(data, realLength, m_data, m_length, &error);
    268     if (error)
    269         return this;
    270     return newImpl.release();
    271 }
    272 
    273 PassRefPtr<StringImpl> StringImpl::secure(UChar character, LastCharacterBehavior behavior)
    274 {
    275     if (!m_length)
    276         return this;
    277 
    278     UChar* data;
    279     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    280     unsigned lastCharacterIndex = m_length - 1;
    281     for (unsigned i = 0; i < lastCharacterIndex; ++i)
    282         data[i] = character;
    283     data[lastCharacterIndex] = (behavior == ObscureLastCharacter) ? character : m_data[lastCharacterIndex];
    284     return newImpl.release();
    285 }
    286 
    287 PassRefPtr<StringImpl> StringImpl::foldCase()
    288 {
    289     UChar* data;
    290     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    291 
    292     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
    293         CRASH();
    294     int32_t length = m_length;
    295 
    296     // Do a faster loop for the case where all the characters are ASCII.
    297     UChar ored = 0;
    298     for (int32_t i = 0; i < length; i++) {
    299         UChar c = m_data[i];
    300         ored |= c;
    301         data[i] = toASCIILower(c);
    302     }
    303     if (!(ored & ~0x7F))
    304         return newImpl.release();
    305 
    306     // Do a slower implementation for cases that include non-ASCII characters.
    307     bool error;
    308     int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
    309     if (!error && realLength == length)
    310         return newImpl.release();
    311     newImpl = createUninitialized(realLength, data);
    312     Unicode::foldCase(data, realLength, m_data, m_length, &error);
    313     if (error)
    314         return this;
    315     return newImpl.release();
    316 }
    317 
    318 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
    319 {
    320     if (!m_length)
    321         return empty();
    322 
    323     unsigned start = 0;
    324     unsigned end = m_length - 1;
    325 
    326     // skip white space from start
    327     while (start <= end && isSpaceOrNewline(m_data[start]))
    328         start++;
    329 
    330     // only white space
    331     if (start > end)
    332         return empty();
    333 
    334     // skip white space from end
    335     while (end && isSpaceOrNewline(m_data[end]))
    336         end--;
    337 
    338     if (!start && end == m_length - 1)
    339         return this;
    340     return create(m_data + start, end + 1 - start);
    341 }
    342 
    343 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
    344 {
    345     const UChar* from = m_data;
    346     const UChar* fromend = from + m_length;
    347 
    348     // Assume the common case will not remove any characters
    349     while (from != fromend && !findMatch(*from))
    350         from++;
    351     if (from == fromend)
    352         return this;
    353 
    354     StringBuffer data(m_length);
    355     UChar* to = data.characters();
    356     unsigned outc = from - m_data;
    357 
    358     if (outc)
    359         memcpy(to, m_data, outc * sizeof(UChar));
    360 
    361     while (true) {
    362         while (from != fromend && findMatch(*from))
    363             from++;
    364         while (from != fromend && !findMatch(*from))
    365             to[outc++] = *from++;
    366         if (from == fromend)
    367             break;
    368     }
    369 
    370     data.shrink(outc);
    371 
    372     return adopt(data);
    373 }
    374 
    375 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
    376 {
    377     StringBuffer data(m_length);
    378 
    379     const UChar* from = m_data;
    380     const UChar* fromend = from + m_length;
    381     int outc = 0;
    382     bool changedToSpace = false;
    383 
    384     UChar* to = data.characters();
    385 
    386     while (true) {
    387         while (from != fromend && isSpaceOrNewline(*from)) {
    388             if (*from != ' ')
    389                 changedToSpace = true;
    390             from++;
    391         }
    392         while (from != fromend && !isSpaceOrNewline(*from))
    393             to[outc++] = *from++;
    394         if (from != fromend)
    395             to[outc++] = ' ';
    396         else
    397             break;
    398     }
    399 
    400     if (outc > 0 && to[outc - 1] == ' ')
    401         outc--;
    402 
    403     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
    404         return this;
    405 
    406     data.shrink(outc);
    407 
    408     return adopt(data);
    409 }
    410 
    411 int StringImpl::toIntStrict(bool* ok, int base)
    412 {
    413     return charactersToIntStrict(m_data, m_length, ok, base);
    414 }
    415 
    416 unsigned StringImpl::toUIntStrict(bool* ok, int base)
    417 {
    418     return charactersToUIntStrict(m_data, m_length, ok, base);
    419 }
    420 
    421 int64_t StringImpl::toInt64Strict(bool* ok, int base)
    422 {
    423     return charactersToInt64Strict(m_data, m_length, ok, base);
    424 }
    425 
    426 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
    427 {
    428     return charactersToUInt64Strict(m_data, m_length, ok, base);
    429 }
    430 
    431 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
    432 {
    433     return charactersToIntPtrStrict(m_data, m_length, ok, base);
    434 }
    435 
    436 int StringImpl::toInt(bool* ok)
    437 {
    438     return charactersToInt(m_data, m_length, ok);
    439 }
    440 
    441 unsigned StringImpl::toUInt(bool* ok)
    442 {
    443     return charactersToUInt(m_data, m_length, ok);
    444 }
    445 
    446 int64_t StringImpl::toInt64(bool* ok)
    447 {
    448     return charactersToInt64(m_data, m_length, ok);
    449 }
    450 
    451 uint64_t StringImpl::toUInt64(bool* ok)
    452 {
    453     return charactersToUInt64(m_data, m_length, ok);
    454 }
    455 
    456 intptr_t StringImpl::toIntPtr(bool* ok)
    457 {
    458     return charactersToIntPtr(m_data, m_length, ok);
    459 }
    460 
    461 double StringImpl::toDouble(bool* ok, bool* didReadNumber)
    462 {
    463     return charactersToDouble(m_data, m_length, ok, didReadNumber);
    464 }
    465 
    466 float StringImpl::toFloat(bool* ok, bool* didReadNumber)
    467 {
    468     return charactersToFloat(m_data, m_length, ok, didReadNumber);
    469 }
    470 
    471 static bool equal(const UChar* a, const char* b, int length)
    472 {
    473     ASSERT(length >= 0);
    474     while (length--) {
    475         unsigned char bc = *b++;
    476         if (*a++ != bc)
    477             return false;
    478     }
    479     return true;
    480 }
    481 
    482 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
    483 {
    484     while (length--) {
    485         unsigned char bc = *b++;
    486         if (foldCase(*a++) != foldCase(bc))
    487             return false;
    488     }
    489     return true;
    490 }
    491 
    492 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
    493 {
    494     ASSERT(length >= 0);
    495     return umemcasecmp(a, b, length) == 0;
    496 }
    497 
    498 int codePointCompare(const StringImpl* s1, const StringImpl* s2)
    499 {
    500     const unsigned l1 = s1 ? s1->length() : 0;
    501     const unsigned l2 = s2 ? s2->length() : 0;
    502     const unsigned lmin = l1 < l2 ? l1 : l2;
    503     const UChar* c1 = s1 ? s1->characters() : 0;
    504     const UChar* c2 = s2 ? s2->characters() : 0;
    505     unsigned pos = 0;
    506     while (pos < lmin && *c1 == *c2) {
    507         c1++;
    508         c2++;
    509         pos++;
    510     }
    511 
    512     if (pos < lmin)
    513         return (c1[0] > c2[0]) ? 1 : -1;
    514 
    515     if (l1 == l2)
    516         return 0;
    517 
    518     return (l1 > l2) ? 1 : -1;
    519 }
    520 
    521 size_t StringImpl::find(UChar c, unsigned start)
    522 {
    523     return WTF::find(m_data, m_length, c, start);
    524 }
    525 
    526 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
    527 {
    528     return WTF::find(m_data, m_length, matchFunction, start);
    529 }
    530 
    531 size_t StringImpl::find(const char* matchString, unsigned index)
    532 {
    533     // Check for null or empty string to match against
    534     if (!matchString)
    535         return notFound;
    536     size_t matchStringLength = strlen(matchString);
    537     if (matchStringLength > numeric_limits<unsigned>::max())
    538         CRASH();
    539     unsigned matchLength = matchStringLength;
    540     if (!matchLength)
    541         return min(index, length());
    542 
    543     // Optimization 1: fast case for strings of length 1.
    544     if (matchLength == 1)
    545         return WTF::find(characters(), length(), *(const unsigned char*)matchString, index);
    546 
    547     // Check index & matchLength are in range.
    548     if (index > length())
    549         return notFound;
    550     unsigned searchLength = length() - index;
    551     if (matchLength > searchLength)
    552         return notFound;
    553     // delta is the number of additional times to test; delta == 0 means test only once.
    554     unsigned delta = searchLength - matchLength;
    555 
    556     const UChar* searchCharacters = characters() + index;
    557     const unsigned char* matchCharacters = (const unsigned char*)matchString;
    558 
    559     // Optimization 2: keep a running hash of the strings,
    560     // only call memcmp if the hashes match.
    561     unsigned searchHash = 0;
    562     unsigned matchHash = 0;
    563     for (unsigned i = 0; i < matchLength; ++i) {
    564         searchHash += searchCharacters[i];
    565         matchHash += matchCharacters[i];
    566     }
    567 
    568     unsigned i = 0;
    569     // keep looping until we match
    570     while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
    571         if (i == delta)
    572             return notFound;
    573         searchHash += searchCharacters[i + matchLength];
    574         searchHash -= searchCharacters[i];
    575         ++i;
    576     }
    577     return index + i;
    578 }
    579 
    580 size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index)
    581 {
    582     // Check for null or empty string to match against
    583     if (!matchString)
    584         return notFound;
    585     size_t matchStringLength = strlen(matchString);
    586     if (matchStringLength > numeric_limits<unsigned>::max())
    587         CRASH();
    588     unsigned matchLength = matchStringLength;
    589     if (!matchLength)
    590         return min(index, length());
    591 
    592     // Check index & matchLength are in range.
    593     if (index > length())
    594         return notFound;
    595     unsigned searchLength = length() - index;
    596     if (matchLength > searchLength)
    597         return notFound;
    598     // delta is the number of additional times to test; delta == 0 means test only once.
    599     unsigned delta = searchLength - matchLength;
    600 
    601     const UChar* searchCharacters = characters() + index;
    602 
    603     unsigned i = 0;
    604     // keep looping until we match
    605     while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
    606         if (i == delta)
    607             return notFound;
    608         ++i;
    609     }
    610     return index + i;
    611 }
    612 
    613 size_t StringImpl::find(StringImpl* matchString, unsigned index)
    614 {
    615     // Check for null or empty string to match against
    616     if (!matchString)
    617         return notFound;
    618     unsigned matchLength = matchString->length();
    619     if (!matchLength)
    620         return min(index, length());
    621 
    622     // Optimization 1: fast case for strings of length 1.
    623     if (matchLength == 1)
    624         return WTF::find(characters(), length(), matchString->characters()[0], index);
    625 
    626     // Check index & matchLength are in range.
    627     if (index > length())
    628         return notFound;
    629     unsigned searchLength = length() - index;
    630     if (matchLength > searchLength)
    631         return notFound;
    632     // delta is the number of additional times to test; delta == 0 means test only once.
    633     unsigned delta = searchLength - matchLength;
    634 
    635     const UChar* searchCharacters = characters() + index;
    636     const UChar* matchCharacters = matchString->characters();
    637 
    638     // Optimization 2: keep a running hash of the strings,
    639     // only call memcmp if the hashes match.
    640     unsigned searchHash = 0;
    641     unsigned matchHash = 0;
    642     for (unsigned i = 0; i < matchLength; ++i) {
    643         searchHash += searchCharacters[i];
    644         matchHash += matchCharacters[i];
    645     }
    646 
    647     unsigned i = 0;
    648     // keep looping until we match
    649     while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) {
    650         if (i == delta)
    651             return notFound;
    652         searchHash += searchCharacters[i + matchLength];
    653         searchHash -= searchCharacters[i];
    654         ++i;
    655     }
    656     return index + i;
    657 }
    658 
    659 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
    660 {
    661     // Check for null or empty string to match against
    662     if (!matchString)
    663         return notFound;
    664     unsigned matchLength = matchString->length();
    665     if (!matchLength)
    666         return min(index, length());
    667 
    668     // Check index & matchLength are in range.
    669     if (index > length())
    670         return notFound;
    671     unsigned searchLength = length() - index;
    672     if (matchLength > searchLength)
    673         return notFound;
    674     // delta is the number of additional times to test; delta == 0 means test only once.
    675     unsigned delta = searchLength - matchLength;
    676 
    677     const UChar* searchCharacters = characters() + index;
    678     const UChar* matchCharacters = matchString->characters();
    679 
    680     unsigned i = 0;
    681     // keep looping until we match
    682     while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
    683         if (i == delta)
    684             return notFound;
    685         ++i;
    686     }
    687     return index + i;
    688 }
    689 
    690 size_t StringImpl::reverseFind(UChar c, unsigned index)
    691 {
    692     return WTF::reverseFind(m_data, m_length, c, index);
    693 }
    694 
    695 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
    696 {
    697     // Check for null or empty string to match against
    698     if (!matchString)
    699         return notFound;
    700     unsigned matchLength = matchString->length();
    701     if (!matchLength)
    702         return min(index, length());
    703 
    704     // Optimization 1: fast case for strings of length 1.
    705     if (matchLength == 1)
    706         return WTF::reverseFind(characters(), length(), matchString->characters()[0], index);
    707 
    708     // Check index & matchLength are in range.
    709     if (matchLength > length())
    710         return notFound;
    711     // delta is the number of additional times to test; delta == 0 means test only once.
    712     unsigned delta = min(index, length() - matchLength);
    713 
    714     const UChar *searchCharacters = characters();
    715     const UChar *matchCharacters = matchString->characters();
    716 
    717     // Optimization 2: keep a running hash of the strings,
    718     // only call memcmp if the hashes match.
    719     unsigned searchHash = 0;
    720     unsigned matchHash = 0;
    721     for (unsigned i = 0; i < matchLength; ++i) {
    722         searchHash += searchCharacters[delta + i];
    723         matchHash += matchCharacters[i];
    724     }
    725 
    726     // keep looping until we match
    727     while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) {
    728         if (!delta)
    729             return notFound;
    730         delta--;
    731         searchHash -= searchCharacters[delta + matchLength];
    732         searchHash += searchCharacters[delta];
    733     }
    734     return delta;
    735 }
    736 
    737 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
    738 {
    739     // Check for null or empty string to match against
    740     if (!matchString)
    741         return notFound;
    742     unsigned matchLength = matchString->length();
    743     if (!matchLength)
    744         return min(index, length());
    745 
    746     // Check index & matchLength are in range.
    747     if (matchLength > length())
    748         return notFound;
    749     // delta is the number of additional times to test; delta == 0 means test only once.
    750     unsigned delta = min(index, length() - matchLength);
    751 
    752     const UChar *searchCharacters = characters();
    753     const UChar *matchCharacters = matchString->characters();
    754 
    755     // keep looping until we match
    756     while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
    757         if (!delta)
    758             return notFound;
    759         delta--;
    760     }
    761     return delta;
    762 }
    763 
    764 bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
    765 {
    766     ASSERT(m_data);
    767     if (m_length >= m_data->m_length) {
    768         unsigned start = m_length - m_data->m_length;
    769         return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start;
    770     }
    771     return false;
    772 }
    773 
    774 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
    775 {
    776     if (oldC == newC)
    777         return this;
    778     unsigned i;
    779     for (i = 0; i != m_length; ++i)
    780         if (m_data[i] == oldC)
    781             break;
    782     if (i == m_length)
    783         return this;
    784 
    785     UChar* data;
    786     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    787 
    788     for (i = 0; i != m_length; ++i) {
    789         UChar ch = m_data[i];
    790         if (ch == oldC)
    791             ch = newC;
    792         data[i] = ch;
    793     }
    794     return newImpl.release();
    795 }
    796 
    797 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
    798 {
    799     position = min(position, length());
    800     lengthToReplace = min(lengthToReplace, length() - position);
    801     unsigned lengthToInsert = str ? str->length() : 0;
    802     if (!lengthToReplace && !lengthToInsert)
    803         return this;
    804     UChar* data;
    805 
    806     if ((length() - lengthToReplace) >= (numeric_limits<unsigned>::max() - lengthToInsert))
    807         CRASH();
    808 
    809     RefPtr<StringImpl> newImpl =
    810         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
    811     memcpy(data, characters(), position * sizeof(UChar));
    812     if (str)
    813         memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
    814     memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
    815         (length() - position - lengthToReplace) * sizeof(UChar));
    816     return newImpl.release();
    817 }
    818 
    819 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
    820 {
    821     if (!replacement)
    822         return this;
    823 
    824     unsigned repStrLength = replacement->length();
    825     size_t srcSegmentStart = 0;
    826     unsigned matchCount = 0;
    827 
    828     // Count the matches
    829     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
    830         ++matchCount;
    831         ++srcSegmentStart;
    832     }
    833 
    834     // If we have 0 matches, we don't have to do any more work
    835     if (!matchCount)
    836         return this;
    837 
    838     if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
    839         CRASH();
    840 
    841     unsigned replaceSize = matchCount * repStrLength;
    842     unsigned newSize = m_length - matchCount;
    843     if (newSize >= (numeric_limits<unsigned>::max() - replaceSize))
    844         CRASH();
    845 
    846     newSize += replaceSize;
    847 
    848     UChar* data;
    849     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
    850 
    851     // Construct the new data
    852     size_t srcSegmentEnd;
    853     unsigned srcSegmentLength;
    854     srcSegmentStart = 0;
    855     unsigned dstOffset = 0;
    856 
    857     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
    858         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
    859         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
    860         dstOffset += srcSegmentLength;
    861         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
    862         dstOffset += repStrLength;
    863         srcSegmentStart = srcSegmentEnd + 1;
    864     }
    865 
    866     srcSegmentLength = m_length - srcSegmentStart;
    867     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
    868 
    869     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
    870 
    871     return newImpl.release();
    872 }
    873 
    874 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
    875 {
    876     if (!pattern || !replacement)
    877         return this;
    878 
    879     unsigned patternLength = pattern->length();
    880     if (!patternLength)
    881         return this;
    882 
    883     unsigned repStrLength = replacement->length();
    884     size_t srcSegmentStart = 0;
    885     unsigned matchCount = 0;
    886 
    887     // Count the matches
    888     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
    889         ++matchCount;
    890         srcSegmentStart += patternLength;
    891     }
    892 
    893     // If we have 0 matches, we don't have to do any more work
    894     if (!matchCount)
    895         return this;
    896 
    897     unsigned newSize = m_length - matchCount * patternLength;
    898     if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
    899         CRASH();
    900 
    901     if (newSize > (numeric_limits<unsigned>::max() - matchCount * repStrLength))
    902         CRASH();
    903 
    904     newSize += matchCount * repStrLength;
    905 
    906     UChar* data;
    907     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
    908 
    909     // Construct the new data
    910     size_t srcSegmentEnd;
    911     unsigned srcSegmentLength;
    912     srcSegmentStart = 0;
    913     unsigned dstOffset = 0;
    914 
    915     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
    916         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
    917         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
    918         dstOffset += srcSegmentLength;
    919         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
    920         dstOffset += repStrLength;
    921         srcSegmentStart = srcSegmentEnd + patternLength;
    922     }
    923 
    924     srcSegmentLength = m_length - srcSegmentStart;
    925     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
    926 
    927     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
    928 
    929     return newImpl.release();
    930 }
    931 
    932 bool equal(const StringImpl* a, const StringImpl* b)
    933 {
    934     return StringHash::equal(a, b);
    935 }
    936 
    937 bool equal(const StringImpl* a, const char* b)
    938 {
    939     if (!a)
    940         return !b;
    941     if (!b)
    942         return !a;
    943 
    944     unsigned length = a->length();
    945     const UChar* as = a->characters();
    946     for (unsigned i = 0; i != length; ++i) {
    947         unsigned char bc = b[i];
    948         if (!bc)
    949             return false;
    950         if (as[i] != bc)
    951             return false;
    952     }
    953 
    954     return !b[length];
    955 }
    956 
    957 bool equalIgnoringCase(StringImpl* a, StringImpl* b)
    958 {
    959     return CaseFoldingHash::equal(a, b);
    960 }
    961 
    962 bool equalIgnoringCase(StringImpl* a, const char* b)
    963 {
    964     if (!a)
    965         return !b;
    966     if (!b)
    967         return !a;
    968 
    969     unsigned length = a->length();
    970     const UChar* as = a->characters();
    971 
    972     // Do a faster loop for the case where all the characters are ASCII.
    973     UChar ored = 0;
    974     bool equal = true;
    975     for (unsigned i = 0; i != length; ++i) {
    976         char bc = b[i];
    977         if (!bc)
    978             return false;
    979         UChar ac = as[i];
    980         ored |= ac;
    981         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
    982     }
    983 
    984     // Do a slower implementation for cases that include non-ASCII characters.
    985     if (ored & ~0x7F) {
    986         equal = true;
    987         for (unsigned i = 0; i != length; ++i) {
    988             unsigned char bc = b[i];
    989             equal = equal && (foldCase(as[i]) == foldCase(bc));
    990         }
    991     }
    992 
    993     return equal && !b[length];
    994 }
    995 
    996 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
    997 {
    998     if (StringHash::equal(a, b))
    999         return true;
   1000     if (!a && b && !b->length())
   1001         return true;
   1002     if (!b && a && !a->length())
   1003         return true;
   1004 
   1005     return false;
   1006 }
   1007 
   1008 WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirectionality)
   1009 {
   1010     for (unsigned i = 0; i < m_length; ++i) {
   1011         WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]);
   1012         if (charDirection == WTF::Unicode::LeftToRight) {
   1013             if (hasStrongDirectionality)
   1014                 *hasStrongDirectionality = true;
   1015             return WTF::Unicode::LeftToRight;
   1016         }
   1017         if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) {
   1018             if (hasStrongDirectionality)
   1019                 *hasStrongDirectionality = true;
   1020             return WTF::Unicode::RightToLeft;
   1021         }
   1022     }
   1023     if (hasStrongDirectionality)
   1024         *hasStrongDirectionality = false;
   1025     return WTF::Unicode::LeftToRight;
   1026 }
   1027 
   1028 // This is a hot function because it's used when parsing HTML.
   1029 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
   1030 {
   1031     StringBuffer strippedCopy(length);
   1032     unsigned strippedLength = 0;
   1033     for (unsigned i = 0; i < length; i++) {
   1034         if (int c = characters[i])
   1035             strippedCopy[strippedLength++] = c;
   1036     }
   1037     ASSERT(strippedLength < length);  // Only take the slow case when stripping.
   1038     strippedCopy.shrink(strippedLength);
   1039     return adopt(strippedCopy);
   1040 }
   1041 
   1042 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
   1043 {
   1044     unsigned length = buffer.length();
   1045     if (length == 0)
   1046         return empty();
   1047     return adoptRef(new StringImpl(buffer.release(), length));
   1048 }
   1049 
   1050 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
   1051 {
   1052     // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer
   1053     // get allocated in a single memory block.
   1054     UChar* data;
   1055     unsigned length = string.m_length;
   1056     if (length >= numeric_limits<unsigned>::max())
   1057         CRASH();
   1058     RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data);
   1059     memcpy(data, string.m_data, length * sizeof(UChar));
   1060     data[length] = 0;
   1061     terminatedString->m_length--;
   1062     terminatedString->m_hash = string.m_hash;
   1063     terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter;
   1064     return terminatedString.release();
   1065 }
   1066 
   1067 PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const
   1068 {
   1069     return create(m_data, m_length);
   1070 }
   1071 
   1072 PassRefPtr<StringImpl> StringImpl::crossThreadString()
   1073 {
   1074     if (SharedUChar* sharedBuffer = this->sharedBuffer())
   1075         return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy()));
   1076 
   1077     // If no shared buffer is available, create a copy.
   1078     return threadsafeCopy();
   1079 }
   1080 
   1081 } // namespace WTF
   1082