Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  *           (C) 1999 Antti Koivisto (koivisto (at) kde.org)
      4  *           (C) 2001 Dirk Mueller ( mueller (at) kde.org )
      5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      6  * Copyright (C) 2006 Andrew Wellington (proton (at) wiretapped.net)
      7  *
      8  * This library is free software; you can redistribute it and/or
      9  * modify it under the terms of the GNU Library General Public
     10  * License as published by the Free Software Foundation; either
     11  * version 2 of the License, or (at your option) any later version.
     12  *
     13  * This library is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     16  * Library General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU Library General Public License
     19  * along with this library; see the file COPYING.LIB.  If not, write to
     20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     21  * Boston, MA 02110-1301, USA.
     22  *
     23  */
     24 
     25 #include "config.h"
     26 #include "StringImpl.h"
     27 
     28 #include "AtomicString.h"
     29 #include "CString.h"
     30 #include "CharacterNames.h"
     31 #include "FloatConversion.h"
     32 #include "StringBuffer.h"
     33 #include "StringHash.h"
     34 #include "TextBreakIterator.h"
     35 #include "TextEncoding.h"
     36 #include "ThreadGlobalData.h"
     37 #include <runtime/UString.h>
     38 #include <wtf/dtoa.h>
     39 #include <wtf/Assertions.h>
     40 #include <wtf/Threading.h>
     41 #include <wtf/unicode/Unicode.h>
     42 
     43 using namespace WTF;
     44 using namespace Unicode;
     45 
     46 namespace WebCore {
     47 
// NOTE(review): presumably the minimum string length at which a character
// buffer is considered for sharing; no use of this constant is visible in this
// part of the file -- confirm against the code that reads it.
static const unsigned minLengthToShare = 20;
     49 
     50 static inline UChar* newUCharVector(unsigned n)
     51 {
     52     return static_cast<UChar*>(fastMalloc(sizeof(UChar) * n));
     53 }
     54 
     55 static inline void deleteUCharVector(const UChar* p)
     56 {
     57     fastFree(const_cast<UChar*>(p));
     58 }
     59 
     60 // Some of the factory methods create buffers using fastMalloc.
     61 // We must ensure that all allocations of StringImpl are allocated using
     62 // fastMalloc so that we don't have mis-matched frees. We accomplish
     63 // this by overriding the new and delete operators.
     64 void* StringImpl::operator new(size_t size, void* address)
     65 {
     66     if (address)
     67         return address;  // Allocating using an internal buffer
     68     return fastMalloc(size);
     69 }
     70 
     71 void* StringImpl::operator new(size_t size)
     72 {
     73     return fastMalloc(size);
     74 }
     75 
     76 void StringImpl::operator delete(void* address)
     77 {
     78     fastFree(address);
     79 }
     80 
     81 // This constructor is used only to create the empty string.
     82 StringImpl::StringImpl()
     83     : m_data(0)
     84     , m_length(0)
     85     , m_hash(0)
     86 {
     87     // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
     88     // with impunity. The empty string is special because it is never entered into
     89     // AtomicString's HashKey, but still needs to compare correctly.
     90     hash();
     91 }
     92 
     93 inline StringImpl::StringImpl(const UChar* characters, unsigned length)
     94     : m_data(characters)
     95     , m_length(length)
     96     , m_hash(0)
     97 {
     98     ASSERT(characters);
     99     ASSERT(length);
    100     ASSERT(!bufferIsInternal());
    101 }
    102 
    103 inline StringImpl::StringImpl(unsigned length)
    104     : m_data(reinterpret_cast<const UChar*>(this + 1))
    105     , m_length(length)
    106     , m_hash(0)
    107 {
    108     ASSERT(length);
    109     ASSERT(bufferIsInternal());
    110 }
    111 
    112 StringImpl::~StringImpl()
    113 {
    114     if (inTable())
    115         AtomicString::remove(this);
    116     if (!bufferIsInternal()) {
    117         SharedUChar* sharedBuffer = m_sharedBufferAndFlags.get();
    118         if (sharedBuffer)
    119             sharedBuffer->deref();
    120         else
    121             deleteUCharVector(m_data);
    122     }
    123 }
    124 
    125 StringImpl* StringImpl::empty()
    126 {
    127     return threadGlobalData().emptyString();
    128 }
    129 
    130 bool StringImpl::containsOnlyWhitespace()
    131 {
    132     // FIXME: The definition of whitespace here includes a number of characters
    133     // that are not whitespace from the point of view of RenderText; I wonder if
    134     // that's a problem in practice.
    135     for (unsigned i = 0; i < m_length; i++)
    136         if (!isASCIISpace(m_data[i]))
    137             return false;
    138     return true;
    139 }
    140 
    141 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
    142 {
    143     if (start >= m_length)
    144         return empty();
    145     unsigned maxLength = m_length - start;
    146     if (length >= maxLength) {
    147         if (!start)
    148             return this;
    149         length = maxLength;
    150     }
    151     return create(m_data + start, length);
    152 }
    153 
    154 UChar32 StringImpl::characterStartingAt(unsigned i)
    155 {
    156     if (U16_IS_SINGLE(m_data[i]))
    157         return m_data[i];
    158     if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
    159         return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
    160     return 0;
    161 }
    162 
    163 PassRefPtr<StringImpl> StringImpl::lower()
    164 {
    165     // Note: This is a hot function in the Dromaeo benchmark, specifically the
    166     // no-op code path up through the first 'return' statement.
    167 
    168     // First scan the string for uppercase and non-ASCII characters:
    169     UChar ored = 0;
    170     bool noUpper = true;
    171     const UChar *end = m_data + m_length;
    172     for (const UChar* chp = m_data; chp != end; chp++) {
    173         if (UNLIKELY(isASCIIUpper(*chp)))
    174             noUpper = false;
    175         ored |= *chp;
    176     }
    177 
    178     // Nothing to do if the string is all ASCII with no uppercase.
    179     if (noUpper && !(ored & ~0x7F))
    180         return this;
    181 
    182     int32_t length = m_length;
    183     UChar* data;
    184     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    185 
    186     if (!(ored & ~0x7F)) {
    187         // Do a faster loop for the case where all the characters are ASCII.
    188         for (int i = 0; i < length; i++) {
    189             UChar c = m_data[i];
    190             data[i] = toASCIILower(c);
    191         }
    192         return newImpl;
    193     }
    194 
    195     // Do a slower implementation for cases that include non-ASCII characters.
    196     bool error;
    197     int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
    198     if (!error && realLength == length)
    199         return newImpl;
    200     newImpl = createUninitialized(realLength, data);
    201     Unicode::toLower(data, realLength, m_data, m_length, &error);
    202     if (error)
    203         return this;
    204     return newImpl;
    205 }
    206 
    207 PassRefPtr<StringImpl> StringImpl::upper()
    208 {
    209     // This function could be optimized for no-op cases the way lower() is,
    210     // but in empirical testing, few actual calls to upper() are no-ops, so
    211     // it wouldn't be worth the extra time for pre-scanning.
    212     UChar* data;
    213     PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    214     int32_t length = m_length;
    215 
    216     // Do a faster loop for the case where all the characters are ASCII.
    217     UChar ored = 0;
    218     for (int i = 0; i < length; i++) {
    219         UChar c = m_data[i];
    220         ored |= c;
    221         data[i] = toASCIIUpper(c);
    222     }
    223     if (!(ored & ~0x7F))
    224         return newImpl;
    225 
    226     // Do a slower implementation for cases that include non-ASCII characters.
    227     bool error;
    228     int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
    229     if (!error && realLength == length)
    230         return newImpl;
    231     newImpl = createUninitialized(realLength, data);
    232     Unicode::toUpper(data, realLength, m_data, m_length, &error);
    233     if (error)
    234         return this;
    235     return newImpl;
    236 }
    237 
    238 PassRefPtr<StringImpl> StringImpl::secure(UChar aChar)
    239 {
    240     UChar* data;
    241     PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    242     int32_t length = m_length;
    243     for (int i = 0; i < length; ++i)
    244         data[i] = aChar;
    245     return newImpl;
    246 }
    247 
    248 PassRefPtr<StringImpl> StringImpl::foldCase()
    249 {
    250     UChar* data;
    251     PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    252     int32_t length = m_length;
    253 
    254     // Do a faster loop for the case where all the characters are ASCII.
    255     UChar ored = 0;
    256     for (int i = 0; i < length; i++) {
    257         UChar c = m_data[i];
    258         ored |= c;
    259         data[i] = toASCIILower(c);
    260     }
    261     if (!(ored & ~0x7F))
    262         return newImpl;
    263 
    264     // Do a slower implementation for cases that include non-ASCII characters.
    265     bool error;
    266     int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
    267     if (!error && realLength == length)
    268         return newImpl;
    269     newImpl = createUninitialized(realLength, data);
    270     Unicode::foldCase(data, realLength, m_data, m_length, &error);
    271     if (error)
    272         return this;
    273     return newImpl;
    274 }
    275 
    276 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
    277 {
    278     if (!m_length)
    279         return empty();
    280 
    281     unsigned start = 0;
    282     unsigned end = m_length - 1;
    283 
    284     // skip white space from start
    285     while (start <= end && isSpaceOrNewline(m_data[start]))
    286         start++;
    287 
    288     // only white space
    289     if (start > end)
    290         return empty();
    291 
    292     // skip white space from end
    293     while (end && isSpaceOrNewline(m_data[end]))
    294         end--;
    295 
    296     if (!start && end == m_length - 1)
    297         return this;
    298     return create(m_data + start, end + 1 - start);
    299 }
    300 
    301 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
    302 {
    303     const UChar* from = m_data;
    304     const UChar* fromend = from + m_length;
    305 
    306     // Assume the common case will not remove any characters
    307     while (from != fromend && !findMatch(*from))
    308         from++;
    309     if (from == fromend)
    310         return this;
    311 
    312     StringBuffer data(m_length);
    313     UChar* to = data.characters();
    314     unsigned outc = from - m_data;
    315 
    316     if (outc)
    317         memcpy(to, m_data, outc * sizeof(UChar));
    318 
    319     while (true) {
    320         while (from != fromend && findMatch(*from))
    321             from++;
    322         while (from != fromend && !findMatch(*from))
    323             to[outc++] = *from++;
    324         if (from == fromend)
    325             break;
    326     }
    327 
    328     data.shrink(outc);
    329 
    330     return adopt(data);
    331 }
    332 
    333 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
    334 {
    335     StringBuffer data(m_length);
    336 
    337     const UChar* from = m_data;
    338     const UChar* fromend = from + m_length;
    339     int outc = 0;
    340     bool changedToSpace = false;
    341 
    342     UChar* to = data.characters();
    343 
    344     while (true) {
    345         while (from != fromend && isSpaceOrNewline(*from)) {
    346             if (*from != ' ')
    347                 changedToSpace = true;
    348             from++;
    349         }
    350         while (from != fromend && !isSpaceOrNewline(*from))
    351             to[outc++] = *from++;
    352         if (from != fromend)
    353             to[outc++] = ' ';
    354         else
    355             break;
    356     }
    357 
    358     if (outc > 0 && to[outc - 1] == ' ')
    359         outc--;
    360 
    361     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
    362         return this;
    363 
    364     data.shrink(outc);
    365 
    366     return adopt(data);
    367 }
    368 
    369 PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous)
    370 {
    371     StringBuffer stringWithPrevious(m_length + 1);
    372     stringWithPrevious[0] = previous == noBreakSpace ? ' ' : previous;
    373     for (unsigned i = 1; i < m_length + 1; i++) {
    374         // Replace &nbsp with a real space since ICU no longer treats &nbsp as a word separator.
    375         if (m_data[i - 1] == noBreakSpace)
    376             stringWithPrevious[i] = ' ';
    377         else
    378             stringWithPrevious[i] = m_data[i - 1];
    379     }
    380 
    381     TextBreakIterator* boundary = wordBreakIterator(stringWithPrevious.characters(), m_length + 1);
    382     if (!boundary)
    383         return this;
    384 
    385     StringBuffer data(m_length);
    386 
    387     int32_t endOfWord;
    388     int32_t startOfWord = textBreakFirst(boundary);
    389     for (endOfWord = textBreakNext(boundary); endOfWord != TextBreakDone; startOfWord = endOfWord, endOfWord = textBreakNext(boundary)) {
    390         if (startOfWord != 0) // Ignore first char of previous string
    391             data[startOfWord - 1] = m_data[startOfWord - 1] == noBreakSpace ? noBreakSpace : toTitleCase(stringWithPrevious[startOfWord]);
    392         for (int i = startOfWord + 1; i < endOfWord; i++)
    393             data[i - 1] = m_data[i - 1];
    394     }
    395 
    396     return adopt(data);
    397 }
    398 
    399 int StringImpl::toIntStrict(bool* ok, int base)
    400 {
    401     return charactersToIntStrict(m_data, m_length, ok, base);
    402 }
    403 
    404 unsigned StringImpl::toUIntStrict(bool* ok, int base)
    405 {
    406     return charactersToUIntStrict(m_data, m_length, ok, base);
    407 }
    408 
    409 int64_t StringImpl::toInt64Strict(bool* ok, int base)
    410 {
    411     return charactersToInt64Strict(m_data, m_length, ok, base);
    412 }
    413 
    414 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
    415 {
    416     return charactersToUInt64Strict(m_data, m_length, ok, base);
    417 }
    418 
    419 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
    420 {
    421     return charactersToIntPtrStrict(m_data, m_length, ok, base);
    422 }
    423 
    424 int StringImpl::toInt(bool* ok)
    425 {
    426     return charactersToInt(m_data, m_length, ok);
    427 }
    428 
    429 unsigned StringImpl::toUInt(bool* ok)
    430 {
    431     return charactersToUInt(m_data, m_length, ok);
    432 }
    433 
    434 int64_t StringImpl::toInt64(bool* ok)
    435 {
    436     return charactersToInt64(m_data, m_length, ok);
    437 }
    438 
    439 uint64_t StringImpl::toUInt64(bool* ok)
    440 {
    441     return charactersToUInt64(m_data, m_length, ok);
    442 }
    443 
    444 intptr_t StringImpl::toIntPtr(bool* ok)
    445 {
    446     return charactersToIntPtr(m_data, m_length, ok);
    447 }
    448 
    449 double StringImpl::toDouble(bool* ok)
    450 {
    451     return charactersToDouble(m_data, m_length, ok);
    452 }
    453 
    454 float StringImpl::toFloat(bool* ok)
    455 {
    456     return charactersToFloat(m_data, m_length, ok);
    457 }
    458 
    459 static bool equal(const UChar* a, const char* b, int length)
    460 {
    461     ASSERT(length >= 0);
    462     while (length--) {
    463         unsigned char bc = *b++;
    464         if (*a++ != bc)
    465             return false;
    466     }
    467     return true;
    468 }
    469 
    470 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
    471 {
    472     while (length--) {
    473         unsigned char bc = *b++;
    474         if (foldCase(*a++) != foldCase(bc))
    475             return false;
    476     }
    477     return true;
    478 }
    479 
    480 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
    481 {
    482     ASSERT(length >= 0);
    483     return umemcasecmp(a, b, length) == 0;
    484 }
    485 
    486 int StringImpl::find(const char* chs, int index, bool caseSensitive)
    487 {
    488     if (!chs || index < 0)
    489         return -1;
    490 
    491     int chsLength = strlen(chs);
    492     int n = m_length - index;
    493     if (n < 0)
    494         return -1;
    495     n -= chsLength - 1;
    496     if (n <= 0)
    497         return -1;
    498 
    499     const char* chsPlusOne = chs + 1;
    500     int chsLengthMinusOne = chsLength - 1;
    501 
    502     const UChar* ptr = m_data + index - 1;
    503     if (caseSensitive) {
    504         UChar c = *chs;
    505         do {
    506             if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
    507                 return m_length - chsLength - n + 1;
    508         } while (--n);
    509     } else {
    510         UChar lc = Unicode::foldCase(*chs);
    511         do {
    512             if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
    513                 return m_length - chsLength - n + 1;
    514         } while (--n);
    515     }
    516 
    517     return -1;
    518 }
    519 
    520 int StringImpl::find(UChar c, int start)
    521 {
    522     return WebCore::find(m_data, m_length, c, start);
    523 }
    524 
    525 int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start)
    526 {
    527     return WebCore::find(m_data, m_length, matchFunction, start);
    528 }
    529 
    530 int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
    531 {
    532     /*
    533       We use a simple trick for efficiency's sake. Instead of
    534       comparing strings, we compare the sum of str with that of
    535       a part of this string. Only if that matches, we call memcmp
    536       or ucstrnicmp.
    537     */
    538     ASSERT(str);
    539     if (index < 0)
    540         index += m_length;
    541     int lstr = str->m_length;
    542     int lthis = m_length - index;
    543     if ((unsigned)lthis > m_length)
    544         return -1;
    545     int delta = lthis - lstr;
    546     if (delta < 0)
    547         return -1;
    548 
    549     const UChar* uthis = m_data + index;
    550     const UChar* ustr = str->m_data;
    551     unsigned hthis = 0;
    552     unsigned hstr = 0;
    553     if (caseSensitive) {
    554         for (int i = 0; i < lstr; i++) {
    555             hthis += uthis[i];
    556             hstr += ustr[i];
    557         }
    558         int i = 0;
    559         while (1) {
    560             if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
    561                 return index + i;
    562             if (i == delta)
    563                 return -1;
    564             hthis += uthis[i + lstr];
    565             hthis -= uthis[i];
    566             i++;
    567         }
    568     } else {
    569         for (int i = 0; i < lstr; i++ ) {
    570             hthis += toASCIILower(uthis[i]);
    571             hstr += toASCIILower(ustr[i]);
    572         }
    573         int i = 0;
    574         while (1) {
    575             if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
    576                 return index + i;
    577             if (i == delta)
    578                 return -1;
    579             hthis += toASCIILower(uthis[i + lstr]);
    580             hthis -= toASCIILower(uthis[i]);
    581             i++;
    582         }
    583     }
    584 }
    585 
    586 int StringImpl::reverseFind(UChar c, int index)
    587 {
    588     return WebCore::reverseFind(m_data, m_length, c, index);
    589 }
    590 
    591 int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
    592 {
    593     /*
    594      See StringImpl::find() for explanations.
    595      */
    596     ASSERT(str);
    597     int lthis = m_length;
    598     if (index < 0)
    599         index += lthis;
    600 
    601     int lstr = str->m_length;
    602     int delta = lthis - lstr;
    603     if ( index < 0 || index > lthis || delta < 0 )
    604         return -1;
    605     if ( index > delta )
    606         index = delta;
    607 
    608     const UChar *uthis = m_data;
    609     const UChar *ustr = str->m_data;
    610     unsigned hthis = 0;
    611     unsigned hstr = 0;
    612     int i;
    613     if (caseSensitive) {
    614         for ( i = 0; i < lstr; i++ ) {
    615             hthis += uthis[index + i];
    616             hstr += ustr[i];
    617         }
    618         i = index;
    619         while (1) {
    620             if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
    621                 return i;
    622             if (i == 0)
    623                 return -1;
    624             i--;
    625             hthis -= uthis[i + lstr];
    626             hthis += uthis[i];
    627         }
    628     } else {
    629         for (i = 0; i < lstr; i++) {
    630             hthis += toASCIILower(uthis[index + i]);
    631             hstr += toASCIILower(ustr[i]);
    632         }
    633         i = index;
    634         while (1) {
    635             if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
    636                 return i;
    637             if (i == 0)
    638                 return -1;
    639             i--;
    640             hthis -= toASCIILower(uthis[i + lstr]);
    641             hthis += toASCIILower(uthis[i]);
    642         }
    643     }
    644 
    645     // Should never get here.
    646     return -1;
    647 }
    648 
    649 bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
    650 {
    651     ASSERT(m_data);
    652     int start = m_length - m_data->m_length;
    653     if (start >= 0)
    654         return (find(m_data, start, caseSensitive) == start);
    655     return false;
    656 }
    657 
    658 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
    659 {
    660     if (oldC == newC)
    661         return this;
    662     unsigned i;
    663     for (i = 0; i != m_length; ++i)
    664         if (m_data[i] == oldC)
    665             break;
    666     if (i == m_length)
    667         return this;
    668 
    669     UChar* data;
    670     PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
    671 
    672     for (i = 0; i != m_length; ++i) {
    673         UChar ch = m_data[i];
    674         if (ch == oldC)
    675             ch = newC;
    676         data[i] = ch;
    677     }
    678     return newImpl;
    679 }
    680 
    681 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
    682 {
    683     position = min(position, length());
    684     lengthToReplace = min(lengthToReplace, length() - position);
    685     unsigned lengthToInsert = str ? str->length() : 0;
    686     if (!lengthToReplace && !lengthToInsert)
    687         return this;
    688     UChar* data;
    689     PassRefPtr<StringImpl> newImpl =
    690         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
    691     memcpy(data, characters(), position * sizeof(UChar));
    692     if (str)
    693         memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
    694     memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
    695         (length() - position - lengthToReplace) * sizeof(UChar));
    696     return newImpl;
    697 }
    698 
    699 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
    700 {
    701     if (!replacement)
    702         return this;
    703 
    704     int repStrLength = replacement->length();
    705     int srcSegmentStart = 0;
    706     int matchCount = 0;
    707 
    708     // Count the matches
    709     while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
    710         ++matchCount;
    711         ++srcSegmentStart;
    712     }
    713 
    714     // If we have 0 matches, we don't have to do any more work
    715     if (!matchCount)
    716         return this;
    717 
    718     UChar* data;
    719     PassRefPtr<StringImpl> newImpl =
    720         createUninitialized(m_length - matchCount + (matchCount * repStrLength), data);
    721 
    722     // Construct the new data
    723     int srcSegmentEnd;
    724     int srcSegmentLength;
    725     srcSegmentStart = 0;
    726     int dstOffset = 0;
    727 
    728     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
    729         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
    730         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
    731         dstOffset += srcSegmentLength;
    732         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
    733         dstOffset += repStrLength;
    734         srcSegmentStart = srcSegmentEnd + 1;
    735     }
    736 
    737     srcSegmentLength = m_length - srcSegmentStart;
    738     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
    739 
    740     ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
    741 
    742     return newImpl;
    743 }
    744 
    745 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
    746 {
    747     if (!pattern || !replacement)
    748         return this;
    749 
    750     int patternLength = pattern->length();
    751     if (!patternLength)
    752         return this;
    753 
    754     int repStrLength = replacement->length();
    755     int srcSegmentStart = 0;
    756     int matchCount = 0;
    757 
    758     // Count the matches
    759     while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
    760         ++matchCount;
    761         srcSegmentStart += patternLength;
    762     }
    763 
    764     // If we have 0 matches, we don't have to do any more work
    765     if (!matchCount)
    766         return this;
    767 
    768     UChar* data;
    769     PassRefPtr<StringImpl> newImpl =
    770         createUninitialized(m_length + matchCount * (repStrLength - patternLength), data);
    771 
    772     // Construct the new data
    773     int srcSegmentEnd;
    774     int srcSegmentLength;
    775     srcSegmentStart = 0;
    776     int dstOffset = 0;
    777 
    778     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
    779         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
    780         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
    781         dstOffset += srcSegmentLength;
    782         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
    783         dstOffset += repStrLength;
    784         srcSegmentStart = srcSegmentEnd + patternLength;
    785     }
    786 
    787     srcSegmentLength = m_length - srcSegmentStart;
    788     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
    789 
    790     ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
    791 
    792     return newImpl;
    793 }
    794 
    795 bool equal(StringImpl* a, StringImpl* b)
    796 {
    797     return StringHash::equal(a, b);
    798 }
    799 
    800 bool equal(StringImpl* a, const char* b)
    801 {
    802     if (!a)
    803         return !b;
    804     if (!b)
    805         return !a;
    806 
    807     unsigned length = a->length();
    808     const UChar* as = a->characters();
    809     for (unsigned i = 0; i != length; ++i) {
    810         unsigned char bc = b[i];
    811         if (!bc)
    812             return false;
    813         if (as[i] != bc)
    814             return false;
    815     }
    816 
    817     return !b[length];
    818 }
    819 
    820 bool equalIgnoringCase(StringImpl* a, StringImpl* b)
    821 {
    822     return CaseFoldingHash::equal(a, b);
    823 }
    824 
    825 bool equalIgnoringCase(StringImpl* a, const char* b)
    826 {
    827     if (!a)
    828         return !b;
    829     if (!b)
    830         return !a;
    831 
    832     unsigned length = a->length();
    833     const UChar* as = a->characters();
    834 
    835     // Do a faster loop for the case where all the characters are ASCII.
    836     UChar ored = 0;
    837     bool equal = true;
    838     for (unsigned i = 0; i != length; ++i) {
    839         char bc = b[i];
    840         if (!bc)
    841             return false;
    842         UChar ac = as[i];
    843         ored |= ac;
    844         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
    845     }
    846 
    847     // Do a slower implementation for cases that include non-ASCII characters.
    848     if (ored & ~0x7F) {
    849         equal = true;
    850         for (unsigned i = 0; i != length; ++i) {
    851             unsigned char bc = b[i];
    852             equal = equal && (foldCase(as[i]) == foldCase(bc));
    853         }
    854     }
    855 
    856     return equal && !b[length];
    857 }
    858 
    859 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
    860 {
    861     if (StringHash::equal(a, b))
    862         return true;
    863     if (!a && b && !b->length())
    864         return true;
    865     if (!b && a && !a->length())
    866         return true;
    867 
    868     return false;
    869 }
    870 
    871 Vector<char> StringImpl::ascii()
    872 {
    873     Vector<char> buffer(m_length + 1);
    874     for (unsigned i = 0; i != m_length; ++i) {
    875         UChar c = m_data[i];
    876         if ((c >= 0x20 && c < 0x7F) || c == 0x00)
    877             buffer[i] = c;
    878         else
    879             buffer[i] = '?';
    880     }
    881     buffer[m_length] = '\0';
    882     return buffer;
    883 }
    884 
    885 WTF::Unicode::Direction StringImpl::defaultWritingDirection()
    886 {
    887     for (unsigned i = 0; i < m_length; ++i) {
    888         WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]);
    889         if (charDirection == WTF::Unicode::LeftToRight)
    890             return WTF::Unicode::LeftToRight;
    891         if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic)
    892             return WTF::Unicode::RightToLeft;
    893     }
    894     return WTF::Unicode::LeftToRight;
    895 }
    896 
    897 // This is a hot function because it's used when parsing HTML.
    898 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
    899 {
    900     StringBuffer strippedCopy(length);
    901     unsigned strippedLength = 0;
    902     for (unsigned i = 0; i < length; i++) {
    903         if (int c = characters[i])
    904             strippedCopy[strippedLength++] = c;
    905     }
    906     ASSERT(strippedLength < length);  // Only take the slow case when stripping.
    907     strippedCopy.shrink(strippedLength);
    908     return adopt(strippedCopy);
    909 }
    910 
    911 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
    912 {
    913     unsigned length = buffer.length();
    914     if (length == 0)
    915         return empty();
    916     return adoptRef(new StringImpl(buffer.release(), length));
    917 }
    918 
    919 PassRefPtr<StringImpl> StringImpl::adopt(Vector<UChar>& vector)
    920 {
    921     size_t size = vector.size();
    922     if (size == 0)
    923         return empty();
    924     return adoptRef(new StringImpl(vector.releaseBuffer(), size));
    925 }
    926 
    927 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
    928 {
    929     if (!length) {
    930         data = 0;
    931         return empty();
    932     }
    933 
    934     // Allocate a single buffer large enough to contain the StringImpl
    935     // struct as well as the data which it contains. This removes one
    936     // heap allocation from this call.
    937     size_t size = sizeof(StringImpl) + length * sizeof(UChar);
    938     StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
    939     data = reinterpret_cast<UChar*>(string + 1);
    940     string = new (string) StringImpl(length);
    941     return adoptRef(string);
    942 }
    943 
    944 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
    945 {
    946     if (!characters || !length)
    947         return empty();
    948 
    949     UChar* data;
    950     PassRefPtr<StringImpl> string = createUninitialized(length, data);
    951     memcpy(data, characters, length * sizeof(UChar));
    952     return string;
    953 }
    954 
    955 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
    956 {
    957     if (!characters || !length)
    958         return empty();
    959 
    960     UChar* data;
    961     PassRefPtr<StringImpl> string = createUninitialized(length, data);
    962     for (unsigned i = 0; i != length; ++i) {
    963         unsigned char c = characters[i];
    964         data[i] = c;
    965     }
    966     return string;
    967 }
    968 
    969 PassRefPtr<StringImpl> StringImpl::create(const char* string)
    970 {
    971     if (!string)
    972         return empty();
    973     return create(string, strlen(string));
    974 }
    975 
    976 #if USE(JSC)
    977 PassRefPtr<StringImpl> StringImpl::create(const JSC::UString& str)
    978 {
    979     SharedUChar* sharedBuffer = const_cast<JSC::UString*>(&str)->rep()->sharedBuffer();
    980     if (sharedBuffer) {
    981         PassRefPtr<StringImpl> impl = adoptRef(new StringImpl(str.data(), str.size()));
    982         sharedBuffer->ref();
    983         impl->m_sharedBufferAndFlags.set(sharedBuffer);
    984         return impl;
    985     }
    986     return StringImpl::create(str.data(), str.size());
    987 }
    988 
    989 JSC::UString StringImpl::ustring()
    990 {
    991     SharedUChar* sharedBuffer = this->sharedBuffer();
    992     if (sharedBuffer)
    993         return JSC::UString::Rep::create(sharedBuffer, const_cast<UChar*>(m_data), m_length);
    994 
    995     return JSC::UString(m_data, m_length);
    996 }
    997 #endif
    998 
    999 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
   1000 {
   1001     // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer
   1002     // get allocated in a single malloc block.
   1003     UChar* data;
   1004     int length = string.m_length;
   1005     RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data);
   1006     memcpy(data, string.m_data, length * sizeof(UChar));
   1007     data[length] = 0;
   1008     terminatedString->m_length--;
   1009     terminatedString->m_hash = string.m_hash;
   1010     terminatedString->m_sharedBufferAndFlags.setFlag(HasTerminatingNullCharacter);
   1011     return terminatedString.release();
   1012 }
   1013 
   1014 PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const
   1015 {
   1016     // Special-case empty strings to make sure that per-thread empty string instance isn't returned.
   1017     if (m_length == 0)
   1018         return adoptRef(new StringImpl);
   1019     return create(m_data, m_length);
   1020 }
   1021 
   1022 PassRefPtr<StringImpl> StringImpl::crossThreadString()
   1023 {
   1024     SharedUChar* shared = sharedBuffer();
   1025     if (shared) {
   1026         RefPtr<StringImpl> impl = adoptRef(new StringImpl(m_data, m_length));
   1027         impl->m_sharedBufferAndFlags.set(shared->crossThreadCopy().releaseRef());
   1028         return impl.release();
   1029     }
   1030 
   1031     // If no shared buffer is available, create a copy.
   1032     return threadsafeCopy();
   1033 }
   1034 
   1035 StringImpl::SharedUChar* StringImpl::sharedBuffer()
   1036 {
   1037     if (m_length < minLengthToShare || bufferIsInternal())
   1038         return 0;
   1039 
   1040     if (!m_sharedBufferAndFlags.get())
   1041         m_sharedBufferAndFlags.set(SharedUChar::create(new OwnFastMallocPtr<UChar>(const_cast<UChar*>(m_data))).releaseRef());
   1042     return m_sharedBufferAndFlags.get();
   1043 }
   1044 
   1045 
   1046 } // namespace WebCore
   1047