Home | History | Annotate | Download | only in text
      1 /*
      2  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2013 Apple Inc. All rights reserved.
      3  * Copyright (C) 2010 Patrick Gansterer <paroga (at) paroga.com>
      4  * Copyright (C) 2012 Google Inc. All rights reserved.
      5  *
      6  * This library is free software; you can redistribute it and/or
      7  * modify it under the terms of the GNU Library General Public
      8  * License as published by the Free Software Foundation; either
      9  * version 2 of the License, or (at your option) any later version.
     10  *
     11  * This library is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  * Library General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU Library General Public License
     17  * along with this library; see the file COPYING.LIB.  If not, write to
     18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  * Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #include "config.h"
     24 #include "AtomicString.h"
     25 
     26 #include "StringHash.h"
     27 #include "wtf/HashSet.h"
     28 #include "wtf/WTFThreadData.h"
     29 #include "wtf/unicode/UTF8.h"
     30 
     31 namespace WTF {
     32 
     33 using namespace Unicode;
     34 
     35 COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
     36 
     37 class AtomicStringTable {
     38 public:
     39     static AtomicStringTable* create(WTFThreadData& data)
     40     {
     41         data.m_atomicStringTable = new AtomicStringTable;
     42         data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
     43         return data.m_atomicStringTable;
     44     }
     45 
     46     HashSet<StringImpl*>& table()
     47     {
     48         return m_table;
     49     }
     50 
     51 private:
     52     static void destroy(AtomicStringTable* table)
     53     {
     54         HashSet<StringImpl*>::iterator end = table->m_table.end();
     55         for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
     56             (*iter)->setIsAtomic(false);
     57         delete table;
     58     }
     59 
     60     HashSet<StringImpl*> m_table;
     61 };
     62 
     63 static inline HashSet<StringImpl*>& stringTable()
     64 {
     65     // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
     66     WTFThreadData& data = wtfThreadData();
     67     AtomicStringTable* table = data.atomicStringTable();
     68     if (UNLIKELY(!table))
     69         table = AtomicStringTable::create(data);
     70     return table->table();
     71 }
     72 
     73 template<typename T, typename HashTranslator>
     74 static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
     75 {
     76     HashSet<StringImpl*>::AddResult addResult = stringTable().add<HashTranslator>(value);
     77 
     78     // If the string is newly-translated, then we need to adopt it.
     79     // The boolean in the pair tells us if that is so.
     80     return addResult.isNewEntry ? adoptRef(*addResult.iterator) : *addResult.iterator;
     81 }
     82 
     83 struct CStringTranslator {
     84     static unsigned hash(const LChar* c)
     85     {
     86         return StringHasher::computeHashAndMaskTop8Bits(c);
     87     }
     88 
     89     static inline bool equal(StringImpl* r, const LChar* s)
     90     {
     91         return WTF::equal(r, s);
     92     }
     93 
     94     static void translate(StringImpl*& location, const LChar* const& c, unsigned hash)
     95     {
     96         location = StringImpl::create(c).leakRef();
     97         location->setHash(hash);
     98         location->setIsAtomic(true);
     99     }
    100 };
    101 
    102 PassRefPtr<StringImpl> AtomicString::add(const LChar* c)
    103 {
    104     if (!c)
    105         return 0;
    106     if (!*c)
    107         return StringImpl::empty();
    108 
    109     return addToStringTable<const LChar*, CStringTranslator>(c);
    110 }
    111 
    112 template<typename CharacterType>
    113 struct HashTranslatorCharBuffer {
    114     const CharacterType* s;
    115     unsigned length;
    116 };
    117 
    118 typedef HashTranslatorCharBuffer<UChar> UCharBuffer;
    119 struct UCharBufferTranslator {
    120     static unsigned hash(const UCharBuffer& buf)
    121     {
    122         return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
    123     }
    124 
    125     static bool equal(StringImpl* const& str, const UCharBuffer& buf)
    126     {
    127         return WTF::equal(str, buf.s, buf.length);
    128     }
    129 
    130     static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
    131     {
    132         location = StringImpl::create8BitIfPossible(buf.s, buf.length).leakRef();
    133         location->setHash(hash);
    134         location->setIsAtomic(true);
    135     }
    136 };
    137 
    138 template<typename CharacterType>
    139 struct HashAndCharacters {
    140     unsigned hash;
    141     const CharacterType* characters;
    142     unsigned length;
    143 };
    144 
    145 template<typename CharacterType>
    146 struct HashAndCharactersTranslator {
    147     static unsigned hash(const HashAndCharacters<CharacterType>& buffer)
    148     {
    149         ASSERT(buffer.hash == StringHasher::computeHashAndMaskTop8Bits(buffer.characters, buffer.length));
    150         return buffer.hash;
    151     }
    152 
    153     static bool equal(StringImpl* const& string, const HashAndCharacters<CharacterType>& buffer)
    154     {
    155         return WTF::equal(string, buffer.characters, buffer.length);
    156     }
    157 
    158     static void translate(StringImpl*& location, const HashAndCharacters<CharacterType>& buffer, unsigned hash)
    159     {
    160         location = StringImpl::create(buffer.characters, buffer.length).leakRef();
    161         location->setHash(hash);
    162         location->setIsAtomic(true);
    163     }
    164 };
    165 
    166 struct HashAndUTF8Characters {
    167     unsigned hash;
    168     const char* characters;
    169     unsigned length;
    170     unsigned utf16Length;
    171 };
    172 
    173 struct HashAndUTF8CharactersTranslator {
    174     static unsigned hash(const HashAndUTF8Characters& buffer)
    175     {
    176         return buffer.hash;
    177     }
    178 
    179     static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
    180     {
    181         if (buffer.utf16Length != string->length())
    182             return false;
    183 
    184         // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
    185         if (buffer.utf16Length != buffer.length) {
    186             if (string->is8Bit()) {
    187                 const LChar* characters8 = string->characters8();
    188                 return equalLatin1WithUTF8(characters8, characters8 + string->length(), buffer.characters, buffer.characters + buffer.length);
    189             }
    190             const UChar* characters16 = string->characters16();
    191             return equalUTF16WithUTF8(characters16, characters16 + string->length(), buffer.characters, buffer.characters + buffer.length);
    192         }
    193 
    194         if (string->is8Bit()) {
    195             const LChar* stringCharacters = string->characters8();
    196 
    197             for (unsigned i = 0; i < buffer.length; ++i) {
    198                 ASSERT(isASCII(buffer.characters[i]));
    199                 if (stringCharacters[i] != buffer.characters[i])
    200                     return false;
    201             }
    202 
    203             return true;
    204         }
    205 
    206         const UChar* stringCharacters = string->characters16();
    207 
    208         for (unsigned i = 0; i < buffer.length; ++i) {
    209             ASSERT(isASCII(buffer.characters[i]));
    210             if (stringCharacters[i] != buffer.characters[i])
    211                 return false;
    212         }
    213 
    214         return true;
    215     }
    216 
    217     static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
    218     {
    219         UChar* target;
    220         RefPtr<StringImpl> newString = StringImpl::createUninitialized(buffer.utf16Length, target);
    221 
    222         bool isAllASCII;
    223         const char* source = buffer.characters;
    224         if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length, &isAllASCII) != conversionOK)
    225             ASSERT_NOT_REACHED();
    226 
    227         if (isAllASCII)
    228             newString = StringImpl::create(buffer.characters, buffer.length);
    229 
    230         location = newString.release().leakRef();
    231         location->setHash(hash);
    232         location->setIsAtomic(true);
    233     }
    234 };
    235 
    236 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
    237 {
    238     if (!s)
    239         return 0;
    240 
    241     if (!length)
    242         return StringImpl::empty();
    243 
    244     UCharBuffer buffer = { s, length };
    245     return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
    246 }
    247 
    248 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
    249 {
    250     ASSERT(s);
    251     ASSERT(existingHash);
    252 
    253     if (!length)
    254         return StringImpl::empty();
    255 
    256     HashAndCharacters<UChar> buffer = { existingHash, s, length };
    257     return addToStringTable<HashAndCharacters<UChar>, HashAndCharactersTranslator<UChar> >(buffer);
    258 }
    259 
    260 PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
    261 {
    262     if (!s)
    263         return 0;
    264 
    265     unsigned length = 0;
    266     while (s[length] != UChar(0))
    267         ++length;
    268 
    269     if (!length)
    270         return StringImpl::empty();
    271 
    272     UCharBuffer buffer = { s, length };
    273     return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
    274 }
    275 
    276 struct SubstringLocation {
    277     StringImpl* baseString;
    278     unsigned start;
    279     unsigned length;
    280 };
    281 
    282 struct SubstringTranslator {
    283     static unsigned hash(const SubstringLocation& buffer)
    284     {
    285         if (buffer.baseString->is8Bit())
    286             return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters8() + buffer.start, buffer.length);
    287         return StringHasher::computeHashAndMaskTop8Bits(buffer.baseString->characters16() + buffer.start, buffer.length);
    288     }
    289 
    290     static bool equal(StringImpl* const& string, const SubstringLocation& buffer)
    291     {
    292         if (buffer.baseString->is8Bit())
    293             return WTF::equal(string, buffer.baseString->characters8() + buffer.start, buffer.length);
    294         return WTF::equal(string, buffer.baseString->characters16() + buffer.start, buffer.length);
    295     }
    296 
    297     static void translate(StringImpl*& location, const SubstringLocation& buffer, unsigned hash)
    298     {
    299         location = buffer.baseString->substring(buffer.start, buffer.length).leakRef();
    300         location->setHash(hash);
    301         location->setIsAtomic(true);
    302     }
    303 };
    304 
    305 PassRefPtr<StringImpl> AtomicString::add(StringImpl* baseString, unsigned start, unsigned length)
    306 {
    307     if (!baseString)
    308         return 0;
    309 
    310     if (!length || start >= baseString->length())
    311         return StringImpl::empty();
    312 
    313     unsigned maxLength = baseString->length() - start;
    314     if (length >= maxLength) {
    315         if (!start)
    316             return add(baseString);
    317         length = maxLength;
    318     }
    319 
    320     SubstringLocation buffer = { baseString, start, length };
    321     return addToStringTable<SubstringLocation, SubstringTranslator>(buffer);
    322 }
    323 
    324 typedef HashTranslatorCharBuffer<LChar> LCharBuffer;
    325 struct LCharBufferTranslator {
    326     static unsigned hash(const LCharBuffer& buf)
    327     {
    328         return StringHasher::computeHashAndMaskTop8Bits(buf.s, buf.length);
    329     }
    330 
    331     static bool equal(StringImpl* const& str, const LCharBuffer& buf)
    332     {
    333         return WTF::equal(str, buf.s, buf.length);
    334     }
    335 
    336     static void translate(StringImpl*& location, const LCharBuffer& buf, unsigned hash)
    337     {
    338         location = StringImpl::create(buf.s, buf.length).leakRef();
    339         location->setHash(hash);
    340         location->setIsAtomic(true);
    341     }
    342 };
    343 
    344 typedef HashTranslatorCharBuffer<char> CharBuffer;
    345 struct CharBufferFromLiteralDataTranslator {
    346     static unsigned hash(const CharBuffer& buf)
    347     {
    348         return StringHasher::computeHashAndMaskTop8Bits(reinterpret_cast<const LChar*>(buf.s), buf.length);
    349     }
    350 
    351     static bool equal(StringImpl* const& str, const CharBuffer& buf)
    352     {
    353         return WTF::equal(str, buf.s, buf.length);
    354     }
    355 
    356     static void translate(StringImpl*& location, const CharBuffer& buf, unsigned hash)
    357     {
    358         location = StringImpl::create(buf.s, buf.length).leakRef();
    359         location->setHash(hash);
    360         location->setIsAtomic(true);
    361     }
    362 };
    363 
    364 PassRefPtr<StringImpl> AtomicString::add(const LChar* s, unsigned length)
    365 {
    366     if (!s)
    367         return 0;
    368 
    369     if (!length)
    370         return StringImpl::empty();
    371 
    372     LCharBuffer buffer = { s, length };
    373     return addToStringTable<LCharBuffer, LCharBufferTranslator>(buffer);
    374 }
    375 
    376 PassRefPtr<StringImpl> AtomicString::addFromLiteralData(const char* characters, unsigned length)
    377 {
    378     ASSERT(characters);
    379     ASSERT(length);
    380 
    381     CharBuffer buffer = { characters, length };
    382     return addToStringTable<CharBuffer, CharBufferFromLiteralDataTranslator>(buffer);
    383 }
    384 
    385 PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
    386 {
    387     if (!r->length())
    388         return StringImpl::empty();
    389 
    390     StringImpl* result = *stringTable().add(r).iterator;
    391     if (result == r)
    392         r->setIsAtomic(true);
    393     ASSERT(!r->isStatic() || result->isStatic());
    394     return result;
    395 }
    396 
    397 template<typename CharacterType>
    398 static inline HashSet<StringImpl*>::iterator findString(const StringImpl* stringImpl)
    399 {
    400     HashAndCharacters<CharacterType> buffer = { stringImpl->existingHash(), stringImpl->getCharacters<CharacterType>(), stringImpl->length() };
    401     return stringTable().find<HashAndCharactersTranslator<CharacterType> >(buffer);
    402 }
    403 
    404 StringImpl* AtomicString::find(const StringImpl* stringImpl)
    405 {
    406     ASSERT(stringImpl);
    407     ASSERT(stringImpl->existingHash());
    408 
    409     if (!stringImpl->length())
    410         return StringImpl::empty();
    411 
    412     HashSet<StringImpl*>::iterator iterator;
    413     if (stringImpl->is8Bit())
    414         iterator = findString<LChar>(stringImpl);
    415     else
    416         iterator = findString<UChar>(stringImpl);
    417     if (iterator == stringTable().end())
    418         return 0;
    419     return *iterator;
    420 }
    421 
    422 void AtomicString::remove(StringImpl* r)
    423 {
    424     HashSet<StringImpl*>::iterator iterator;
    425     if (r->is8Bit())
    426         iterator = findString<LChar>(r);
    427     else
    428         iterator = findString<UChar>(r);
    429     RELEASE_ASSERT(iterator != stringTable().end());
    430     stringTable().remove(iterator);
    431 }
    432 
    433 AtomicString AtomicString::lower() const
    434 {
    435     // Note: This is a hot function in the Dromaeo benchmark.
    436     StringImpl* impl = this->impl();
    437     if (UNLIKELY(!impl))
    438         return *this;
    439     RefPtr<StringImpl> newImpl = impl->lower();
    440     if (LIKELY(newImpl == impl))
    441         return *this;
    442     return AtomicString(newImpl);
    443 }
    444 
    445 AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
    446 {
    447     HashAndUTF8Characters buffer;
    448     buffer.characters = charactersStart;
    449     buffer.hash = calculateStringHashAndLengthFromUTF8MaskingTop8Bits(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
    450 
    451     if (!buffer.hash)
    452         return nullAtom;
    453 
    454     AtomicString atomicString;
    455     atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
    456     return atomicString;
    457 }
    458 
    459 #ifndef NDEBUG
    460 void AtomicString::show() const
    461 {
    462     m_string.show();
    463 }
    464 #endif
    465 
    466 } // namespace WTF
    467